1//===-- PPCISelLowering.cpp - PPC DAG Lowering Implementation -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the PPCISelLowering class.
10//
11//===----------------------------------------------------------------------===//
12
13#include "PPCISelLowering.h"
16#include "PPC.h"
17#include "PPCCCState.h"
18#include "PPCCallingConv.h"
19#include "PPCFrameLowering.h"
20#include "PPCInstrInfo.h"
22#include "PPCPerfectShuffle.h"
23#include "PPCRegisterInfo.h"
24#include "PPCSubtarget.h"
25#include "PPCTargetMachine.h"
26#include "llvm/ADT/APFloat.h"
27#include "llvm/ADT/APInt.h"
28#include "llvm/ADT/APSInt.h"
29#include "llvm/ADT/ArrayRef.h"
30#include "llvm/ADT/DenseMap.h"
31#include "llvm/ADT/STLExtras.h"
33#include "llvm/ADT/SmallSet.h"
35#include "llvm/ADT/Statistic.h"
36#include "llvm/ADT/StringRef.h"
60#include "llvm/IR/CallingConv.h"
61#include "llvm/IR/Constant.h"
62#include "llvm/IR/Constants.h"
63#include "llvm/IR/DataLayout.h"
64#include "llvm/IR/DebugLoc.h"
66#include "llvm/IR/Function.h"
67#include "llvm/IR/GlobalValue.h"
68#include "llvm/IR/IRBuilder.h"
70#include "llvm/IR/Intrinsics.h"
71#include "llvm/IR/IntrinsicsPowerPC.h"
72#include "llvm/IR/Module.h"
73#include "llvm/IR/Type.h"
74#include "llvm/IR/Use.h"
75#include "llvm/IR/Value.h"
76#include "llvm/MC/MCContext.h"
77#include "llvm/MC/MCExpr.h"
87#include "llvm/Support/Debug.h"
89#include "llvm/Support/Format.h"
95#include <algorithm>
96#include <cassert>
97#include <cstdint>
98#include <iterator>
99#include <list>
100#include <optional>
101#include <utility>
102#include <vector>
103
104using namespace llvm;
105
106#define DEBUG_TYPE "ppc-lowering"
107
108static cl::opt<bool> DisablePPCPreinc("disable-ppc-preinc",
109cl::desc("disable preincrement load/store generation on PPC"), cl::Hidden);
110
111static cl::opt<bool> DisableILPPref("disable-ppc-ilp-pref",
112cl::desc("disable setting the node scheduling preference to ILP on PPC"), cl::Hidden);
113
114static cl::opt<bool> DisablePPCUnaligned("disable-ppc-unaligned",
115cl::desc("disable unaligned load/store generation on PPC"), cl::Hidden);
116
117static cl::opt<bool> DisableSCO("disable-ppc-sco",
118cl::desc("disable sibling call optimization on ppc"), cl::Hidden);
119
120static cl::opt<bool> DisableInnermostLoopAlign32("disable-ppc-innermost-loop-align32",
121cl::desc("don't always align innermost loop to 32 bytes on ppc"), cl::Hidden);
122
123static cl::opt<bool> UseAbsoluteJumpTables("ppc-use-absolute-jumptables",
124cl::desc("use absolute jump tables on ppc"), cl::Hidden);
125
126static cl::opt<bool>
127 DisablePerfectShuffle("ppc-disable-perfect-shuffle",
128 cl::desc("disable vector permute decomposition"),
129 cl::init(true), cl::Hidden);
130
132 "disable-auto-paired-vec-st",
133 cl::desc("disable automatically generated 32byte paired vector stores"),
134 cl::init(true), cl::Hidden);
135
137 "ppc-min-jump-table-entries", cl::init(64), cl::Hidden,
138 cl::desc("Set minimum number of entries to use a jump table on PPC"));
139
141 "ppc-gather-alias-max-depth", cl::init(18), cl::Hidden,
142 cl::desc("max depth when checking alias info in GatherAllAliases()"));
143
145 "ppc-aix-shared-lib-tls-model-opt-limit", cl::init(1), cl::Hidden,
146 cl::desc("Set inclusive limit count of TLS local-dynamic access(es) in a "
147 "function to use initial-exec"));
148
149STATISTIC(NumTailCalls, "Number of tail calls");
150STATISTIC(NumSiblingCalls, "Number of sibling calls");
151STATISTIC(ShufflesHandledWithVPERM,
152 "Number of shuffles lowered to a VPERM or XXPERM");
153STATISTIC(NumDynamicAllocaProbed, "Number of dynamic stack allocation probed");
154
155static bool isNByteElemShuffleMask(ShuffleVectorSDNode *, unsigned, int);
156
157static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl);
158
159static const char AIXSSPCanaryWordName[] = "__ssp_canary_word";
160
161// A faster local-[exec|dynamic] TLS access sequence (enabled with the
162// -maix-small-local-[exec|dynamic]-tls option) can be produced for TLS
163// variables; consistent with the IBM XL compiler, we apply a max size of
164// slightly under 32KB.
166
167// FIXME: Remove this once the bug has been fixed!
168extern cl::opt<bool> ANDIGlueBug;
169
170PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
171 const PPCSubtarget &STI)
172 : TargetLowering(TM), Subtarget(STI) {
173 // Initialize the map that relates the PPC addressing modes to the computed
174 // flags of a load/store instruction. The map is used to determine the
175 // optimal addressing mode when selecting loads and stores.
176 initializeAddrModeMap();
177 // On PPC32/64, arguments smaller than 4/8 bytes are extended, so all
178 // arguments are at least 4/8 bytes aligned.
179 bool isPPC64 = Subtarget.isPPC64();
180 setMinStackArgumentAlignment(isPPC64 ? Align(8) : Align(4));
181
182 // Set up the register classes.
183 addRegisterClass(MVT::i32, &PPC::GPRCRegClass);
184 if (!useSoftFloat()) {
185 if (hasSPE()) {
186 addRegisterClass(MVT::f32, &PPC::GPRCRegClass);
187 // EFPU2 APU only supports f32
188 if (!Subtarget.hasEFPU2())
189 addRegisterClass(MVT::f64, &PPC::SPERCRegClass);
190 } else {
191 addRegisterClass(MVT::f32, &PPC::F4RCRegClass);
192 addRegisterClass(MVT::f64, &PPC::F8RCRegClass);
193 }
194 }
195
196 // Match BITREVERSE to a customized fast code sequence in the td file.
199
200 // Sub-word ATOMIC_CMP_SWAP needs to ensure that the input is zero-extended.
202
203 // Custom lower inline assembly to check for special registers.
206
207 // PowerPC has an i16 but no i8 (or i1) SEXTLOAD.
208 for (MVT VT : MVT::integer_valuetypes()) {
211 }
212
213 if (Subtarget.isISA3_0()) {
214 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Legal);
215 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Legal);
216 setTruncStoreAction(MVT::f64, MVT::f16, Legal);
217 setTruncStoreAction(MVT::f32, MVT::f16, Legal);
218 } else {
219 // No extending loads from f16 or HW conversions back and forth.
220 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
223 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
226 setTruncStoreAction(MVT::f64, MVT::f16, Expand);
227 setTruncStoreAction(MVT::f32, MVT::f16, Expand);
228 }
229
230 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
231
232 // PowerPC has pre-increment loads and stores.
243 if (!Subtarget.hasSPE()) {
248 }
249
250 // PowerPC uses ADDC/ADDE/SUBC/SUBE to propagate carry.
251 const MVT ScalarIntVTs[] = { MVT::i32, MVT::i64 };
252 for (MVT VT : ScalarIntVTs) {
257 }
258
259 if (Subtarget.useCRBits()) {
261
262 if (isPPC64 || Subtarget.hasFPCVT()) {
265 isPPC64 ? MVT::i64 : MVT::i32);
268 isPPC64 ? MVT::i64 : MVT::i32);
269
272 isPPC64 ? MVT::i64 : MVT::i32);
275 isPPC64 ? MVT::i64 : MVT::i32);
276
279 isPPC64 ? MVT::i64 : MVT::i32);
282 isPPC64 ? MVT::i64 : MVT::i32);
283
286 isPPC64 ? MVT::i64 : MVT::i32);
289 isPPC64 ? MVT::i64 : MVT::i32);
290 } else {
295 }
296
297 // PowerPC does not support direct load/store of condition registers.
300
301 // FIXME: Remove this once the ANDI glue bug is fixed:
302 if (ANDIGlueBug)
304
305 for (MVT VT : MVT::integer_valuetypes()) {
308 setTruncStoreAction(VT, MVT::i1, Expand);
309 }
310
311 addRegisterClass(MVT::i1, &PPC::CRBITRCRegClass);
312 }
313
314 // Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on
315 // PPC (the libcall is not available).
320
321 // We do not currently implement these libm ops for PowerPC.
322 setOperationAction(ISD::FFLOOR, MVT::ppcf128, Expand);
323 setOperationAction(ISD::FCEIL, MVT::ppcf128, Expand);
324 setOperationAction(ISD::FTRUNC, MVT::ppcf128, Expand);
325 setOperationAction(ISD::FRINT, MVT::ppcf128, Expand);
327 setOperationAction(ISD::FREM, MVT::ppcf128, Expand);
328
329 // PowerPC has no SREM/UREM instructions unless we are on P9.
330 // On P9 we may use a hardware instruction to compute the remainder.
331 // When the result of both the remainder and the division is required, it is
332 // more efficient to compute the remainder from the result of the division
333 // rather than use the remainder instruction. The instructions are legalized
334 // directly because the DivRemPairsPass performs the transformation at the IR
335 // level.
336 if (Subtarget.isISA3_0()) {
341 } else {
346 }
347
348 // Don't use SMUL_LOHI/UMUL_LOHI or SDIVREM/UDIVREM to lower SREM/UREM.
357
358 // Handle constrained floating-point operations on scalar types.
359 // TODO: Handle SPE-specific operations.
365
370
371 if (!Subtarget.hasSPE()) {
374 }
375
376 if (Subtarget.hasVSX()) {
379 }
380
381 if (Subtarget.hasFSQRT()) {
384 }
385
386 if (Subtarget.hasFPRND()) {
391
396 }
397
398 // We don't support sin/cos/sqrt/fmod/pow
409
410 // MASS transformation for LLVM intrinsics carrying the fast-math flag,
411 // consistent with the PPCGenScalarMASSEntries pass.
412 if (TM.getOptLevel() == CodeGenOptLevel::Aggressive) {
425 }
426
427 if (Subtarget.hasSPE()) {
430 } else {
431 setOperationAction(ISD::FMA , MVT::f64, Legal);
432 setOperationAction(ISD::FMA , MVT::f32, Legal);
433 }
434
435 if (Subtarget.hasSPE())
436 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
437
439
440 // If we're enabling GP optimizations, use hardware square root
441 if (!Subtarget.hasFSQRT() &&
442 !(TM.Options.UnsafeFPMath && Subtarget.hasFRSQRTE() &&
443 Subtarget.hasFRE()))
445
446 if (!Subtarget.hasFSQRT() &&
447 !(TM.Options.UnsafeFPMath && Subtarget.hasFRSQRTES() &&
448 Subtarget.hasFRES()))
450
451 if (Subtarget.hasFCPSGN()) {
454 } else {
457 }
458
459 if (Subtarget.hasFPRND()) {
464
469 }
470
471 // Prior to P10, PowerPC does not have BSWAP, but we can use the vector
472 // BSWAP instruction xxbrd to speed up scalar BSWAP64.
473 if (Subtarget.isISA3_1()) {
476 } else {
479 ISD::BSWAP, MVT::i64,
480 (Subtarget.hasP9Vector() && Subtarget.isPPC64()) ? Custom : Expand);
481 }
482
483 // CTPOP and CTTZ were introduced in P8/P9, respectively.
484 if (Subtarget.isISA3_0()) {
485 setOperationAction(ISD::CTTZ , MVT::i32 , Legal);
486 setOperationAction(ISD::CTTZ , MVT::i64 , Legal);
487 } else {
488 setOperationAction(ISD::CTTZ , MVT::i32 , Expand);
489 setOperationAction(ISD::CTTZ , MVT::i64 , Expand);
490 }
491
492 if (Subtarget.hasPOPCNTD() == PPCSubtarget::POPCNTD_Fast) {
495 } else {
498 }
499
500 // PowerPC does not have ROTR
503
504 if (!Subtarget.useCRBits()) {
505 // PowerPC does not have Select
510 }
511
512 // PowerPC wants to turn select_cc of FP into fsel when possible.
515
516 // PowerPC wants to optimize integer setcc a bit
517 if (!Subtarget.useCRBits())
519
520 if (Subtarget.hasFPU()) {
524
528 }
529
530 // PowerPC does not have BRCOND which requires SetCC
531 if (!Subtarget.useCRBits())
533
535
536 if (Subtarget.hasSPE()) {
537 // SPE has built-in conversions
544
545 // SPE supports signaling compare of f32/f64.
548 } else {
549 // PowerPC turns FP_TO_SINT into FCTIWZ and some load/stores.
552
553 // PowerPC does not have [U|S]INT_TO_FP
558 }
559
560 if (Subtarget.hasDirectMove() && isPPC64) {
565 if (TM.Options.UnsafeFPMath) {
574 }
575 } else {
580 }
581
582 // We cannot sextinreg(i1). Expand to shifts.
584
585 // NOTE: EH_SJLJ_SETJMP/_LONGJMP supported here is NOT intended to support
586 // SjLj exception handling but a light-weight setjmp/longjmp replacement to
587 // support continuation, user-level threading, etc. As a result, no
588 // other SjLj exception interfaces are implemented, so please don't build
589 // your own exception handling based on them.
590 // LLVM/Clang supports zero-cost DWARF exception handling.
593
594 // We want to legalize GlobalAddress and ConstantPool nodes into the
595 // appropriate instructions to materialize the address.
606
607 // TRAP is legal.
608 setOperationAction(ISD::TRAP, MVT::Other, Legal);
609
610 // TRAMPOLINE is custom lowered.
613
614 // VASTART needs to be custom lowered to use the VarArgsFrameIndex
616
617 if (Subtarget.is64BitELFABI()) {
618 // VAARG always uses double-word chunks, so promote anything smaller.
620 AddPromotedToType(ISD::VAARG, MVT::i1, MVT::i64);
622 AddPromotedToType(ISD::VAARG, MVT::i8, MVT::i64);
624 AddPromotedToType(ISD::VAARG, MVT::i16, MVT::i64);
626 AddPromotedToType(ISD::VAARG, MVT::i32, MVT::i64);
628 } else if (Subtarget.is32BitELFABI()) {
629 // VAARG is custom lowered with the 32-bit SVR4 ABI.
632 } else
634
635 // VACOPY is custom lowered with the 32-bit SVR4 ABI.
636 if (Subtarget.is32BitELFABI())
638 else
640
641 // Use the default implementation.
642 setOperationAction(ISD::VAEND , MVT::Other, Expand);
651
652 // We want to custom lower some of our intrinsics.
658
659 // To handle counter-based loop conditions.
661
666
667 // Comparisons that require checking two conditions.
668 if (Subtarget.hasSPE()) {
673 }
686
689
690 if (Subtarget.has64BitSupport()) {
691 // They also have instructions for converting between i64 and fp.
700 // This is just the low 32 bits of a (signed) fp->i64 conversion.
701 // We cannot do this with Promote because i64 is not a legal type.
704
705 if (Subtarget.hasLFIWAX() || Subtarget.isPPC64()) {
708 }
709 } else {
710 // PowerPC does not have FP_TO_UINT on 32-bit implementations.
711 if (Subtarget.hasSPE()) {
714 } else {
717 }
718 }
719
720 // With the instructions enabled under FPCVT, we can do everything.
721 if (Subtarget.hasFPCVT()) {
722 if (Subtarget.has64BitSupport()) {
731 }
732
741 }
742
743 if (Subtarget.use64BitRegs()) {
744 // 64-bit PowerPC implementations can support i64 types directly
745 addRegisterClass(MVT::i64, &PPC::G8RCRegClass);
746 // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
748 // 64-bit PowerPC wants to expand i128 shifts itself.
752 } else {
753 // 32-bit PowerPC wants to expand i64 shifts itself.
757 }
758
759 // PowerPC has better expansions for funnel shifts than the generic
760 // TargetLowering::expandFunnelShift.
761 if (Subtarget.has64BitSupport()) {
764 }
767
768 if (Subtarget.hasVSX()) {
773 }
774
775 if (Subtarget.hasAltivec()) {
776 for (MVT VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32 }) {
781 }
782 // First set operation action for all vector types to expand. Then we
783 // will selectively turn on ones that can be effectively codegen'd.
785 // add/sub are legal for all supported vector VT's.
788
789 // For v2i64, these are only valid with P8Vector. This is corrected after
790 // the loop.
791 if (VT.getSizeInBits() <= 128 && VT.getScalarSizeInBits() <= 64) {
796 }
797 else {
802 }
803
804 if (Subtarget.hasVSX()) {
807 }
808
809 // Vector instructions introduced in P8
810 if (Subtarget.hasP8Altivec() && (VT.SimpleTy != MVT::v1i128)) {
813 }
814 else {
817 }
818
819 // Vector instructions introduced in P9
820 if (Subtarget.hasP9Altivec() && (VT.SimpleTy != MVT::v1i128))
822 else
824
825 // We promote all shuffles to v16i8.
827 AddPromotedToType (ISD::VECTOR_SHUFFLE, VT, MVT::v16i8);
828
829 // We promote all non-typed operations to v4i32.
831 AddPromotedToType (ISD::AND , VT, MVT::v4i32);
833 AddPromotedToType (ISD::OR , VT, MVT::v4i32);
835 AddPromotedToType (ISD::XOR , VT, MVT::v4i32);
837 AddPromotedToType (ISD::LOAD , VT, MVT::v4i32);
839 AddPromotedToType (ISD::SELECT, VT, MVT::v4i32);
842 AddPromotedToType (ISD::SELECT_CC, VT, MVT::v4i32);
844 AddPromotedToType (ISD::STORE, VT, MVT::v4i32);
845
846 // No other operations are legal.
885
886 for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
887 setTruncStoreAction(VT, InnerVT, Expand);
890 setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
891 }
892 }
894 if (!Subtarget.hasP8Vector()) {
895 setOperationAction(ISD::SMAX, MVT::v2i64, Expand);
896 setOperationAction(ISD::SMIN, MVT::v2i64, Expand);
897 setOperationAction(ISD::UMAX, MVT::v2i64, Expand);
898 setOperationAction(ISD::UMIN, MVT::v2i64, Expand);
899 }
900
901 // We can custom expand all VECTOR_SHUFFLEs to VPERM; others we can handle
902 // with merges, splats, etc.
904
905 // Vector truncates to sub-word integers that fit in an Altivec/VSX register
906 // are cheap, so handle them before they get expanded to scalar.
912
913 setOperationAction(ISD::AND , MVT::v4i32, Legal);
914 setOperationAction(ISD::OR , MVT::v4i32, Legal);
915 setOperationAction(ISD::XOR , MVT::v4i32, Legal);
916 setOperationAction(ISD::LOAD , MVT::v4i32, Legal);
918 Subtarget.useCRBits() ? Legal : Expand);
919 setOperationAction(ISD::STORE , MVT::v4i32, Legal);
929 setOperationAction(ISD::FCEIL, MVT::v4f32, Legal);
932
933 // Custom lower ROTL v1i128 to VECTOR_SHUFFLE v16i8.
934 setOperationAction(ISD::ROTL, MVT::v1i128, Custom);
935 // With hasAltivec set, we can lower ISD::ROTL to vrl(b|h|w).
936 if (Subtarget.hasAltivec())
937 for (auto VT : {MVT::v4i32, MVT::v8i16, MVT::v16i8})
939 // With hasP8Altivec set, we can lower ISD::ROTL to vrld.
940 if (Subtarget.hasP8Altivec())
941 setOperationAction(ISD::ROTL, MVT::v2i64, Legal);
942
943 addRegisterClass(MVT::v4f32, &PPC::VRRCRegClass);
944 addRegisterClass(MVT::v4i32, &PPC::VRRCRegClass);
945 addRegisterClass(MVT::v8i16, &PPC::VRRCRegClass);
946 addRegisterClass(MVT::v16i8, &PPC::VRRCRegClass);
947
948 setOperationAction(ISD::MUL, MVT::v4f32, Legal);
949 setOperationAction(ISD::FMA, MVT::v4f32, Legal);
950
951 if (Subtarget.hasVSX()) {
952 setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
953 setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
955 }
956
957 if (Subtarget.hasP8Altivec())
958 setOperationAction(ISD::MUL, MVT::v4i32, Legal);
959 else
960 setOperationAction(ISD::MUL, MVT::v4i32, Custom);
961
962 if (Subtarget.isISA3_1()) {
963 setOperationAction(ISD::MUL, MVT::v2i64, Legal);
964 setOperationAction(ISD::MULHS, MVT::v2i64, Legal);
965 setOperationAction(ISD::MULHU, MVT::v2i64, Legal);
966 setOperationAction(ISD::MULHS, MVT::v4i32, Legal);
967 setOperationAction(ISD::MULHU, MVT::v4i32, Legal);
968 setOperationAction(ISD::UDIV, MVT::v2i64, Legal);
969 setOperationAction(ISD::SDIV, MVT::v2i64, Legal);
970 setOperationAction(ISD::UDIV, MVT::v4i32, Legal);
971 setOperationAction(ISD::SDIV, MVT::v4i32, Legal);
972 setOperationAction(ISD::UREM, MVT::v2i64, Legal);
973 setOperationAction(ISD::SREM, MVT::v2i64, Legal);
974 setOperationAction(ISD::UREM, MVT::v4i32, Legal);
975 setOperationAction(ISD::SREM, MVT::v4i32, Legal);
976 setOperationAction(ISD::UREM, MVT::v1i128, Legal);
977 setOperationAction(ISD::SREM, MVT::v1i128, Legal);
978 setOperationAction(ISD::UDIV, MVT::v1i128, Legal);
979 setOperationAction(ISD::SDIV, MVT::v1i128, Legal);
980 setOperationAction(ISD::ROTL, MVT::v1i128, Legal);
981 }
982
983 setOperationAction(ISD::MUL, MVT::v8i16, Legal);
984 setOperationAction(ISD::MUL, MVT::v16i8, Custom);
985
988
993
994 // Altivec does not contain unordered floating-point compare instructions
995 setCondCodeAction(ISD::SETUO, MVT::v4f32, Expand);
997 setCondCodeAction(ISD::SETO, MVT::v4f32, Expand);
999
1000 if (Subtarget.hasVSX()) {
1003 if (Subtarget.hasP8Vector()) {
1006 }
1007 if (Subtarget.hasDirectMove() && isPPC64) {
1016 }
1018
1019 // The nearbyint variants are not allowed to raise the inexact exception
1020 // so we can only code-gen them with unsafe math.
1021 if (TM.Options.UnsafeFPMath) {
1024 }
1025
1026 setOperationAction(ISD::FFLOOR, MVT::v2f64, Legal);
1027 setOperationAction(ISD::FCEIL, MVT::v2f64, Legal);
1028 setOperationAction(ISD::FTRUNC, MVT::v2f64, Legal);
1030 setOperationAction(ISD::FRINT, MVT::v2f64, Legal);
1031 setOperationAction(ISD::FROUND, MVT::v2f64, Legal);
1034
1036 setOperationAction(ISD::FRINT, MVT::v4f32, Legal);
1037 setOperationAction(ISD::FROUND, MVT::v4f32, Legal);
1040
1041 setOperationAction(ISD::MUL, MVT::v2f64, Legal);
1042 setOperationAction(ISD::FMA, MVT::v2f64, Legal);
1043
1044 setOperationAction(ISD::FDIV, MVT::v2f64, Legal);
1045 setOperationAction(ISD::FSQRT, MVT::v2f64, Legal);
1046
1047 // Share the Altivec comparison restrictions.
1048 setCondCodeAction(ISD::SETUO, MVT::v2f64, Expand);
1049 setCondCodeAction(ISD::SETUEQ, MVT::v2f64, Expand);
1050 setCondCodeAction(ISD::SETO, MVT::v2f64, Expand);
1051 setCondCodeAction(ISD::SETONE, MVT::v2f64, Expand);
1052
1053 setOperationAction(ISD::LOAD, MVT::v2f64, Legal);
1054 setOperationAction(ISD::STORE, MVT::v2f64, Legal);
1055
1057
1058 if (Subtarget.hasP8Vector())
1059 addRegisterClass(MVT::f32, &PPC::VSSRCRegClass);
1060
1061 addRegisterClass(MVT::f64, &PPC::VSFRCRegClass);
1062
1063 addRegisterClass(MVT::v4i32, &PPC::VSRCRegClass);
1064 addRegisterClass(MVT::v4f32, &PPC::VSRCRegClass);
1065 addRegisterClass(MVT::v2f64, &PPC::VSRCRegClass);
1066
1067 if (Subtarget.hasP8Altivec()) {
1068 setOperationAction(ISD::SHL, MVT::v2i64, Legal);
1069 setOperationAction(ISD::SRA, MVT::v2i64, Legal);
1070 setOperationAction(ISD::SRL, MVT::v2i64, Legal);
1071
1072 // 128-bit shifts can be accomplished via 3 instructions for SHL and
1073 // SRL, but not for SRA because of the instructions available:
1074 // VS{RL} and VS{RL}O. However, due to direct move costs, it's not
1075 // worth doing.
1076 setOperationAction(ISD::SHL, MVT::v1i128, Expand);
1077 setOperationAction(ISD::SRL, MVT::v1i128, Expand);
1078 setOperationAction(ISD::SRA, MVT::v1i128, Expand);
1079
1080 setOperationAction(ISD::SETCC, MVT::v2i64, Legal);
1081 }
1082 else {
1083 setOperationAction(ISD::SHL, MVT::v2i64, Expand);
1084 setOperationAction(ISD::SRA, MVT::v2i64, Expand);
1085 setOperationAction(ISD::SRL, MVT::v2i64, Expand);
1086
1087 setOperationAction(ISD::SETCC, MVT::v2i64, Custom);
1088
1089 // VSX v2i64 only supports non-arithmetic operations.
1090 setOperationAction(ISD::ADD, MVT::v2i64, Expand);
1091 setOperationAction(ISD::SUB, MVT::v2i64, Expand);
1092 }
1093
1094 if (Subtarget.isISA3_1())
1095 setOperationAction(ISD::SETCC, MVT::v1i128, Legal);
1096 else
1097 setOperationAction(ISD::SETCC, MVT::v1i128, Expand);
1098
1099 setOperationAction(ISD::LOAD, MVT::v2i64, Promote);
1100 AddPromotedToType (ISD::LOAD, MVT::v2i64, MVT::v2f64);
1102 AddPromotedToType (ISD::STORE, MVT::v2i64, MVT::v2f64);
1103
1105
1114
1115 // Custom handling for partial vectors of integers converted to
1116 // floating point. We already have optimal handling for v2i32 through
1117 // the DAG combine, so those aren't necessary.
1134
1135 setOperationAction(ISD::FNEG, MVT::v4f32, Legal);
1136 setOperationAction(ISD::FNEG, MVT::v2f64, Legal);
1137 setOperationAction(ISD::FABS, MVT::v4f32, Legal);
1138 setOperationAction(ISD::FABS, MVT::v2f64, Legal);
1141
1144
1145 // Handle constrained floating-point operations on vectors.
1146 // The predicate is `hasVSX` because Altivec instructions do not raise
1147 // floating-point exceptions, but VSX vector instructions do.
1161
1175
1176 addRegisterClass(MVT::v2i64, &PPC::VSRCRegClass);
1177 addRegisterClass(MVT::f128, &PPC::VRRCRegClass);
1178
1179 for (MVT FPT : MVT::fp_valuetypes())
1180 setLoadExtAction(ISD::EXTLOAD, MVT::f128, FPT, Expand);
1181
1182 // Expand the SELECT to SELECT_CC
1184
1185 setTruncStoreAction(MVT::f128, MVT::f64, Expand);
1186 setTruncStoreAction(MVT::f128, MVT::f32, Expand);
1187
1188 // No implementation for these ops for PowerPC.
1190 setOperationAction(ISD::FSIN, MVT::f128, Expand);
1191 setOperationAction(ISD::FCOS, MVT::f128, Expand);
1192 setOperationAction(ISD::FPOW, MVT::f128, Expand);
1194 setOperationAction(ISD::FREM, MVT::f128, Expand);
1195 }
1196
1197 if (Subtarget.hasP8Altivec()) {
1198 addRegisterClass(MVT::v2i64, &PPC::VRRCRegClass);
1199 addRegisterClass(MVT::v1i128, &PPC::VRRCRegClass);
1200 }
1201
1202 if (Subtarget.hasP9Vector()) {
1205
1206 // Test data class instructions store results in CR bits.
1207 if (Subtarget.useCRBits()) {
1211 }
1212
1213 // 128-bit shifts can be accomplished via 3 instructions for SHL and
1214 // SRL, but not for SRA because of the instructions available:
1215 // VS{RL} and VS{RL}O.
1216 setOperationAction(ISD::SHL, MVT::v1i128, Legal);
1217 setOperationAction(ISD::SRL, MVT::v1i128, Legal);
1218 setOperationAction(ISD::SRA, MVT::v1i128, Expand);
1219
1220 setOperationAction(ISD::FADD, MVT::f128, Legal);
1221 setOperationAction(ISD::FSUB, MVT::f128, Legal);
1222 setOperationAction(ISD::FDIV, MVT::f128, Legal);
1223 setOperationAction(ISD::FMUL, MVT::f128, Legal);
1225
1226 setOperationAction(ISD::FMA, MVT::f128, Legal);
1233
1235 setOperationAction(ISD::FRINT, MVT::f128, Legal);
1237 setOperationAction(ISD::FCEIL, MVT::f128, Legal);
1240
1244
1245 // Handle constrained floating-point operations of fp128
1262 setOperationAction(ISD::BSWAP, MVT::v8i16, Legal);
1263 setOperationAction(ISD::BSWAP, MVT::v4i32, Legal);
1264 setOperationAction(ISD::BSWAP, MVT::v2i64, Legal);
1265 setOperationAction(ISD::BSWAP, MVT::v1i128, Legal);
1266 } else if (Subtarget.hasVSX()) {
1269
1270 AddPromotedToType(ISD::LOAD, MVT::f128, MVT::v4i32);
1271 AddPromotedToType(ISD::STORE, MVT::f128, MVT::v4i32);
1272
1273 // Set FADD/FSUB as libcalls to keep the legalizer from expanding the
1274 // fp_to_uint and int_to_fp.
1277
1278 setOperationAction(ISD::FMUL, MVT::f128, Expand);
1279 setOperationAction(ISD::FDIV, MVT::f128, Expand);
1280 setOperationAction(ISD::FNEG, MVT::f128, Expand);
1281 setOperationAction(ISD::FABS, MVT::f128, Expand);
1283 setOperationAction(ISD::FMA, MVT::f128, Expand);
1285
1286 // Expand the fp_extend if the target type is fp128.
1289
1290 // Expand the fp_round if the source type is fp128.
1291 for (MVT VT : {MVT::f32, MVT::f64}) {
1294 }
1295
1300
1301 // Lower the following f128 select_cc pattern:
1302 // select_cc x, y, tv, fv, cc -> select_cc (setcc x, y, cc), 0, tv, fv, NE
1304
1305 // We need to handle f128 SELECT_CC with integer result type.
1307 setOperationAction(ISD::SELECT_CC, MVT::i64, isPPC64 ? Custom : Expand);
1308 }
1309
1310 if (Subtarget.hasP9Altivec()) {
1311 if (Subtarget.isISA3_1()) {
1316 } else {
1319 }
1327
1328 setOperationAction(ISD::ABDU, MVT::v16i8, Legal);
1329 setOperationAction(ISD::ABDU, MVT::v8i16, Legal);
1330 setOperationAction(ISD::ABDU, MVT::v4i32, Legal);
1331 setOperationAction(ISD::ABDS, MVT::v4i32, Legal);
1332 }
1333
1334 if (Subtarget.hasP10Vector()) {
1336 }
1337 }
1338
1339 if (Subtarget.pairedVectorMemops()) {
1340 addRegisterClass(MVT::v256i1, &PPC::VSRpRCRegClass);
1341 setOperationAction(ISD::LOAD, MVT::v256i1, Custom);
1342 setOperationAction(ISD::STORE, MVT::v256i1, Custom);
1343 }
1344 if (Subtarget.hasMMA()) {
1345 if (Subtarget.isISAFuture())
1346 addRegisterClass(MVT::v512i1, &PPC::WACCRCRegClass);
1347 else
1348 addRegisterClass(MVT::v512i1, &PPC::UACCRCRegClass);
1349 setOperationAction(ISD::LOAD, MVT::v512i1, Custom);
1350 setOperationAction(ISD::STORE, MVT::v512i1, Custom);
1352 }
1353
1354 if (Subtarget.has64BitSupport())
1356
1357 if (Subtarget.isISA3_1())
1358 setOperationAction(ISD::SRA, MVT::v1i128, Legal);
1359
1360 setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, isPPC64 ? Legal : Custom);
1361
1362 if (!isPPC64) {
1365 }
1366
1371 }
1372
1374
1375 if (Subtarget.hasAltivec()) {
1376 // Altivec instructions set fields to all zeros or all ones.
1378 }
1379
1382 else if (isPPC64)
1384 else
1386
1387 setStackPointerRegisterToSaveRestore(isPPC64 ? PPC::X1 : PPC::R1);
1388
1389 // We have target-specific dag combine patterns for the following nodes:
1392 if (Subtarget.hasFPCVT())
1395 if (Subtarget.useCRBits())
1399
1401
1403
1404 if (Subtarget.useCRBits()) {
1406 }
1407
1408 setLibcallName(RTLIB::LOG_F128, "logf128");
1409 setLibcallName(RTLIB::LOG2_F128, "log2f128");
1410 setLibcallName(RTLIB::LOG10_F128, "log10f128");
1411 setLibcallName(RTLIB::EXP_F128, "expf128");
1412 setLibcallName(RTLIB::EXP2_F128, "exp2f128");
1413 setLibcallName(RTLIB::SIN_F128, "sinf128");
1414 setLibcallName(RTLIB::COS_F128, "cosf128");
1415 setLibcallName(RTLIB::SINCOS_F128, "sincosf128");
1416 setLibcallName(RTLIB::POW_F128, "powf128");
1417 setLibcallName(RTLIB::FMIN_F128, "fminf128");
1418 setLibcallName(RTLIB::FMAX_F128, "fmaxf128");
1419 setLibcallName(RTLIB::REM_F128, "fmodf128");
1420 setLibcallName(RTLIB::SQRT_F128, "sqrtf128");
1421 setLibcallName(RTLIB::CEIL_F128, "ceilf128");
1422 setLibcallName(RTLIB::FLOOR_F128, "floorf128");
1423 setLibcallName(RTLIB::TRUNC_F128, "truncf128");
1424 setLibcallName(RTLIB::ROUND_F128, "roundf128");
1425 setLibcallName(RTLIB::LROUND_F128, "lroundf128");
1426 setLibcallName(RTLIB::LLROUND_F128, "llroundf128");
1427 setLibcallName(RTLIB::RINT_F128, "rintf128");
1428 setLibcallName(RTLIB::LRINT_F128, "lrintf128");
1429 setLibcallName(RTLIB::LLRINT_F128, "llrintf128");
1430 setLibcallName(RTLIB::NEARBYINT_F128, "nearbyintf128");
1431 setLibcallName(RTLIB::FMA_F128, "fmaf128");
1432 setLibcallName(RTLIB::FREXP_F128, "frexpf128");
1433
1434 if (Subtarget.isAIXABI()) {
1435 setLibcallName(RTLIB::MEMCPY, isPPC64 ? "___memmove64" : "___memmove");
1436 setLibcallName(RTLIB::MEMMOVE, isPPC64 ? "___memmove64" : "___memmove");
1437 setLibcallName(RTLIB::MEMSET, isPPC64 ? "___memset64" : "___memset");
1438 setLibcallName(RTLIB::BZERO, isPPC64 ? "___bzero64" : "___bzero");
1439 }
1440
1441 // With 32 condition bits, we don't need to sink (and duplicate) compares
1442 // aggressively in CodeGenPrep.
1443 if (Subtarget.useCRBits()) {
1446 }
1447
1448 // TODO: The default entry number is set to 64. This stops most jump table
1449 // generation on PPC. But it is good for current PPC HW because the indirect
1450 // branch via mtctr to the jump table may lead to poor branch prediction.
1451 // Re-evaluate this value on future HW that can do better with mtctr.
1453
1455
1456 switch (Subtarget.getCPUDirective()) {
1457 default: break;
1458 case PPC::DIR_970:
1459 case PPC::DIR_A2:
1460 case PPC::DIR_E500:
1461 case PPC::DIR_E500mc:
1462 case PPC::DIR_E5500:
1463 case PPC::DIR_PWR4:
1464 case PPC::DIR_PWR5:
1465 case PPC::DIR_PWR5X:
1466 case PPC::DIR_PWR6:
1467 case PPC::DIR_PWR6X:
1468 case PPC::DIR_PWR7:
1469 case PPC::DIR_PWR8:
1470 case PPC::DIR_PWR9:
1471 case PPC::DIR_PWR10:
1475 break;
1476 }
1477
1478 if (Subtarget.enableMachineScheduler())
1480 else
1482
1484
1485 // The Freescale cores do better with aggressive inlining of memcpy and
1486 // friends. GCC uses the same threshold of 128 bytes (= 32 word stores).
1487 if (Subtarget.getCPUDirective() == PPC::DIR_E500mc ||
1488 Subtarget.getCPUDirective() == PPC::DIR_E5500) {
1489 MaxStoresPerMemset = 32;
1491 MaxStoresPerMemcpy = 32;
1495 } else if (Subtarget.getCPUDirective() == PPC::DIR_A2) {
1496 // The A2 also benefits from (very) aggressive inlining of memcpy and
1497 // friends. The overhead of the function call, even when warm, can be
1498 // over one hundred cycles.
1499 MaxStoresPerMemset = 128;
1500 MaxStoresPerMemcpy = 128;
1501 MaxStoresPerMemmove = 128;
1502 MaxLoadsPerMemcmp = 128;
1503 } else {
1506 }
1507
1508 IsStrictFPEnabled = true;
1509
1510 // Let the subtarget (CPU) decide if a predictable select is more expensive
1511 // than the corresponding branch. This information is used in CGP to decide
1512 // when to convert selects into branches.
1514
1516}
1517
1518// *********************************** NOTE ************************************
1519// For selecting load and store instructions, the addressing modes are defined
1520// as ComplexPatterns in PPCInstrInfo.td, which are then utilized in the TD
1521// patterns to match the load and store instructions.
1522//
1523// The TD definitions for the addressing modes correspond to their respective
1524// Select<AddrMode>Form() function in PPCISelDAGToDAG.cpp. These functions rely
1525// on SelectOptimalAddrMode(), which calls computeMOFlags() to compute the
1526// address mode flags of a particular node. Afterwards, the computed address
1527// flags are passed into getAddrModeForFlags() in order to retrieve the optimal
1528// addressing mode. SelectOptimalAddrMode() then sets the Base and Displacement
1529// accordingly, based on the preferred addressing mode.
1530//
1531// Within PPCISelLowering.h, there are two enums: MemOpFlags and AddrMode.
1532// MemOpFlags contains all the possible flags that can be used to compute the
1533// optimal addressing mode for load and store instructions.
1534// AddrMode contains all the possible load and store addressing modes available
1535// on Power (such as DForm, DSForm, DQForm, XForm, etc.)
1536//
1537// When adding new load and store instructions, it is possible that new address
1538// flags may need to be added into MemOpFlags, and a new addressing mode will
1539// need to be added to AddrMode. An entry of the new addressing mode (consisting
1540// of the minimal and main distinguishing address flags for the new load/store
1541// instructions) will need to be added into initializeAddrModeMap() below.
1542// Finally, when adding new addressing modes, getAddrModeForFlags() will
1543// need to be updated to account for selecting the optimal addressing mode.
1544// *****************************************************************************
1545/// Initialize the map that relates the different addressing modes of the load
1546/// and store instructions to a set of flags. This ensures the load/store
1547/// instruction is correctly matched during instruction selection.
1548void PPCTargetLowering::initializeAddrModeMap() {
1549 AddrModesMap[PPC::AM_DForm] = {
1550 // LWZ, STW
1555 // LBZ, LHZ, STB, STH
1560 // LHA
1565 // LFS, LFD, STFS, STFD
1570 };
1571 AddrModesMap[PPC::AM_DSForm] = {
1572 // LWA
1576 // LD, STD
1580 // DFLOADf32, DFLOADf64, DSTOREf32, DSTOREf64
1584 };
1585 AddrModesMap[PPC::AM_DQForm] = {
1586 // LXV, STXV
1590 };
1591 AddrModesMap[PPC::AM_PrefixDForm] = {PPC::MOF_RPlusSImm34 |
1593 // TODO: Add mapping for quadword load/store.
1594}
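// Illustrative sketch of how the map above is consulted (the operands below
// are hypothetical, not taken from the source): for a simple word load such
// as `lwz r3, 8(r4)`, computeMOFlags() produces MOF_* flags describing a
// register base plus a small signed immediate and a word-sized access;
// getAddrModeForFlags() matches that flag set against the
// AddrModesMap[PPC::AM_DForm] entries above and selects the D-Form addressing
// mode. A prefixed load whose displacement fits in a signed 34-bit immediate
// instead carries PPC::MOF_RPlusSImm34 and maps to PPC::AM_PrefixDForm.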
1595
1596/// getMaxByValAlign - Helper for getByValTypeAlignment to determine
1597/// the desired ByVal argument alignment.
1598static void getMaxByValAlign(Type *Ty, Align &MaxAlign, Align MaxMaxAlign) {
1599 if (MaxAlign == MaxMaxAlign)
1600 return;
1601 if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
1602 if (MaxMaxAlign >= 32 &&
1603 VTy->getPrimitiveSizeInBits().getFixedValue() >= 256)
1604 MaxAlign = Align(32);
1605 else if (VTy->getPrimitiveSizeInBits().getFixedValue() >= 128 &&
1606 MaxAlign < 16)
1607 MaxAlign = Align(16);
1608 } else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
1609 Align EltAlign;
1610 getMaxByValAlign(ATy->getElementType(), EltAlign, MaxMaxAlign);
1611 if (EltAlign > MaxAlign)
1612 MaxAlign = EltAlign;
1613 } else if (StructType *STy = dyn_cast<StructType>(Ty)) {
1614 for (auto *EltTy : STy->elements()) {
1615 Align EltAlign;
1616 getMaxByValAlign(EltTy, EltAlign, MaxMaxAlign);
1617 if (EltAlign > MaxAlign)
1618 MaxAlign = EltAlign;
1619 if (MaxAlign == MaxMaxAlign)
1620 break;
1621 }
1622 }
1623}
1624
1625/// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
1626/// function arguments in the caller parameter area.
1627unsigned PPCTargetLowering::getByValTypeAlignment(Type *Ty,
1628 const DataLayout &DL) const {
1629 // 16-byte and wider vectors are passed on a 16-byte boundary.
1630 // Everything else is passed on an 8-byte (PPC64) or 4-byte (PPC32) boundary.
1631 Align Alignment = Subtarget.isPPC64() ? Align(8) : Align(4);
1632 if (Subtarget.hasAltivec())
1633 getMaxByValAlign(Ty, Alignment, Align(16));
1634 return Alignment.value();
1635}
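// For illustration (hypothetical type, not from the source): with Altivec
// enabled, a by-value argument of type `struct S { double D; __vector float V; }`
// contains a 128-bit vector member, so getMaxByValAlign() raises the returned
// alignment to 16 bytes; an aggregate with only scalar members keeps the
// default 8-byte (PPC64) or 4-byte (PPC32) boundary.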
1636
1637bool PPCTargetLowering::useSoftFloat() const {
1638 return Subtarget.useSoftFloat();
1639}
1640
1641bool PPCTargetLowering::hasSPE() const {
1642 return Subtarget.hasSPE();
1643}
1644
1646 return VT.isScalarInteger();
1647}
1648
1650 Type *VectorTy, unsigned ElemSizeInBits, unsigned &Index) const {
1651 if (!Subtarget.isPPC64() || !Subtarget.hasVSX())
1652 return false;
1653
1654 if (auto *VTy = dyn_cast<VectorType>(VectorTy)) {
1655 if (VTy->getScalarType()->isIntegerTy()) {
1656 // ElemSizeInBits 8/16 can fit in immediate field, not needed here.
1657 if (ElemSizeInBits == 32) {
1658 Index = Subtarget.isLittleEndian() ? 2 : 1;
1659 return true;
1660 }
1661 if (ElemSizeInBits == 64) {
1662 Index = Subtarget.isLittleEndian() ? 1 : 0;
1663 return true;
1664 }
1665 }
1666 }
1667 return false;
1668}
1669
1670const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
1671 switch ((PPCISD::NodeType)Opcode) {
1672 case PPCISD::FIRST_NUMBER: break;
1673 case PPCISD::FSEL: return "PPCISD::FSEL";
1674 case PPCISD::XSMAXC: return "PPCISD::XSMAXC";
1675 case PPCISD::XSMINC: return "PPCISD::XSMINC";
1676 case PPCISD::FCFID: return "PPCISD::FCFID";
1677 case PPCISD::FCFIDU: return "PPCISD::FCFIDU";
1678 case PPCISD::FCFIDS: return "PPCISD::FCFIDS";
1679 case PPCISD::FCFIDUS: return "PPCISD::FCFIDUS";
1680 case PPCISD::FCTIDZ: return "PPCISD::FCTIDZ";
1681 case PPCISD::FCTIWZ: return "PPCISD::FCTIWZ";
1682 case PPCISD::FCTIDUZ: return "PPCISD::FCTIDUZ";
1683 case PPCISD::FCTIWUZ: return "PPCISD::FCTIWUZ";
1684 case PPCISD::FRE: return "PPCISD::FRE";
1685 case PPCISD::FRSQRTE: return "PPCISD::FRSQRTE";
1686 case PPCISD::FTSQRT:
1687 return "PPCISD::FTSQRT";
1688 case PPCISD::FSQRT:
1689 return "PPCISD::FSQRT";
1690 case PPCISD::STFIWX: return "PPCISD::STFIWX";
1691 case PPCISD::VPERM: return "PPCISD::VPERM";
1692 case PPCISD::XXSPLT: return "PPCISD::XXSPLT";
1694 return "PPCISD::XXSPLTI_SP_TO_DP";
1696 return "PPCISD::XXSPLTI32DX";
1697 case PPCISD::VECINSERT: return "PPCISD::VECINSERT";
1698 case PPCISD::XXPERMDI: return "PPCISD::XXPERMDI";
1699 case PPCISD::XXPERM:
1700 return "PPCISD::XXPERM";
1701 case PPCISD::VECSHL: return "PPCISD::VECSHL";
1702 case PPCISD::CMPB: return "PPCISD::CMPB";
1703 case PPCISD::Hi: return "PPCISD::Hi";
1704 case PPCISD::Lo: return "PPCISD::Lo";
1705 case PPCISD::TOC_ENTRY: return "PPCISD::TOC_ENTRY";
1706 case PPCISD::ATOMIC_CMP_SWAP_8: return "PPCISD::ATOMIC_CMP_SWAP_8";
1707 case PPCISD::ATOMIC_CMP_SWAP_16: return "PPCISD::ATOMIC_CMP_SWAP_16";
1708 case PPCISD::DYNALLOC: return "PPCISD::DYNALLOC";
1709 case PPCISD::DYNAREAOFFSET: return "PPCISD::DYNAREAOFFSET";
1710 case PPCISD::PROBED_ALLOCA: return "PPCISD::PROBED_ALLOCA";
1711 case PPCISD::GlobalBaseReg: return "PPCISD::GlobalBaseReg";
1712 case PPCISD::SRL: return "PPCISD::SRL";
1713 case PPCISD::SRA: return "PPCISD::SRA";
1714 case PPCISD::SHL: return "PPCISD::SHL";
1715 case PPCISD::SRA_ADDZE: return "PPCISD::SRA_ADDZE";
1716 case PPCISD::CALL: return "PPCISD::CALL";
1717 case PPCISD::CALL_NOP: return "PPCISD::CALL_NOP";
1718 case PPCISD::CALL_NOTOC: return "PPCISD::CALL_NOTOC";
1719 case PPCISD::CALL_RM:
1720 return "PPCISD::CALL_RM";
1722 return "PPCISD::CALL_NOP_RM";
1724 return "PPCISD::CALL_NOTOC_RM";
1725 case PPCISD::MTCTR: return "PPCISD::MTCTR";
1726 case PPCISD::BCTRL: return "PPCISD::BCTRL";
1727 case PPCISD::BCTRL_LOAD_TOC: return "PPCISD::BCTRL_LOAD_TOC";
1728 case PPCISD::BCTRL_RM:
1729 return "PPCISD::BCTRL_RM";
1731 return "PPCISD::BCTRL_LOAD_TOC_RM";
1732 case PPCISD::RET_GLUE: return "PPCISD::RET_GLUE";
1733 case PPCISD::READ_TIME_BASE: return "PPCISD::READ_TIME_BASE";
1734 case PPCISD::EH_SJLJ_SETJMP: return "PPCISD::EH_SJLJ_SETJMP";
1735 case PPCISD::EH_SJLJ_LONGJMP: return "PPCISD::EH_SJLJ_LONGJMP";
1736 case PPCISD::MFOCRF: return "PPCISD::MFOCRF";
1737 case PPCISD::MFVSR: return "PPCISD::MFVSR";
1738 case PPCISD::MTVSRA: return "PPCISD::MTVSRA";
1739 case PPCISD::MTVSRZ: return "PPCISD::MTVSRZ";
1740 case PPCISD::SINT_VEC_TO_FP: return "PPCISD::SINT_VEC_TO_FP";
1741 case PPCISD::UINT_VEC_TO_FP: return "PPCISD::UINT_VEC_TO_FP";
1743 return "PPCISD::SCALAR_TO_VECTOR_PERMUTED";
1745 return "PPCISD::ANDI_rec_1_EQ_BIT";
1747 return "PPCISD::ANDI_rec_1_GT_BIT";
1748 case PPCISD::VCMP: return "PPCISD::VCMP";
1749 case PPCISD::VCMP_rec: return "PPCISD::VCMP_rec";
1750 case PPCISD::LBRX: return "PPCISD::LBRX";
1751 case PPCISD::STBRX: return "PPCISD::STBRX";
1752 case PPCISD::LFIWAX: return "PPCISD::LFIWAX";
1753 case PPCISD::LFIWZX: return "PPCISD::LFIWZX";
1754 case PPCISD::LXSIZX: return "PPCISD::LXSIZX";
1755 case PPCISD::STXSIX: return "PPCISD::STXSIX";
1756 case PPCISD::VEXTS: return "PPCISD::VEXTS";
1757 case PPCISD::LXVD2X: return "PPCISD::LXVD2X";
1758 case PPCISD::STXVD2X: return "PPCISD::STXVD2X";
1759 case PPCISD::LOAD_VEC_BE: return "PPCISD::LOAD_VEC_BE";
1760 case PPCISD::STORE_VEC_BE: return "PPCISD::STORE_VEC_BE";
1762 return "PPCISD::ST_VSR_SCAL_INT";
1763 case PPCISD::COND_BRANCH: return "PPCISD::COND_BRANCH";
1764 case PPCISD::BDNZ: return "PPCISD::BDNZ";
1765 case PPCISD::BDZ: return "PPCISD::BDZ";
1766 case PPCISD::MFFS: return "PPCISD::MFFS";
1767 case PPCISD::FADDRTZ: return "PPCISD::FADDRTZ";
1768 case PPCISD::TC_RETURN: return "PPCISD::TC_RETURN";
1769 case PPCISD::CR6SET: return "PPCISD::CR6SET";
1770 case PPCISD::CR6UNSET: return "PPCISD::CR6UNSET";
1771 case PPCISD::PPC32_GOT: return "PPCISD::PPC32_GOT";
1772 case PPCISD::PPC32_PICGOT: return "PPCISD::PPC32_PICGOT";
1773 case PPCISD::ADDIS_GOT_TPREL_HA: return "PPCISD::ADDIS_GOT_TPREL_HA";
1774 case PPCISD::LD_GOT_TPREL_L: return "PPCISD::LD_GOT_TPREL_L";
1775 case PPCISD::ADD_TLS: return "PPCISD::ADD_TLS";
1776 case PPCISD::ADDIS_TLSGD_HA: return "PPCISD::ADDIS_TLSGD_HA";
1777 case PPCISD::ADDI_TLSGD_L: return "PPCISD::ADDI_TLSGD_L";
1778 case PPCISD::GET_TLS_ADDR: return "PPCISD::GET_TLS_ADDR";
1779 case PPCISD::GET_TLS_MOD_AIX: return "PPCISD::GET_TLS_MOD_AIX";
1780 case PPCISD::GET_TPOINTER: return "PPCISD::GET_TPOINTER";
1781 case PPCISD::ADDI_TLSGD_L_ADDR: return "PPCISD::ADDI_TLSGD_L_ADDR";
1782 case PPCISD::TLSGD_AIX: return "PPCISD::TLSGD_AIX";
1783 case PPCISD::TLSLD_AIX: return "PPCISD::TLSLD_AIX";
1784 case PPCISD::ADDIS_TLSLD_HA: return "PPCISD::ADDIS_TLSLD_HA";
1785 case PPCISD::ADDI_TLSLD_L: return "PPCISD::ADDI_TLSLD_L";
1786 case PPCISD::GET_TLSLD_ADDR: return "PPCISD::GET_TLSLD_ADDR";
1787 case PPCISD::ADDI_TLSLD_L_ADDR: return "PPCISD::ADDI_TLSLD_L_ADDR";
1788 case PPCISD::ADDIS_DTPREL_HA: return "PPCISD::ADDIS_DTPREL_HA";
1789 case PPCISD::ADDI_DTPREL_L: return "PPCISD::ADDI_DTPREL_L";
1791 return "PPCISD::PADDI_DTPREL";
1792 case PPCISD::VADD_SPLAT: return "PPCISD::VADD_SPLAT";
1793 case PPCISD::SC: return "PPCISD::SC";
1794 case PPCISD::CLRBHRB: return "PPCISD::CLRBHRB";
1795 case PPCISD::MFBHRBE: return "PPCISD::MFBHRBE";
1796 case PPCISD::RFEBB: return "PPCISD::RFEBB";
1797 case PPCISD::XXSWAPD: return "PPCISD::XXSWAPD";
1798 case PPCISD::SWAP_NO_CHAIN: return "PPCISD::SWAP_NO_CHAIN";
1799 case PPCISD::BUILD_FP128: return "PPCISD::BUILD_FP128";
1800 case PPCISD::BUILD_SPE64: return "PPCISD::BUILD_SPE64";
1801 case PPCISD::EXTRACT_SPE: return "PPCISD::EXTRACT_SPE";
1802 case PPCISD::EXTSWSLI: return "PPCISD::EXTSWSLI";
1803 case PPCISD::LD_VSX_LH: return "PPCISD::LD_VSX_LH";
1804 case PPCISD::FP_EXTEND_HALF: return "PPCISD::FP_EXTEND_HALF";
1805 case PPCISD::MAT_PCREL_ADDR: return "PPCISD::MAT_PCREL_ADDR";
1807 return "PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR";
1809 return "PPCISD::TLS_LOCAL_EXEC_MAT_ADDR";
1810 case PPCISD::ACC_BUILD: return "PPCISD::ACC_BUILD";
1811 case PPCISD::PAIR_BUILD: return "PPCISD::PAIR_BUILD";
1812 case PPCISD::EXTRACT_VSX_REG: return "PPCISD::EXTRACT_VSX_REG";
1813 case PPCISD::XXMFACC: return "PPCISD::XXMFACC";
1814 case PPCISD::LD_SPLAT: return "PPCISD::LD_SPLAT";
1815 case PPCISD::ZEXT_LD_SPLAT: return "PPCISD::ZEXT_LD_SPLAT";
1816 case PPCISD::SEXT_LD_SPLAT: return "PPCISD::SEXT_LD_SPLAT";
1817 case PPCISD::FNMSUB: return "PPCISD::FNMSUB";
1819 return "PPCISD::STRICT_FADDRTZ";
1821 return "PPCISD::STRICT_FCTIDZ";
1823 return "PPCISD::STRICT_FCTIWZ";
1825 return "PPCISD::STRICT_FCTIDUZ";
1827 return "PPCISD::STRICT_FCTIWUZ";
1829 return "PPCISD::STRICT_FCFID";
1831 return "PPCISD::STRICT_FCFIDU";
1833 return "PPCISD::STRICT_FCFIDS";
1835 return "PPCISD::STRICT_FCFIDUS";
1836 case PPCISD::LXVRZX: return "PPCISD::LXVRZX";
1837 case PPCISD::STORE_COND:
1838 return "PPCISD::STORE_COND";
1839 }
1840 return nullptr;
1841}
1842
1844 EVT VT) const {
1845 if (!VT.isVector())
1846 return Subtarget.useCRBits() ? MVT::i1 : MVT::i32;
1847
1849}
1850
1852 assert(VT.isFloatingPoint() && "Non-floating-point FMA?");
1853 return true;
1854}
1855
1856//===----------------------------------------------------------------------===//
1857// Node matching predicates, for use by the tblgen matching code.
1858//===----------------------------------------------------------------------===//
1859
1860/// isFloatingPointZero - Return true if this is 0.0 or -0.0.
1861static bool isFloatingPointZero(SDValue Op) {
1862 if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op))
1863 return CFP->getValueAPF().isZero();
1864 else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) {
1865 // Maybe this has already been legalized into the constant pool?
1866 if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op.getOperand(1)))
1867 if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
1868 return CFP->getValueAPF().isZero();
1869 }
1870 return false;
1871}
1872
1873/// isConstantOrUndef - Op is either an undef node or a ConstantSDNode. Return
1874/// true if Op is undef or if it matches the specified value.
1875static bool isConstantOrUndef(int Op, int Val) {
1876 return Op < 0 || Op == Val;
1877}
1878
1879/// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a
1880/// VPKUHUM instruction.
1881/// The ShuffleKind distinguishes between big-endian operations with
1882/// two different inputs (0), either-endian operations with two identical
1883/// inputs (1), and little-endian operations with two different inputs (2).
1884/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
1885bool PPC::isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
1886 SelectionDAG &DAG) {
1887 bool IsLE = DAG.getDataLayout().isLittleEndian();
1888 if (ShuffleKind == 0) {
1889 if (IsLE)
1890 return false;
1891 for (unsigned i = 0; i != 16; ++i)
1892 if (!isConstantOrUndef(N->getMaskElt(i), i*2+1))
1893 return false;
1894 } else if (ShuffleKind == 2) {
1895 if (!IsLE)
1896 return false;
1897 for (unsigned i = 0; i != 16; ++i)
1898 if (!isConstantOrUndef(N->getMaskElt(i), i*2))
1899 return false;
1900 } else if (ShuffleKind == 1) {
1901 unsigned j = IsLE ? 0 : 1;
1902 for (unsigned i = 0; i != 8; ++i)
1903 if (!isConstantOrUndef(N->getMaskElt(i), i*2+j) ||
1904 !isConstantOrUndef(N->getMaskElt(i+8), i*2+j))
1905 return false;
1906 }
1907 return true;
1908}
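// Worked example for the check above: vpkuhum keeps the low-order byte of
// each halfword of the two concatenated inputs. On a big-endian target with
// two different inputs (ShuffleKind 0) the matching v16i8 mask is
//   <1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31>
// i.e. element i equals i*2+1; on a little-endian target with swapped inputs
// (ShuffleKind 2) the mask is <0, 2, 4, ..., 30> instead.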
1909
1910/// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a
1911/// VPKUWUM instruction.
1912/// The ShuffleKind distinguishes between big-endian operations with
1913/// two different inputs (0), either-endian operations with two identical
1914/// inputs (1), and little-endian operations with two different inputs (2).
1915/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
1916bool PPC::isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
1917 SelectionDAG &DAG) {
1918 bool IsLE = DAG.getDataLayout().isLittleEndian();
1919 if (ShuffleKind == 0) {
1920 if (IsLE)
1921 return false;
1922 for (unsigned i = 0; i != 16; i += 2)
1923 if (!isConstantOrUndef(N->getMaskElt(i ), i*2+2) ||
1924 !isConstantOrUndef(N->getMaskElt(i+1), i*2+3))
1925 return false;
1926 } else if (ShuffleKind == 2) {
1927 if (!IsLE)
1928 return false;
1929 for (unsigned i = 0; i != 16; i += 2)
1930 if (!isConstantOrUndef(N->getMaskElt(i ), i*2) ||
1931 !isConstantOrUndef(N->getMaskElt(i+1), i*2+1))
1932 return false;
1933 } else if (ShuffleKind == 1) {
1934 unsigned j = IsLE ? 0 : 2;
1935 for (unsigned i = 0; i != 8; i += 2)
1936 if (!isConstantOrUndef(N->getMaskElt(i ), i*2+j) ||
1937 !isConstantOrUndef(N->getMaskElt(i+1), i*2+j+1) ||
1938 !isConstantOrUndef(N->getMaskElt(i+8), i*2+j) ||
1939 !isConstantOrUndef(N->getMaskElt(i+9), i*2+j+1))
1940 return false;
1941 }
1942 return true;
1943}
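// Worked example for the check above: vpkuwum keeps the low-order halfword of
// each word. On a big-endian target with two different inputs (ShuffleKind 0)
// the matching v16i8 mask is
//   <2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31>
// i.e. each element pair (i, i+1) for even i equals (i*2+2, i*2+3).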
1944
1945/// isVPKUDUMShuffleMask - Return true if this is the shuffle mask for a
1946/// VPKUDUM instruction, AND the VPKUDUM instruction exists for the
1947/// current subtarget.
1948///
1949/// The ShuffleKind distinguishes between big-endian operations with
1950/// two different inputs (0), either-endian operations with two identical
1951/// inputs (1), and little-endian operations with two different inputs (2).
1952/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
1953bool PPC::isVPKUDUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
1954 SelectionDAG &DAG) {
1955 const PPCSubtarget &Subtarget = DAG.getSubtarget<PPCSubtarget>();
1956 if (!Subtarget.hasP8Vector())
1957 return false;
1958
1959 bool IsLE = DAG.getDataLayout().isLittleEndian();
1960 if (ShuffleKind == 0) {
1961 if (IsLE)
1962 return false;
1963 for (unsigned i = 0; i != 16; i += 4)
1964 if (!isConstantOrUndef(N->getMaskElt(i ), i*2+4) ||
1965 !isConstantOrUndef(N->getMaskElt(i+1), i*2+5) ||
1966 !isConstantOrUndef(N->getMaskElt(i+2), i*2+6) ||
1967 !isConstantOrUndef(N->getMaskElt(i+3), i*2+7))
1968 return false;
1969 } else if (ShuffleKind == 2) {
1970 if (!IsLE)
1971 return false;
1972 for (unsigned i = 0; i != 16; i += 4)
1973 if (!isConstantOrUndef(N->getMaskElt(i ), i*2) ||
1974 !isConstantOrUndef(N->getMaskElt(i+1), i*2+1) ||
1975 !isConstantOrUndef(N->getMaskElt(i+2), i*2+2) ||
1976 !isConstantOrUndef(N->getMaskElt(i+3), i*2+3))
1977 return false;
1978 } else if (ShuffleKind == 1) {
1979 unsigned j = IsLE ? 0 : 4;
1980 for (unsigned i = 0; i != 8; i += 4)
1981 if (!isConstantOrUndef(N->getMaskElt(i ), i*2+j) ||
1982 !isConstantOrUndef(N->getMaskElt(i+1), i*2+j+1) ||
1983 !isConstantOrUndef(N->getMaskElt(i+2), i*2+j+2) ||
1984 !isConstantOrUndef(N->getMaskElt(i+3), i*2+j+3) ||
1985 !isConstantOrUndef(N->getMaskElt(i+8), i*2+j) ||
1986 !isConstantOrUndef(N->getMaskElt(i+9), i*2+j+1) ||
1987 !isConstantOrUndef(N->getMaskElt(i+10), i*2+j+2) ||
1988 !isConstantOrUndef(N->getMaskElt(i+11), i*2+j+3))
1989 return false;
1990 }
1991 return true;
1992}
1993
1994/// isVMerge - Common function, used to match vmrg* shuffles.
1995///
1996static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize,
1997 unsigned LHSStart, unsigned RHSStart) {
1998 if (N->getValueType(0) != MVT::v16i8)
1999 return false;
2000 assert((UnitSize == 1 || UnitSize == 2 || UnitSize == 4) &&
2001 "Unsupported merge size!");
2002
2003 for (unsigned i = 0; i != 8/UnitSize; ++i) // Step over units
2004 for (unsigned j = 0; j != UnitSize; ++j) { // Step over bytes within unit
2005 if (!isConstantOrUndef(N->getMaskElt(i*UnitSize*2+j),
2006 LHSStart+j+i*UnitSize) ||
2007 !isConstantOrUndef(N->getMaskElt(i*UnitSize*2+UnitSize+j),
2008 RHSStart+j+i*UnitSize))
2009 return false;
2010 }
2011 return true;
2012}
2013
2014/// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for
2015/// a VMRGL* instruction with the specified unit size (1,2 or 4 bytes).
2016/// The ShuffleKind distinguishes between big-endian merges with two
2017/// different inputs (0), either-endian merges with two identical inputs (1),
2018/// and little-endian merges with two different inputs (2). For the latter,
2019/// the input operands are swapped (see PPCInstrAltivec.td).
2020bool PPC::isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
2021 unsigned ShuffleKind, SelectionDAG &DAG) {
2022 if (DAG.getDataLayout().isLittleEndian()) {
2023 if (ShuffleKind == 1) // unary
2024 return isVMerge(N, UnitSize, 0, 0);
2025 else if (ShuffleKind == 2) // swapped
2026 return isVMerge(N, UnitSize, 0, 16);
2027 else
2028 return false;
2029 } else {
2030 if (ShuffleKind == 1) // unary
2031 return isVMerge(N, UnitSize, 8, 8);
2032 else if (ShuffleKind == 0) // normal
2033 return isVMerge(N, UnitSize, 8, 24);
2034 else
2035 return false;
2036 }
2037}
2038
2039/// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for
2040/// a VMRGH* instruction with the specified unit size (1,2 or 4 bytes).
2041/// The ShuffleKind distinguishes between big-endian merges with two
2042/// different inputs (0), either-endian merges with two identical inputs (1),
2043/// and little-endian merges with two different inputs (2). For the latter,
2044/// the input operands are swapped (see PPCInstrAltivec.td).
2045bool PPC::isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
2046 unsigned ShuffleKind, SelectionDAG &DAG) {
2047 if (DAG.getDataLayout().isLittleEndian()) {
2048 if (ShuffleKind == 1) // unary
2049 return isVMerge(N, UnitSize, 8, 8);
2050 else if (ShuffleKind == 2) // swapped
2051 return isVMerge(N, UnitSize, 8, 24);
2052 else
2053 return false;
2054 } else {
2055 if (ShuffleKind == 1) // unary
2056 return isVMerge(N, UnitSize, 0, 0);
2057 else if (ShuffleKind == 0) // normal
2058 return isVMerge(N, UnitSize, 0, 16);
2059 else
2060 return false;
2061 }
2062}
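// Worked example for the two routines above: for byte merges (UnitSize == 1)
// on a big-endian target with two different inputs (ShuffleKind 0),
//   vmrghb corresponds to the mask <0, 16, 1, 17, 2, 18, ..., 7, 23>
//     (isVMerge with LHSStart = 0, RHSStart = 16), and
//   vmrglb corresponds to the mask <8, 24, 9, 25, 10, 26, ..., 15, 31>
//     (isVMerge with LHSStart = 8, RHSStart = 24).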
2063
2064/**
2065 * Common function used to match vmrgew and vmrgow shuffles
2066 *
2067 * The indexOffset determines whether to look for even or odd words in
2068 * the shuffle mask. This is based on the endianness of the target
2069 * machine.
2070 * - Little Endian:
2071 * - Use offset of 0 to check for odd elements
2072 * - Use offset of 4 to check for even elements
2073 * - Big Endian:
2074 * - Use offset of 0 to check for even elements
2075 * - Use offset of 4 to check for odd elements
2076 * A detailed description of the vector element ordering for little endian and
2077 * big endian can be found at
2078 * http://www.ibm.com/developerworks/library/l-ibm-xl-c-cpp-compiler/index.html
2079 * Targeting your applications - what little endian and big endian IBM XL C/C++
2080 * compiler differences mean to you
2081 *
2082 * The mask to the shuffle vector instruction specifies the indices of the
2083 * elements from the two input vectors to place in the result. The elements are
2084 * numbered in array-access order, starting with the first vector. These vectors
2085 * are always of type v16i8, thus each vector will contain 16 byte-sized
2086 * elements. More info on the shuffle vector can be found in the
2087 * http://llvm.org/docs/LangRef.html#shufflevector-instruction
2088 * Language Reference.
2089 *
2090 * The RHSStartValue indicates whether the same input vectors are used (unary)
2091 * or two different input vectors are used, based on the following:
2092 * - If the instruction uses the same vector for both inputs, the range of the
2093 * indices will be 0 to 15. In this case, the RHSStart value passed should
2094 * be 0.
2095 * - If the instruction has two different vectors then the range of the
2096 * indices will be 0 to 31. In this case, the RHSStart value passed should
2097 * be 16 (indices 0-15 specify elements in the first vector while indices 16
2098 * to 31 specify elements in the second vector).
2099 *
2100 * \param[in] N The shuffle vector SD Node to analyze
2101 * \param[in] IndexOffset Specifies whether to look for even or odd elements
2102 * \param[in] RHSStartValue Specifies the starting index for the righthand input
2103 * vector to the shuffle_vector instruction
2104 * \return true iff this shuffle vector represents an even or odd word merge
2105 */
2106static bool isVMerge(ShuffleVectorSDNode *N, unsigned IndexOffset,
2107 unsigned RHSStartValue) {
2108 if (N->getValueType(0) != MVT::v16i8)
2109 return false;
2110
2111 for (unsigned i = 0; i < 2; ++i)
2112 for (unsigned j = 0; j < 4; ++j)
2113 if (!isConstantOrUndef(N->getMaskElt(i*4+j),
2114 i*RHSStartValue+j+IndexOffset) ||
2115 !isConstantOrUndef(N->getMaskElt(i*4+j+8),
2116 i*RHSStartValue+j+IndexOffset+8))
2117 return false;
2118 return true;
2119}
2120
2121/**
2122 * Determine if the specified shuffle mask is suitable for the vmrgew or
2123 * vmrgow instructions.
2124 *
2125 * \param[in] N The shuffle vector SD Node to analyze
2126 * \param[in] CheckEven Check for an even merge (true) or an odd merge (false)
2127 * \param[in] ShuffleKind Identify the type of merge:
2128 * - 0 = big-endian merge with two different inputs;
2129 * - 1 = either-endian merge with two identical inputs;
2130 * - 2 = little-endian merge with two different inputs (inputs are swapped for
2131 * little-endian merges).
2132 * \param[in] DAG The current SelectionDAG
2133 * \return true iff this shuffle mask is suitable for vmrgew or vmrgow
2134 */
2135bool PPC::isVMRGEOShuffleMask(ShuffleVectorSDNode *N, bool CheckEven,
2136 unsigned ShuffleKind, SelectionDAG &DAG) {
2137 if (DAG.getDataLayout().isLittleEndian()) {
2138 unsigned indexOffset = CheckEven ? 4 : 0;
2139 if (ShuffleKind == 1) // Unary
2140 return isVMerge(N, indexOffset, 0);
2141 else if (ShuffleKind == 2) // swapped
2142 return isVMerge(N, indexOffset, 16);
2143 else
2144 return false;
2145 }
2146 else {
2147 unsigned indexOffset = CheckEven ? 0 : 4;
2148 if (ShuffleKind == 1) // Unary
2149 return isVMerge(N, indexOffset, 0);
2150 else if (ShuffleKind == 0) // Normal
2151 return isVMerge(N, indexOffset, 16);
2152 else
2153 return false;
2154 }
2155 return false;
2156}
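// Worked example for the check above: on a big-endian target with two
// different inputs (ShuffleKind 0), an even-word merge (vmrgew) produces the
// words {A0, B0, A2, B2}, which as a v16i8 mask is
//   <0, 1, 2, 3, 16, 17, 18, 19, 8, 9, 10, 11, 24, 25, 26, 27>
// exactly what isVMerge(N, /*IndexOffset=*/0, /*RHSStartValue=*/16) accepts.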
2157
2158/// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift
2159/// amount, otherwise return -1.
2160/// The ShuffleKind distinguishes between big-endian operations with two
2161/// different inputs (0), either-endian operations with two identical inputs
2162/// (1), and little-endian operations with two different inputs (2). For the
2163/// latter, the input operands are swapped (see PPCInstrAltivec.td).
2164int PPC::isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind,
2165 SelectionDAG &DAG) {
2166 if (N->getValueType(0) != MVT::v16i8)
2167 return -1;
2168
2169 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
2170
2171 // Find the first non-undef value in the shuffle mask.
2172 unsigned i;
2173 for (i = 0; i != 16 && SVOp->getMaskElt(i) < 0; ++i)
2174 /*search*/;
2175
2176 if (i == 16) return -1; // all undef.
2177
2178 // Otherwise, check to see if the rest of the elements are consecutively
2179 // numbered from this value.
2180 unsigned ShiftAmt = SVOp->getMaskElt(i);
2181 if (ShiftAmt < i) return -1;
2182
2183 ShiftAmt -= i;
2184 bool isLE = DAG.getDataLayout().isLittleEndian();
2185
2186 if ((ShuffleKind == 0 && !isLE) || (ShuffleKind == 2 && isLE)) {
2187 // Check the rest of the elements to see if they are consecutive.
2188 for (++i; i != 16; ++i)
2189 if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt+i))
2190 return -1;
2191 } else if (ShuffleKind == 1) {
2192 // Check the rest of the elements to see if they are consecutive.
2193 for (++i; i != 16; ++i)
2194 if (!isConstantOrUndef(SVOp->getMaskElt(i), (ShiftAmt+i) & 15))
2195 return -1;
2196 } else
2197 return -1;
2198
2199 if (isLE)
2200 ShiftAmt = 16 - ShiftAmt;
2201
2202 return ShiftAmt;
2203}
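// Illustrative example (editorial note, not part of the original source):
// the consecutive mask {3, 4, 5, ..., 18} selects bytes starting at offset 3
// of the concatenated inputs. With ShuffleKind == 0 on a big-endian target
// this returns 3; with ShuffleKind == 2 on a little-endian target the same
// mask returns 16 - 3 = 13, matching the swapped-operand vsldoi encoding.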
2204
2205/// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand
2206/// specifies a splat of a single element that is suitable for input to
2207/// one of the splat operations (VSPLTB/VSPLTH/VSPLTW/XXSPLTW/LXVDSX/etc.).
2208bool PPC::isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize) {
2209 EVT VT = N->getValueType(0);
2210 if (VT == MVT::v2i64 || VT == MVT::v2f64)
2211 return EltSize == 8 && N->getMaskElt(0) == N->getMaskElt(1);
2212
2213 assert(VT == MVT::v16i8 && isPowerOf2_32(EltSize) &&
2214 EltSize <= 8 && "Can only handle 1,2,4,8 byte element sizes");
2215
2216 // The consecutive indices need to specify an element, not part of two
2217 // different elements. So abandon ship early if this isn't the case.
2218 if (N->getMaskElt(0) % EltSize != 0)
2219 return false;
2220
2221 // This is a splat operation if each element of the permute is the same, and
2222 // if the value doesn't reference the second vector.
2223 unsigned ElementBase = N->getMaskElt(0);
2224
2225 // FIXME: Handle UNDEF elements too!
2226 if (ElementBase >= 16)
2227 return false;
2228
2229 // Check that the indices are consecutive, in the case of a multi-byte element
2230 // splatted with a v16i8 mask.
2231 for (unsigned i = 1; i != EltSize; ++i)
2232 if (N->getMaskElt(i) < 0 || N->getMaskElt(i) != (int)(i+ElementBase))
2233 return false;
2234
2235 for (unsigned i = EltSize, e = 16; i != e; i += EltSize) {
2236 if (N->getMaskElt(i) < 0) continue;
2237 for (unsigned j = 0; j != EltSize; ++j)
2238 if (N->getMaskElt(i+j) != N->getMaskElt(j))
2239 return false;
2240 }
2241 return true;
2242}
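// Illustrative example (editorial note, not part of the original source):
// with EltSize == 4, the mask
//   {8, 9, 10, 11, 8, 9, 10, 11, 8, 9, 10, 11, 8, 9, 10, 11}
// is accepted: it splats word element 2 of the first input, so it can be
// lowered to a single word splat such as vspltw or xxspltw.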
2243
2244/// Check that the mask is shuffling N byte elements. Within each N byte
2245/// element of the mask, the indices could be either in increasing or
2246/// decreasing order as long as they are consecutive.
2247/// \param[in] N the shuffle vector SD Node to analyze
2248/// \param[in] Width the element width in bytes, could be 2/4/8/16 (HalfWord/
2249/// Word/DoubleWord/QuadWord).
2250 * \param[in] StepLen the expected difference between consecutive indices
2251 * within each N-byte element: 1 for increasing order, -1 for decreasing.
2252/// \return true iff the mask is shuffling N byte elements.
2253static bool isNByteElemShuffleMask(ShuffleVectorSDNode *N, unsigned Width,
2254 int StepLen) {
2255 assert((Width == 2 || Width == 4 || Width == 8 || Width == 16) &&
2256 "Unexpected element width.");
2257 assert((StepLen == 1 || StepLen == -1) && "Unexpected step length.");
2258
2259 unsigned NumOfElem = 16 / Width;
2260 unsigned MaskVal[16]; // Width is never greater than 16
2261 for (unsigned i = 0; i < NumOfElem; ++i) {
2262 MaskVal[0] = N->getMaskElt(i * Width);
2263 if ((StepLen == 1) && (MaskVal[0] % Width)) {
2264 return false;
2265 } else if ((StepLen == -1) && ((MaskVal[0] + 1) % Width)) {
2266 return false;
2267 }
2268
2269 for (unsigned int j = 1; j < Width; ++j) {
2270 MaskVal[j] = N->getMaskElt(i * Width + j);
2271 if (MaskVal[j] != MaskVal[j-1] + StepLen) {
2272 return false;
2273 }
2274 }
2275 }
2276
2277 return true;
2278}
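// Illustrative example (editorial note, not part of the original source):
// with Width == 4 and StepLen == -1, the mask
//   {3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12}
// is accepted: each word keeps its position but its bytes are reversed,
// which is the pattern the XXBRW byte-reverse check below looks for.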
2279
2280bool PPC::isXXINSERTWMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
2281 unsigned &InsertAtByte, bool &Swap, bool IsLE) {
2282 if (!isNByteElemShuffleMask(N, 4, 1))
2283 return false;
2284
2285 // Now we look at mask elements 0,4,8,12
2286 unsigned M0 = N->getMaskElt(0) / 4;
2287 unsigned M1 = N->getMaskElt(4) / 4;
2288 unsigned M2 = N->getMaskElt(8) / 4;
2289 unsigned M3 = N->getMaskElt(12) / 4;
2290 unsigned LittleEndianShifts[] = { 2, 1, 0, 3 };
2291 unsigned BigEndianShifts[] = { 3, 0, 1, 2 };
2292
2293 // Below, let H and L be arbitrary elements of the shuffle mask
2294 // where H is in the range [4,7] and L is in the range [0,3].
2295 // H, 1, 2, 3 or L, 5, 6, 7
2296 if ((M0 > 3 && M1 == 1 && M2 == 2 && M3 == 3) ||
2297 (M0 < 4 && M1 == 5 && M2 == 6 && M3 == 7)) {
2298 ShiftElts = IsLE ? LittleEndianShifts[M0 & 0x3] : BigEndianShifts[M0 & 0x3];
2299 InsertAtByte = IsLE ? 12 : 0;
2300 Swap = M0 < 4;
2301 return true;
2302 }
2303 // 0, H, 2, 3 or 4, L, 6, 7
2304 if ((M1 > 3 && M0 == 0 && M2 == 2 && M3 == 3) ||
2305 (M1 < 4 && M0 == 4 && M2 == 6 && M3 == 7)) {
2306 ShiftElts = IsLE ? LittleEndianShifts[M1 & 0x3] : BigEndianShifts[M1 & 0x3];
2307 InsertAtByte = IsLE ? 8 : 4;
2308 Swap = M1 < 4;
2309 return true;
2310 }
2311 // 0, 1, H, 3 or 4, 5, L, 7
2312 if ((M2 > 3 && M0 == 0 && M1 == 1 && M3 == 3) ||
2313 (M2 < 4 && M0 == 4 && M1 == 5 && M3 == 7)) {
2314 ShiftElts = IsLE ? LittleEndianShifts[M2 & 0x3] : BigEndianShifts[M2 & 0x3];
2315 InsertAtByte = IsLE ? 4 : 8;
2316 Swap = M2 < 4;
2317 return true;
2318 }
2319 // 0, 1, 2, H or 4, 5, 6, L
2320 if ((M3 > 3 && M0 == 0 && M1 == 1 && M2 == 2) ||
2321 (M3 < 4 && M0 == 4 && M1 == 5 && M2 == 6)) {
2322 ShiftElts = IsLE ? LittleEndianShifts[M3 & 0x3] : BigEndianShifts[M3 & 0x3];
2323 InsertAtByte = IsLE ? 0 : 12;
2324 Swap = M3 < 4;
2325 return true;
2326 }
2327
2328 // If both vector operands for the shuffle are the same vector, the mask will
2329 // contain only elements from the first one and the second one will be undef.
2330 if (N->getOperand(1).isUndef()) {
2331 ShiftElts = 0;
2332 Swap = true;
2333 unsigned XXINSERTWSrcElem = IsLE ? 2 : 1;
2334 if (M0 == XXINSERTWSrcElem && M1 == 1 && M2 == 2 && M3 == 3) {
2335 InsertAtByte = IsLE ? 12 : 0;
2336 return true;
2337 }
2338 if (M0 == 0 && M1 == XXINSERTWSrcElem && M2 == 2 && M3 == 3) {
2339 InsertAtByte = IsLE ? 8 : 4;
2340 return true;
2341 }
2342 if (M0 == 0 && M1 == 1 && M2 == XXINSERTWSrcElem && M3 == 3) {
2343 InsertAtByte = IsLE ? 4 : 8;
2344 return true;
2345 }
2346 if (M0 == 0 && M1 == 1 && M2 == 2 && M3 == XXINSERTWSrcElem) {
2347 InsertAtByte = IsLE ? 0 : 12;
2348 return true;
2349 }
2350 }
2351
2352 return false;
2353}
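// Illustrative example (editorial note, not part of the original source):
// if the mask, viewed as word indices, is {4, 1, 2, 3}, word 0 of the result
// comes from the second input while the rest comes from the first. On a
// big-endian target this sets ShiftElts = 3, InsertAtByte = 0 and
// Swap = false, so the source word can be rotated into position and then
// inserted at byte 0 of the result.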
2354
2355bool PPC::isXXSLDWIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
2356 bool &Swap, bool IsLE) {
2357 assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
2358 // Ensure each byte index of the word is consecutive.
2359 if (!isNByteElemShuffleMask(N, 4, 1))
2360 return false;
2361
2362 // Now we look at mask elements 0,4,8,12, which are the beginning of words.
2363 unsigned M0 = N->getMaskElt(0) / 4;
2364 unsigned M1 = N->getMaskElt(4) / 4;
2365 unsigned M2 = N->getMaskElt(8) / 4;
2366 unsigned M3 = N->getMaskElt(12) / 4;
2367
2368 // If both vector operands for the shuffle are the same vector, the mask will
2369 // contain only elements from the first one and the second one will be undef.
2370 if (N->getOperand(1).isUndef()) {
2371 assert(M0 < 4 && "Indexing into an undef vector?");
2372 if (M1 != (M0 + 1) % 4 || M2 != (M1 + 1) % 4 || M3 != (M2 + 1) % 4)
2373 return false;
2374
2375 ShiftElts = IsLE ? (4 - M0) % 4 : M0;
2376 Swap = false;
2377 return true;
2378 }
2379
2380 // Ensure each word index of the ShuffleVector Mask is consecutive.
2381 if (M1 != (M0 + 1) % 8 || M2 != (M1 + 1) % 8 || M3 != (M2 + 1) % 8)
2382 return false;
2383
2384 if (IsLE) {
2385 if (M0 == 0 || M0 == 7 || M0 == 6 || M0 == 5) {
2386 // Input vectors don't need to be swapped if the leading element
2387 // of the result is one of the 3 left elements of the second vector
2388 // (or if there is no shift to be done at all).
2389 Swap = false;
2390 ShiftElts = (8 - M0) % 8;
2391 } else if (M0 == 4 || M0 == 3 || M0 == 2 || M0 == 1) {
2392 // Input vectors need to be swapped if the leading element
2393 // of the result is one of the 3 left elements of the first vector
2394 // (or if we're shifting by 4 - thereby simply swapping the vectors).
2395 Swap = true;
2396 ShiftElts = (4 - M0) % 4;
2397 }
2398
2399 return true;
2400 } else { // BE
2401 if (M0 == 0 || M0 == 1 || M0 == 2 || M0 == 3) {
2402 // Input vectors don't need to be swapped if the leading element
2403 // of the result is one of the 4 elements of the first vector.
2404 Swap = false;
2405 ShiftElts = M0;
2406 } else if (M0 == 4 || M0 == 5 || M0 == 6 || M0 == 7) {
2407 // Input vectors need to be swapped if the leading element
2408 // of the result is one of the 4 elements of the right vector.
2409 Swap = true;
2410 ShiftElts = M0 - 4;
2411 }
2412
2413 return true;
2414 }
2415}
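// Illustrative example (editorial note, not part of the original source):
// the word-level mask {1, 2, 3, 4} takes three words from the first input
// followed by the first word of the second input. On a big-endian target
// this yields Swap = false and ShiftElts = 1, i.e. an xxsldwi of the two
// inputs shifted by one word.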
2416
2417static bool isXXBRShuffleMaskHelper(ShuffleVectorSDNode *N, int Width) {
2418 assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
2419
2420 if (!isNByteElemShuffleMask(N, Width, -1))
2421 return false;
2422
2423 for (int i = 0; i < 16; i += Width)
2424 if (N->getMaskElt(i) != i + Width - 1)
2425 return false;
2426
2427 return true;
2428}
2429
2430bool PPC::isXXBRHShuffleMask(ShuffleVectorSDNode *N) {
2431 return isXXBRShuffleMaskHelper(N, 2);
2432}
2433
2434bool PPC::isXXBRWShuffleMask(ShuffleVectorSDNode *N) {
2435 return isXXBRShuffleMaskHelper(N, 4);
2436}
2437
2438bool PPC::isXXBRDShuffleMask(ShuffleVectorSDNode *N) {
2439 return isXXBRShuffleMaskHelper(N, 8);
2440}
2441
2442bool PPC::isXXBRQShuffleMask(ShuffleVectorSDNode *N) {
2443 return isXXBRShuffleMaskHelper(N, 16);
2444}
2445
2446/// Can node \p N be lowered to an XXPERMDI instruction? If so, set \p Swap
2447/// if the inputs to the instruction should be swapped and set \p DM to the
2448/// value for the immediate.
2449/// Specifically, set \p Swap to true only if \p N can be lowered to XXPERMDI
2450/// AND element 0 of the result comes from the first input (LE) or second input
2451/// (BE). Set \p DM to the calculated result (0-3) only if \p N can be lowered.
2452/// \return true iff the given mask of shuffle node \p N is a XXPERMDI shuffle
2453/// mask.
2454bool PPC::isXXPERMDIShuffleMask(ShuffleVectorSDNode *N, unsigned &DM,
2455 bool &Swap, bool IsLE) {
2456 assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
2457
2458 // Ensure each byte index of the double word is consecutive.
2459 if (!isNByteElemShuffleMask(N, 8, 1))
2460 return false;
2461
2462 unsigned M0 = N->getMaskElt(0) / 8;
2463 unsigned M1 = N->getMaskElt(8) / 8;
2464 assert(((M0 | M1) < 4) && "A mask element out of bounds?");
2465
2466 // If both vector operands for the shuffle are the same vector, the mask will
2467 // contain only elements from the first one and the second one will be undef.
2468 if (N->getOperand(1).isUndef()) {
2469 if ((M0 | M1) < 2) {
2470 DM = IsLE ? (((~M1) & 1) << 1) + ((~M0) & 1) : (M0 << 1) + (M1 & 1);
2471 Swap = false;
2472 return true;
2473 } else
2474 return false;
2475 }
2476
2477 if (IsLE) {
2478 if (M0 > 1 && M1 < 2) {
2479 Swap = false;
2480 } else if (M0 < 2 && M1 > 1) {
2481 M0 = (M0 + 2) % 4;
2482 M1 = (M1 + 2) % 4;
2483 Swap = true;
2484 } else
2485 return false;
2486
2487 // Note: if control flow comes here that means Swap is already set above
2488 DM = (((~M1) & 1) << 1) + ((~M0) & 1);
2489 return true;
2490 } else { // BE
2491 if (M0 < 2 && M1 > 1) {
2492 Swap = false;
2493 } else if (M0 > 1 && M1 < 2) {
2494 M0 = (M0 + 2) % 4;
2495 M1 = (M1 + 2) % 4;
2496 Swap = true;
2497 } else
2498 return false;
2499
2500 // Note: if control flow comes here that means Swap is already set above
2501 DM = (M0 << 1) + (M1 & 1);
2502 return true;
2503 }
2504}
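// Illustrative example (editorial note, not part of the original source):
// the doubleword-level mask {0, 3} takes doubleword 0 of the first input and
// doubleword 1 of the second. On a big-endian target this gives Swap = false
// and DM = (0 << 1) + (3 & 1) = 1, the immediate an xxpermdi would use.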
2505
2506
2507/// getSplatIdxForPPCMnemonics - Return the splat index as a value that is
2508/// appropriate for PPC mnemonics (which have a big endian bias - namely
2509/// elements are counted from the left of the vector register).
2510unsigned PPC::getSplatIdxForPPCMnemonics(SDNode *N, unsigned EltSize,
2511 SelectionDAG &DAG) {
2512 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
2513 assert(isSplatShuffleMask(SVOp, EltSize));
2514 EVT VT = SVOp->getValueType(0);
2515
2516 if (VT == MVT::v2i64 || VT == MVT::v2f64)
2517 return DAG.getDataLayout().isLittleEndian() ? 1 - SVOp->getMaskElt(0)
2518 : SVOp->getMaskElt(0);
2519
2520 if (DAG.getDataLayout().isLittleEndian())
2521 return (16 / EltSize) - 1 - (SVOp->getMaskElt(0) / EltSize);
2522 else
2523 return SVOp->getMaskElt(0) / EltSize;
2524}
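// Illustrative example (editorial note, not part of the original source):
// for a v4i32 splat whose v16i8 mask starts at byte 12 (EltSize == 4), a
// big-endian target returns 12 / 4 = 3, while a little-endian target returns
// (16 / 4) - 1 - 3 = 0: both name the same register element once counted
// from the left, which is how the PPC splat mnemonics expect it.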
2525
2526/// get_VSPLTI_elt - If this is a build_vector of constants which can be formed
2527/// by using a vspltis[bhw] instruction of the specified element size, return
2528/// the constant being splatted. The ByteSize field indicates the number of
2529/// bytes of each element [124] -> [bhw].
2530SDValue PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) {
2531 SDValue OpVal;
2532
2533 // If ByteSize of the splat is bigger than the element size of the
2534 // build_vector, then we have a case where we are checking for a splat where
2535 // multiple elements of the buildvector are folded together into a single
2536 // logical element of the splat (e.g. "vspltish 1" to splat {0,1}*8).
2537 unsigned EltSize = 16/N->getNumOperands();
2538 if (EltSize < ByteSize) {
2539 unsigned Multiple = ByteSize/EltSize; // Number of BV entries per spltval.
2540 SDValue UniquedVals[4];
2541 assert(Multiple > 1 && Multiple <= 4 && "How can this happen?");
2542
2543 // See if all of the elements in the buildvector agree across.
2544 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
2545 if (N->getOperand(i).isUndef()) continue;
2546 // If the element isn't a constant, bail fully out.
2547 if (!isa<ConstantSDNode>(N->getOperand(i))) return SDValue();
2548
2549 if (!UniquedVals[i&(Multiple-1)].getNode())
2550 UniquedVals[i&(Multiple-1)] = N->getOperand(i);
2551 else if (UniquedVals[i&(Multiple-1)] != N->getOperand(i))
2552 return SDValue(); // no match.
2553 }
2554
2555 // Okay, if we reached this point, UniquedVals[0..Multiple-1] contains
2556 // either constant or undef values that are identical for each chunk. See
2557 // if these chunks can form into a larger vspltis*.
2558
2559 // Check to see if all of the leading entries are either 0 or -1. If
2560 // neither, then this won't fit into the immediate field.
2561 bool LeadingZero = true;
2562 bool LeadingOnes = true;
2563 for (unsigned i = 0; i != Multiple-1; ++i) {
2564 if (!UniquedVals[i].getNode()) continue; // Must have been undefs.
2565
2566 LeadingZero &= isNullConstant(UniquedVals[i]);
2567 LeadingOnes &= isAllOnesConstant(UniquedVals[i]);
2568 }
2569 // Finally, check the least significant entry.
2570 if (LeadingZero) {
2571 if (!UniquedVals[Multiple-1].getNode())
2572 return DAG.getTargetConstant(0, SDLoc(N), MVT::i32); // 0,0,0,undef
2573 int Val = UniquedVals[Multiple - 1]->getAsZExtVal();
2574 if (Val < 16) // 0,0,0,4 -> vspltisw(4)
2575 return DAG.getTargetConstant(Val, SDLoc(N), MVT::i32);
2576 }
2577 if (LeadingOnes) {
2578 if (!UniquedVals[Multiple-1].getNode())
2579 return DAG.getTargetConstant(~0U, SDLoc(N), MVT::i32); // -1,-1,-1,undef
2580 int Val =cast<ConstantSDNode>(UniquedVals[Multiple-1])->getSExtValue();
2581 if (Val >= -16) // -1,-1,-1,-2 -> vspltisw(-2)
2582 return DAG.getTargetConstant(Val, SDLoc(N), MVT::i32);
2583 }
2584
2585 return SDValue();
2586 }
2587
2588 // Check to see if this buildvec has a single non-undef value in its elements.
2589 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
2590 if (N->getOperand(i).isUndef()) continue;
2591 if (!OpVal.getNode())
2592 OpVal = N->getOperand(i);
2593 else if (OpVal != N->getOperand(i))
2594 return SDValue();
2595 }
2596
2597 if (!OpVal.getNode()) return SDValue(); // All UNDEF: use implicit def.
2598
2599 unsigned ValSizeInBytes = EltSize;
2600 uint64_t Value = 0;
2601 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
2602 Value = CN->getZExtValue();
2603 } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
2604 assert(CN->getValueType(0) == MVT::f32 && "Only one legal FP vector type!");
2605 Value = llvm::bit_cast<uint32_t>(CN->getValueAPF().convertToFloat());
2606 }
2607
2608 // If the splat value is larger than the element value, then we can never do
2609 // this splat. The only case that we could fit the replicated bits into our
2610 // immediate field for would be zero, and we prefer to use vxor for it.
2611 if (ValSizeInBytes < ByteSize) return SDValue();
2612
2613 // If the element value is larger than the splat value, check if it consists
2614 // of a repeated bit pattern of size ByteSize.
2615 if (!APInt(ValSizeInBytes * 8, Value).isSplat(ByteSize * 8))
2616 return SDValue();
2617
2618 // Properly sign extend the value.
2619 int MaskVal = SignExtend32(Value, ByteSize * 8);
2620
2621 // If this is zero, don't match, zero matches ISD::isBuildVectorAllZeros.
2622 if (MaskVal == 0) return SDValue();
2623
2624 // Finally, if this value fits in a 5 bit sext field, return it
2625 if (SignExtend32<5>(MaskVal) == MaskVal)
2626 return DAG.getTargetConstant(MaskVal, SDLoc(N), MVT::i32);
2627 return SDValue();
2628}
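// Illustrative example (editorial note, not part of the original source):
// a v8i16 build_vector whose lanes are all 0xFFFE can be materialized with
// vspltish -2: ByteSize == 2 equals the element size, the value sign-extends
// from 5 bits (-2), and the routine returns a target constant of -2.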
2629
2630//===----------------------------------------------------------------------===//
2631// Addressing Mode Selection
2632//===----------------------------------------------------------------------===//
2633
2634/// isIntS16Immediate - This method tests to see if the node is either a 32-bit
2635/// or 64-bit immediate, and if the value can be accurately represented as a
2636/// sign extension from a 16-bit value. If so, this returns true and the
2637/// immediate.
2638bool llvm::isIntS16Immediate(SDNode *N, int16_t &Imm) {
2639 if (!isa<ConstantSDNode>(N))
2640 return false;
2641
2642 Imm = (int16_t)N->getAsZExtVal();
2643 if (N->getValueType(0) == MVT::i32)
2644 return Imm == (int32_t)N->getAsZExtVal();
2645 else
2646 return Imm == (int64_t)N->getAsZExtVal();
2647}
2648bool llvm::isIntS16Immediate(SDValue Op, int16_t &Imm) {
2649 return isIntS16Immediate(Op.getNode(), Imm);
2650}
2651
2652/// Used when computing address flags for selecting loads and stores.
2653/// If we have an OR, check if the LHS and RHS are provably disjoint.
2654/// An OR of two provably disjoint values is equivalent to an ADD.
2655/// Most PPC load/store instructions compute the effective address as a sum,
2656/// so doing this conversion is useful.
2657static bool provablyDisjointOr(SelectionDAG &DAG, const SDValue &N) {
2658 if (N.getOpcode() != ISD::OR)
2659 return false;
2660 KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));
2661 if (!LHSKnown.Zero.getBoolValue())
2662 return false;
2663 KnownBits RHSKnown = DAG.computeKnownBits(N.getOperand(1));
2664 return (~(LHSKnown.Zero | RHSKnown.Zero) == 0);
2665}
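// Illustrative example (editorial note, not part of the original source):
// for (X << 4) | 7 the known-zero bits of the left operand cover the low
// four bits, so the OR can never produce a carry and (X << 4) + 7 computes
// the same value; the effective address can then be formed with the usual
// base-plus-offset addressing used by the selection routines below.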
2666
2667/// SelectAddressEVXRegReg - Given the specified address, check to see if it can
2668/// be represented as an indexed [r+r] operation.
2669bool PPCTargetLowering::SelectAddressEVXRegReg(SDValue N, SDValue &Base,
2670 SDValue &Index,
2671 SelectionDAG &DAG) const {
2672 for (SDNode *U : N->uses()) {
2673 if (MemSDNode *Memop = dyn_cast<MemSDNode>(U)) {
2674 if (Memop->getMemoryVT() == MVT::f64) {
2675 Base = N.getOperand(0);
2676 Index = N.getOperand(1);
2677 return true;
2678 }
2679 }
2680 }
2681 return false;
2682}
2683
2684/// isIntS34Immediate - This method tests whether the value of the given node
2685/// can be accurately represented as a sign extension from a 34-bit value. If so,
2686/// this returns true and the immediate.
2687bool llvm::isIntS34Immediate(SDNode *N, int64_t &Imm) {
2688 if (!isa<ConstantSDNode>(N))
2689 return false;
2690
2691 Imm = (int64_t)N->getAsZExtVal();
2692 return isInt<34>(Imm);
2693}
2694bool llvm::isIntS34Immediate(SDValue Op, int64_t &Imm) {
2695 return isIntS34Immediate(Op.getNode(), Imm);
2696}
2697
2698/// SelectAddressRegReg - Given the specified address, check to see if it
2699/// can be represented as an indexed [r+r] operation. Returns false if it
2700/// can be more efficiently represented as [r+imm]. If \p EncodingAlignment is
2701/// non-zero and N can be represented by a base register plus a signed 16-bit
2702/// displacement, make a more precise judgement by checking (displacement % \p
2703/// EncodingAlignment).
2704bool PPCTargetLowering::SelectAddressRegReg(SDValue N, SDValue &Base,
2705 SDValue &Index, SelectionDAG &DAG,
2706 MaybeAlign EncodingAlignment) const {
2707 // If we have a PC Relative target flag don't select as [reg+reg]. It will be
2708 // a [pc+imm].
2709 if (SelectAddressPCRel(N, Base))
2710 return false;
2711
2712 int16_t Imm = 0;
2713 if (N.getOpcode() == ISD::ADD) {
2714 // SPE f64 loads/stores cannot handle a 16-bit offset;
2715 // they only support 8-bit offsets.
2716 if (hasSPE() && SelectAddressEVXRegReg(N, Base, Index, DAG))
2717 return true;
2718 if (isIntS16Immediate(N.getOperand(1), Imm) &&
2719 (!EncodingAlignment || isAligned(*EncodingAlignment, Imm)))
2720 return false; // r+i
2721 if (N.getOperand(1).getOpcode() == PPCISD::Lo)
2722 return false; // r+i
2723
2724 Base = N.getOperand(0);
2725 Index = N.getOperand(1);
2726 return true;
2727 } else if (N.getOpcode() == ISD::OR) {
2728 if (isIntS16Immediate(N.getOperand(1), Imm) &&
2729 (!EncodingAlignment || isAligned(*EncodingAlignment, Imm)))
2730 return false; // Prefer r+i if we can fold the immediate.
2731
2732 // If this is an or of disjoint bitfields, we can codegen this as an add
2733 // (for better address arithmetic) if the LHS and RHS of the OR are provably
2734 // disjoint.
2735 KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));
2736
2737 if (LHSKnown.Zero.getBoolValue()) {
2738 KnownBits RHSKnown = DAG.computeKnownBits(N.getOperand(1));
2739 // If all of the bits are known zero on the LHS or RHS, the add won't
2740 // carry.
2741 if (~(LHSKnown.Zero | RHSKnown.Zero) == 0) {
2742 Base = N.getOperand(0);
2743 Index = N.getOperand(1);
2744 return true;
2745 }
2746 }
2747 }
2748
2749 return false;
2750}
2751
2752// If we happen to be doing an i64 load or store into a stack slot that has
2753// less than a 4-byte alignment, then the frame-index elimination may need to
2754// use an indexed load or store instruction (because the offset may not be a
2755// multiple of 4). The extra register needed to hold the offset comes from the
2756// register scavenger, and it is possible that the scavenger will need to use
2757// an emergency spill slot. As a result, we need to make sure that a spill slot
2758// is allocated when doing an i64 load/store into a less-than-4-byte-aligned
2759// stack slot.
2760static void fixupFuncForFI(SelectionDAG &DAG, int FrameIdx, EVT VT) {
2761 // FIXME: This does not handle the LWA case.
2762 if (VT != MVT::i64)
2763 return;
2764
2765 // NOTE: We'll exclude negative FIs here, which come from argument
2766 // lowering, because there are no known test cases triggering this problem
2767 // using packed structures (or similar). We can remove this exclusion if
2768 // we find such a test case. The reason why this is so test-case driven is
2769 // because this entire 'fixup' is only to prevent crashes (from the
2770 // register scavenger) on not-really-valid inputs. For example, if we have:
2771 // %a = alloca i1
2772 // %b = bitcast i1* %a to i64*
2773 // store i64 %v, i64* %b
2774 // then the store should really be marked as 'align 1', but is not. If it
2775 // were marked as 'align 1' then the indexed form would have been
2776 // instruction-selected initially, and the problem this 'fixup' is preventing
2777 // won't happen regardless.
2778 if (FrameIdx < 0)
2779 return;
2780
2781 MachineFunction &MF = DAG.getMachineFunction();
2782 MachineFrameInfo &MFI = MF.getFrameInfo();
2783
2784 if (MFI.getObjectAlign(FrameIdx) >= Align(4))
2785 return;
2786
2787 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
2788 FuncInfo->setHasNonRISpills();
2789}
2790
2791/// Returns true if the address N can be represented by a base register plus
2792/// a signed 16-bit displacement [r+imm], and if it is not better
2793/// represented as reg+reg. If \p EncodingAlignment is non-zero, only accept
2794/// displacements that are multiples of that value.
2795bool PPCTargetLowering::SelectAddressRegImm(
2796 SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG,
2797 MaybeAlign EncodingAlignment) const {
2798 // FIXME dl should come from parent load or store, not from address
2799 SDLoc dl(N);
2800
2801 // If we have a PC Relative target flag don't select as [reg+imm]. It will be
2802 // a [pc+imm].
2803 if (SelectAddressPCRel(N, Base))
2804 return false;
2805
2806 // If this can be more profitably realized as r+r, fail.
2807 if (SelectAddressRegReg(N, Disp, Base, DAG, EncodingAlignment))
2808 return false;
2809
2810 if (N.getOpcode() == ISD::ADD) {
2811 int16_t imm = 0;
2812 if (isIntS16Immediate(N.getOperand(1), imm) &&
2813 (!EncodingAlignment || isAligned(*EncodingAlignment, imm))) {
2814 Disp = DAG.getTargetConstant(imm, dl, N.getValueType());
2815 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
2816 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2817 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
2818 } else {
2819 Base = N.getOperand(0);
2820 }
2821 return true; // [r+i]
2822 } else if (N.getOperand(1).getOpcode() == PPCISD::Lo) {
2823 // Match LOAD (ADD (X, Lo(G))).
2824 assert(!N.getOperand(1).getConstantOperandVal(1) &&
2825 "Cannot handle constant offsets yet!");
2826 Disp = N.getOperand(1).getOperand(0); // The global address.
2827 assert(Disp.getOpcode() == ISD::TargetGlobalAddress ||
2828 Disp.getOpcode() == ISD::TargetGlobalTLSAddress ||
2829 Disp.getOpcode() == ISD::TargetConstantPool ||
2830 Disp.getOpcode() == ISD::TargetJumpTable);
2831 Base = N.getOperand(0);
2832 return true; // [&g+r]
2833 }
2834 } else if (N.getOpcode() == ISD::OR) {
2835 int16_t imm = 0;
2836 if (isIntS16Immediate(N.getOperand(1), imm) &&
2837 (!EncodingAlignment || isAligned(*EncodingAlignment, imm))) {
2838 // If this is an or of disjoint bitfields, we can codegen this as an add
2839 // (for better address arithmetic) if the LHS and RHS of the OR are
2840 // provably disjoint.
2841 KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));
2842
2843 if ((LHSKnown.Zero.getZExtValue()|~(uint64_t)imm) == ~0ULL) {
2844 // If all of the bits are known zero on the LHS or RHS, the add won't
2845 // carry.
2846 if (FrameIndexSDNode *FI =
2847 dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
2848 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2849 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
2850 } else {
2851 Base = N.getOperand(0);
2852 }
2853 Disp = DAG.getTargetConstant(imm, dl, N.getValueType());
2854 return true;
2855 }
2856 }
2857 } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) {
2858 // Loading from a constant address.
2859
2860 // If this address fits entirely in a 16-bit sext immediate field, codegen
2861 // this as "d, 0"
2862 int16_t Imm;
2863 if (isIntS16Immediate(CN, Imm) &&
2864 (!EncodingAlignment || isAligned(*EncodingAlignment, Imm))) {
2865 Disp = DAG.getTargetConstant(Imm, dl, CN->getValueType(0));
2866 Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
2867 CN->getValueType(0));
2868 return true;
2869 }
2870
2871 // Handle 32-bit sext immediates with LIS + addr mode.
2872 if ((CN->getValueType(0) == MVT::i32 ||
2873 (int64_t)CN->getZExtValue() == (int)CN->getZExtValue()) &&
2874 (!EncodingAlignment ||
2875 isAligned(*EncodingAlignment, CN->getZExtValue()))) {
2876 int Addr = (int)CN->getZExtValue();
2877
2878 // Otherwise, break this down into an LIS + disp.
2879 Disp = DAG.getTargetConstant((short)Addr, dl, MVT::i32);
2880
2881 Base = DAG.getTargetConstant((Addr - (signed short)Addr) >> 16, dl,
2882 MVT::i32);
2883 unsigned Opc = CN->getValueType(0) == MVT::i32 ? PPC::LIS : PPC::LIS8;
2884 Base = SDValue(DAG.getMachineNode(Opc, dl, CN->getValueType(0), Base), 0);
2885 return true;
2886 }
2887 }
2888
2889 Disp = DAG.getTargetConstant(0, dl, getPointerTy(DAG.getDataLayout()));
2890 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N)) {
2891 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2892 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
2893 } else
2894 Base = N;
2895 return true; // [r+0]
2896}
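// Illustrative example (editorial note, not part of the original source):
// in the constant-address case above, for the address 0x12348000 the low 16
// bits taken as a signed displacement are -0x8000, so the base becomes
// (0x12348000 - (-0x8000)) >> 16 = 0x1235 and the code materializes
// lis 0x1235 followed by a memory access with displacement -0x8000;
// 0x12350000 + (-0x8000) folds back to the original address.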
2897
2898/// Similar to the 16-bit case but for instructions that take a 34-bit
2899/// displacement field (prefixed loads/stores).
2900bool PPCTargetLowering::SelectAddressRegImm34(SDValue N, SDValue &Disp,
2901 SDValue &Base,
2902 SelectionDAG &DAG) const {
2903 // Only on 64-bit targets.
2904 if (N.getValueType() != MVT::i64)
2905 return false;
2906
2907 SDLoc dl(N);
2908 int64_t Imm = 0;
2909
2910 if (N.getOpcode() == ISD::ADD) {
2911 if (!isIntS34Immediate(N.getOperand(1), Imm))
2912 return false;
2913 Disp = DAG.getTargetConstant(Imm, dl, N.getValueType());
2914 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0)))
2915 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2916 else
2917 Base = N.getOperand(0);
2918 return true;
2919 }
2920
2921 if (N.getOpcode() == ISD::OR) {
2922 if (!isIntS34Immediate(N.getOperand(1), Imm))
2923 return false;
2924 // If this is an or of disjoint bitfields, we can codegen this as an add
2925 // (for better address arithmetic) if the LHS and RHS of the OR are
2926 // provably disjoint.
2927 KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));
2928 if ((LHSKnown.Zero.getZExtValue() | ~(uint64_t)Imm) != ~0ULL)
2929 return false;
2930 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0)))
2931 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2932 else
2933 Base = N.getOperand(0);
2934 Disp = DAG.getTargetConstant(Imm, dl, N.getValueType());
2935 return true;
2936 }
2937
2938 if (isIntS34Immediate(N, Imm)) { // If the address is a 34-bit const.
2939 Disp = DAG.getTargetConstant(Imm, dl, N.getValueType());
2940 Base = DAG.getRegister(PPC::ZERO8, N.getValueType());
2941 return true;
2942 }
2943
2944 return false;
2945}
2946
2947/// SelectAddressRegRegOnly - Given the specified address, force it to be
2948/// represented as an indexed [r+r] operation.
2950 SDValue &Index,
2951 SelectionDAG &DAG) const {
2952 // Check to see if we can easily represent this as an [r+r] address. This
2953 // will fail if it thinks that the address is more profitably represented as
2954 // reg+imm, e.g. where imm = 0.
2955 if (SelectAddressRegReg(N, Base, Index, DAG))
2956 return true;
2957
2958 // If the address is the result of an add, we will utilize the fact that the
2959 // address calculation includes an implicit add. However, we can reduce
2960 // register pressure if we do not materialize a constant just for use as the
2961 // index register. We only get rid of the add if it is not an add of a
2962 // value and a 16-bit signed constant and both have a single use.
2963 int16_t imm = 0;
2964 if (N.getOpcode() == ISD::ADD &&
2965 (!isIntS16Immediate(N.getOperand(1), imm) ||
2966 !N.getOperand(1).hasOneUse() || !N.getOperand(0).hasOneUse())) {
2967 Base = N.getOperand(0);
2968 Index = N.getOperand(1);
2969 return true;
2970 }
2971
2972 // Otherwise, do it the hard way, using R0 as the base register.
2973 Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
2974 N.getValueType());
2975 Index = N;
2976 return true;
2977}
2978
2979template <typename Ty> static bool isValidPCRelNode(SDValue N) {
2980 Ty *PCRelCand = dyn_cast<Ty>(N);
2981 return PCRelCand && (PPCInstrInfo::hasPCRelFlag(PCRelCand->getTargetFlags()));
2982}
2983
2984/// Returns true if this address is a PC Relative address.
2985/// PC Relative addresses are marked with the flag PPCII::MO_PCREL_FLAG
2986/// or if the node opcode is PPCISD::MAT_PCREL_ADDR.
2987bool PPCTargetLowering::SelectAddressPCRel(SDValue N, SDValue &Base) const {
2988 // This is a materialize PC Relative node. Always select this as PC Relative.
2989 Base = N;
2990 if (N.getOpcode() == PPCISD::MAT_PCREL_ADDR)
2991 return true;
2992 if (isValidPCRelNode<ConstantPoolSDNode>(N) ||
2993 isValidPCRelNode<GlobalAddressSDNode>(N) ||
2994 isValidPCRelNode<JumpTableSDNode>(N) ||
2995 isValidPCRelNode<BlockAddressSDNode>(N))
2996 return true;
2997 return false;
2998}
2999
3000/// Returns true if we should use a direct load into vector instruction
3001/// (such as lxsd or lfd), instead of a load into gpr + direct move sequence.
3002static bool usePartialVectorLoads(SDNode *N, const PPCSubtarget& ST) {
3003
3004 // If there are any other uses other than scalar to vector, then we should
3005 // keep it as a scalar load -> direct move pattern to prevent multiple
3006 // loads.
3007 LoadSDNode *LD = dyn_cast<LoadSDNode>(N);
3008 if (!LD)
3009 return false;
3010
3011 EVT MemVT = LD->getMemoryVT();
3012 if (!MemVT.isSimple())
3013 return false;
3014 switch(MemVT.getSimpleVT().SimpleTy) {
3015 case MVT::i64:
3016 break;
3017 case MVT::i32:
3018 if (!ST.hasP8Vector())
3019 return false;
3020 break;
3021 case MVT::i16:
3022 case MVT::i8:
3023 if (!ST.hasP9Vector())
3024 return false;
3025 break;
3026 default:
3027 return false;
3028 }
3029
3030 SDValue LoadedVal(N, 0);
3031 if (!LoadedVal.hasOneUse())
3032 return false;
3033
3034 for (SDNode::use_iterator UI = LD->use_begin(), UE = LD->use_end();
3035 UI != UE; ++UI)
3036 if (UI.getUse().get().getResNo() == 0 &&
3037 UI->getOpcode() != ISD::SCALAR_TO_VECTOR &&
3038 UI->getOpcode() != PPCISD::SCALAR_TO_VECTOR_PERMUTED)
3039 return false;
3040
3041 return true;
3042}
3043
3044/// getPreIndexedAddressParts - returns true by value, base pointer and
3045/// offset pointer and addressing mode by reference if the node's address
3046/// can be legally represented as pre-indexed load / store address.
3047bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
3048 SDValue &Offset,
3049 ISD::MemIndexedMode &AM,
3050 SelectionDAG &DAG) const {
3051 if (DisablePPCPreinc) return false;
3052
3053 bool isLoad = true;
3054 SDValue Ptr;
3055 EVT VT;
3056 Align Alignment;
3057 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
3058 Ptr = LD->getBasePtr();
3059 VT = LD->getMemoryVT();
3060 Alignment = LD->getAlign();
3061 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
3062 Ptr = ST->getBasePtr();
3063 VT = ST->getMemoryVT();
3064 Alignment = ST->getAlign();
3065 isLoad = false;
3066 } else
3067 return false;
3068
3069 // Do not generate pre-inc forms for specific loads that feed scalar_to_vector
3070 // instructions because we can fold these into a more efficient instruction
3071 // instead (such as LXSD).
3072 if (isLoad && usePartialVectorLoads(N, Subtarget)) {
3073 return false;
3074 }
3075
3076 // PowerPC doesn't have preinc load/store instructions for vectors
3077 if (VT.isVector())
3078 return false;
3079
3080 if (SelectAddressRegReg(Ptr, Base, Offset, DAG)) {
3081 // Common code will reject creating a pre-inc form if the base pointer
3082 // is a frame index, or if N is a store and the base pointer is either
3083 // the same as or a predecessor of the value being stored. Check for
3084 // those situations here, and try with swapped Base/Offset instead.
3085 bool Swap = false;
3086
3087 if (isa<FrameIndexSDNode>(Base) || isa<RegisterSDNode>(Base))
3088 Swap = true;
3089 else if (!isLoad) {
3090 SDValue Val = cast<StoreSDNode>(N)->getValue();
3091 if (Val == Base || Base.getNode()->isPredecessorOf(Val.getNode()))
3092 Swap = true;
3093 }
3094
3095 if (Swap)
3096 std::swap(Base, Offset);
3097
3098 AM = ISD::PRE_INC;
3099 return true;
3100 }
3101
3102 // LDU/STU can only handle immediates that are a multiple of 4.
3103 if (VT != MVT::i64) {
3104 if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, std::nullopt))
3105 return false;
3106 } else {
3107 // LDU/STU need an address with at least 4-byte alignment.
3108 if (Alignment < Align(4))
3109 return false;
3110
3111 if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, Align(4)))
3112 return false;
3113 }
3114
3115 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
3116 // PPC64 doesn't have lwau, but it does have lwaux. Reject preinc load of
3117 // sext i32 to i64 when addr mode is r+i.
3118 if (LD->getValueType(0) == MVT::i64 && LD->getMemoryVT() == MVT::i32 &&
3119 LD->getExtensionType() == ISD::SEXTLOAD &&
3120 isa<ConstantSDNode>(Offset))
3121 return false;
3122 }
3123
3124 AM = ISD::PRE_INC;
3125 return true;
3126}
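// Illustrative example (editorial note, not part of the original source):
// for an i32 load whose address is a pointer advanced by 4, returning
// Base = p, Offset = 4 and AM = ISD::PRE_INC lets instruction selection use
// the update forms (e.g. lwzu/stwu), which access p + 4 and write the
// incremented address back into the base register, saving a separate add.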
3127
3128//===----------------------------------------------------------------------===//
3129// LowerOperation implementation
3130//===----------------------------------------------------------------------===//
3131
3132/// Return true if we should reference labels using a PICBase, set the HiOpFlags
3133/// and LoOpFlags to the target MO flags.
3134static void getLabelAccessInfo(bool IsPIC, const PPCSubtarget &Subtarget,
3135 unsigned &HiOpFlags, unsigned &LoOpFlags,
3136 const GlobalValue *GV = nullptr) {
3137 HiOpFlags = PPCII::MO_HA;
3138 LoOpFlags = PPCII::MO_LO;
3139
3140 // Don't use the pic base if not in PIC relocation model.
3141 if (IsPIC) {
3142 HiOpFlags = PPCII::MO_PIC_HA_FLAG;
3143 LoOpFlags = PPCII::MO_PIC_LO_FLAG;
3144 }
3145}
3146
3147static SDValue LowerLabelRef(SDValue HiPart, SDValue LoPart, bool isPIC,
3148 SelectionDAG &DAG) {
3149 SDLoc DL(HiPart);
3150 EVT PtrVT = HiPart.getValueType();
3151 SDValue Zero = DAG.getConstant(0, DL, PtrVT);
3152
3153 SDValue Hi = DAG.getNode(PPCISD::Hi, DL, PtrVT, HiPart, Zero);
3154 SDValue Lo = DAG.getNode(PPCISD::Lo, DL, PtrVT, LoPart, Zero);
3155
3156 // With PIC, the first instruction is actually "GR+hi(&G)".
3157 if (isPIC)
3158 Hi = DAG.getNode(ISD::ADD, DL, PtrVT,
3159 DAG.getNode(PPCISD::GlobalBaseReg, DL, PtrVT), Hi);
3160
3161 // Generate non-pic code that has direct accesses to the constant pool.
3162 // The address of the global is just (hi(&g)+lo(&g)).
3163 return DAG.getNode(ISD::ADD, DL, PtrVT, Hi, Lo);
3164}
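// Illustrative example (editorial note, not part of the original source):
// for a symbol at address 0x10008004, the MO_HA/MO_LO split gives
// ha16 = (0x10008004 + 0x8000) >> 16 = 0x1001 and lo16 = -0x7ffc (signed),
// so "addis r, 0, 0x1001; addi r, r, -0x7ffc" reconstructs the address;
// the +0x8000 adjustment in the high part compensates for the signed low
// half.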
3165
3166static void setUsesTOCBasePtr(MachineFunction &MF) {
3167 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
3168 FuncInfo->setUsesTOCBasePtr();
3169}
3170
3171static void setUsesTOCBasePtr(SelectionDAG &DAG) {
3172 setUsesTOCBasePtr(DAG.getMachineFunction());
3173}
3174
3175SDValue PPCTargetLowering::getTOCEntry(SelectionDAG &DAG, const SDLoc &dl,
3176 SDValue GA) const {
3177 const bool Is64Bit = Subtarget.isPPC64();
3178 EVT VT = Is64Bit ? MVT::i64 : MVT::i32;
3179 SDValue Reg = Is64Bit ? DAG.getRegister(PPC::X2, VT)
3180 : Subtarget.isAIXABI()
3181 ? DAG.getRegister(PPC::R2, VT)
3182 : DAG.getNode(PPCISD::GlobalBaseReg, dl, VT);
3183 SDValue Ops[] = { GA, Reg };
3184 return DAG.getMemIntrinsicNode(
3185 PPCISD::TOC_ENTRY, dl, DAG.getVTList(VT, MVT::Other), Ops, VT,
3188}
3189
3190SDValue PPCTargetLowering::LowerConstantPool(SDValue Op,
3191 SelectionDAG &DAG) const {
3192 EVT PtrVT = Op.getValueType();
3193 ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
3194 const Constant *C = CP->getConstVal();
3195
3196 // 64-bit SVR4 ABI and AIX ABI code are always position-independent.
3197 // The actual address of the GlobalValue is stored in the TOC.
3198 if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
3199 if (Subtarget.isUsingPCRelativeCalls()) {
3200 SDLoc DL(CP);
3201 EVT Ty = getPointerTy(DAG.getDataLayout());
3202 SDValue ConstPool = DAG.getTargetConstantPool(
3203 C, Ty, CP->getAlign(), CP->getOffset(), PPCII::MO_PCREL_FLAG);
3204 return DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, ConstPool);
3205 }
3206 setUsesTOCBasePtr(DAG);
3207 SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), 0);
3208 return getTOCEntry(DAG, SDLoc(CP), GA);
3209 }
3210
3211 unsigned MOHiFlag, MOLoFlag;
3212 bool IsPIC = isPositionIndependent();
3213 getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);
3214
3215 if (IsPIC && Subtarget.isSVR4ABI()) {
3216 SDValue GA =
3217 DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), PPCII::MO_PIC_FLAG);
3218 return getTOCEntry(DAG, SDLoc(CP), GA);
3219 }
3220
3221 SDValue CPIHi =
3222 DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), 0, MOHiFlag);
3223 SDValue CPILo =
3224 DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), 0, MOLoFlag);
3225 return LowerLabelRef(CPIHi, CPILo, IsPIC, DAG);
3226}
3227
3228// For 64-bit PowerPC, prefer the more compact relative encodings.
3229// This trades 32 bits per jump table entry for one or two instructions
3230// at the jump site.
3231unsigned PPCTargetLowering::getJumpTableEncoding() const {
3232 if (isJumpTableRelative())
3233 return MachineJumpTableInfo::EK_LabelDifference32;
3234
3235 return TargetLowering::getJumpTableEncoding();
3236}
3237
3238bool PPCTargetLowering::isJumpTableRelative() const {
3239 if (UseAbsoluteJumpTables)
3240 return false;
3241 if (Subtarget.isPPC64() || Subtarget.isAIXABI())
3242 return true;
3243 return TargetLowering::isJumpTableRelative();
3244}
3245
3246SDValue PPCTargetLowering::getPICJumpTableRelocBase(SDValue Table,
3247 SelectionDAG &DAG) const {
3248 if (!Subtarget.isPPC64() || Subtarget.isAIXABI())
3249 return TargetLowering::getPICJumpTableRelocBase(Table, DAG);
3250
3251 switch (getTargetMachine().getCodeModel()) {
3252 case CodeModel::Small:
3253 case CodeModel::Medium:
3254 return TargetLowering::getPICJumpTableRelocBase(Table, DAG);
3255 default:
3256 return DAG.getNode(PPCISD::GlobalBaseReg, SDLoc(),
3257 getPointerTy(DAG.getDataLayout()));
3258 }
3259}
3260
3261const MCExpr *
3262PPCTargetLowering::getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
3263 unsigned JTI,
3264 MCContext &Ctx) const {
3265 if (!Subtarget.isPPC64() || Subtarget.isAIXABI())
3266 return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx);
3267
3268 switch (getTargetMachine().getCodeModel()) {
3269 case CodeModel::Small:
3270 case CodeModel::Medium:
3271 return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx);
3272 default:
3273 return MCSymbolRefExpr::create(MF->getPICBaseSymbol(), Ctx);
3274 }
3275}
3276
3277SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
3278 EVT PtrVT = Op.getValueType();
3279 JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
3280
3281 // isUsingPCRelativeCalls() returns true when PCRelative is enabled
3282 if (Subtarget.isUsingPCRelativeCalls()) {
3283 SDLoc DL(JT);
3284 EVT Ty = getPointerTy(DAG.getDataLayout());
3285 SDValue GA =
3286 DAG.getTargetJumpTable(JT->getIndex(), Ty, PPCII::MO_PCREL_FLAG);
3287 SDValue MatAddr = DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
3288 return MatAddr;
3289 }
3290
3291 // 64-bit SVR4 ABI and AIX ABI code are always position-independent.
3292 // The actual address of the GlobalValue is stored in the TOC.
3293 if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
3294 setUsesTOCBasePtr(DAG);
3295 SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
3296 return getTOCEntry(DAG, SDLoc(JT), GA);
3297 }
3298
3299 unsigned MOHiFlag, MOLoFlag;
3300 bool IsPIC = isPositionIndependent();
3301 getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);
3302
3303 if (IsPIC && Subtarget.isSVR4ABI()) {
3304 SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT,
3305 PPCII::MO_PIC_FLAG);
3306 return getTOCEntry(DAG, SDLoc(GA), GA);
3307 }
3308
3309 SDValue JTIHi = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOHiFlag);
3310 SDValue JTILo = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOLoFlag);
3311 return LowerLabelRef(JTIHi, JTILo, IsPIC, DAG);
3312}
3313
3314SDValue PPCTargetLowering::LowerBlockAddress(SDValue Op,
3315 SelectionDAG &DAG) const {
3316 EVT PtrVT = Op.getValueType();
3317 BlockAddressSDNode *BASDN = cast<BlockAddressSDNode>(Op);
3318 const BlockAddress *BA = BASDN->getBlockAddress();
3319
3320 // isUsingPCRelativeCalls() returns true when PCRelative is enabled
3321 if (Subtarget.isUsingPCRelativeCalls()) {
3322 SDLoc DL(BASDN);
3323 EVT Ty = getPointerTy(DAG.getDataLayout());
3324 SDValue GA = DAG.getTargetBlockAddress(BA, Ty, BASDN->getOffset(),
3325 PPCII::MO_PCREL_FLAG);
3326 SDValue MatAddr = DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
3327 return MatAddr;
3328 }
3329
3330 // 64-bit SVR4 ABI and AIX ABI code are always position-independent.
3331 // The actual BlockAddress is stored in the TOC.
3332 if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
3333 setUsesTOCBasePtr(DAG);
3334 SDValue GA = DAG.getTargetBlockAddress(BA, PtrVT, BASDN->getOffset());
3335 return getTOCEntry(DAG, SDLoc(BASDN), GA);
3336 }
3337
3338 // 32-bit position-independent ELF stores the BlockAddress in the .got.
3339 if (Subtarget.is32BitELFABI() && isPositionIndependent())
3340 return getTOCEntry(
3341 DAG, SDLoc(BASDN),
3342 DAG.getTargetBlockAddress(BA, PtrVT, BASDN->getOffset()));
3343
3344 unsigned MOHiFlag, MOLoFlag;
3345 bool IsPIC = isPositionIndependent();
3346 getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);
3347 SDValue TgtBAHi = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOHiFlag);
3348 SDValue TgtBALo = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOLoFlag);
3349 return LowerLabelRef(TgtBAHi, TgtBALo, IsPIC, DAG);
3350}
3351
3352SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
3353 SelectionDAG &DAG) const {
3354 if (Subtarget.isAIXABI())
3355 return LowerGlobalTLSAddressAIX(Op, DAG);
3356
3357 return LowerGlobalTLSAddressLinux(Op, DAG);
3358}
3359
3360/// updateForAIXShLibTLSModelOpt - Helper to initialize TLS model opt settings,
3361/// and then apply the update.
3362static void updateForAIXShLibTLSModelOpt(TLSModel::Model &Model,
3363 SelectionDAG &DAG,
3364 const TargetMachine &TM) {
3365 // Initialize TLS model opt setting lazily:
3366 // (1) Use initial-exec for single TLS var references within current function.
3367 // (2) Use local-dynamic for multiple TLS var references within current
3368 // function.
3369 PPCFunctionInfo *FuncInfo =
3370 DAG.getMachineFunction().getInfo<PPCFunctionInfo>();
3371 if (!FuncInfo->isAIXFuncTLSModelOptInitDone()) {
3373 // Iterate over all instructions within current function, collect all TLS
3374 // global variables (global variables taken as the first parameter to
3375 // Intrinsic::threadlocal_address).
3376 const Function &Func = DAG.getMachineFunction().getFunction();
3377 for (Function::const_iterator BI = Func.begin(), BE = Func.end(); BI != BE;
3378 ++BI)
3379 for (BasicBlock::const_iterator II = BI->begin(), IE = BI->end();
3380 II != IE; ++II)
3381 if (II->getOpcode() == Instruction::Call)
3382 if (const CallInst *CI = dyn_cast<const CallInst>(&*II))
3383 if (Function *CF = CI->getCalledFunction())
3384 if (CF->isDeclaration() &&
3385 CF->getIntrinsicID() == Intrinsic::threadlocal_address)
3386 if (const GlobalValue *GV =
3387 dyn_cast<GlobalValue>(II->getOperand(0))) {
3388 TLSModel::Model GVModel = TM.getTLSModel(GV);
3389 if (GVModel == TLSModel::LocalDynamic)
3390 TLSGV.insert(GV);
3391 }
3392
3393 unsigned TLSGVCnt = TLSGV.size();
3394 LLVM_DEBUG(dbgs() << format("LocalDynamic TLSGV count:%d\n", TLSGVCnt));
3395 if (TLSGVCnt <= PPCAIXTLSModelOptUseIEForLDLimit)
3396 FuncInfo->setAIXFuncUseTLSIEForLD();
3397 FuncInfo->setAIXFuncTLSModelOptInitDone();
3398 }
3399
3400 if (FuncInfo->isAIXFuncUseTLSIEForLD()) {
3401 LLVM_DEBUG(
3402 dbgs() << DAG.getMachineFunction().getName()
3403 << " function is using the TLS-IE model for TLS-LD access.\n");
3404 Model = TLSModel::InitialExec;
3405 }
3406}
3407
3408SDValue PPCTargetLowering::LowerGlobalTLSAddressAIX(SDValue Op,
3409 SelectionDAG &DAG) const {
3410 GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
3411
3412 if (DAG.getTarget().useEmulatedTLS())
3413 report_fatal_error("Emulated TLS is not yet supported on AIX");
3414
3415 SDLoc dl(GA);
3416 const GlobalValue *GV = GA->getGlobal();
3417 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3418 bool Is64Bit = Subtarget.isPPC64();
3419 TLSModel::Model Model = getTargetMachine().getTLSModel(GV);
3420
3421 // Apply update to the TLS model.
3422 if (Subtarget.hasAIXShLibTLSModelOpt())
3423 updateForAIXShLibTLSModelOpt(Model, DAG, getTargetMachine());
3424
3425 bool IsTLSLocalExecModel = Model == TLSModel::LocalExec;
3426
3427 if (IsTLSLocalExecModel || Model == TLSModel::InitialExec) {
3428 bool HasAIXSmallLocalExecTLS = Subtarget.hasAIXSmallLocalExecTLS();
3429 bool HasAIXSmallTLSGlobalAttr = false;
3430 SDValue VariableOffsetTGA =
3431 DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, PPCII::MO_TPREL_FLAG);
3432 SDValue VariableOffset = getTOCEntry(DAG, dl, VariableOffsetTGA);
3433 SDValue TLSReg;
3434
3435 if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV))
3436 if (GVar->hasAttribute("aix-small-tls"))
3437 HasAIXSmallTLSGlobalAttr = true;
3438
3439 if (Is64Bit) {
3440 // For local-exec and initial-exec on AIX (64-bit), the sequence generated
3441 // involves a load of the variable offset (from the TOC), followed by an
3442 // add of the loaded variable offset to R13 (the thread pointer).
3443 // This code sequence looks like:
3444 // ld reg1,var[TC](2)
3445 // add reg2, reg1, r13 // r13 contains the thread pointer
3446 TLSReg = DAG.getRegister(PPC::X13, MVT::i64);
3447
3448 // With the -maix-small-local-exec-tls option, or with the "aix-small-tls"
3449 // global variable attribute, produce a faster access sequence for
3450 // local-exec TLS variables where the offset from the TLS base is encoded
3451 // as an immediate operand.
3452 //
3453 // We only utilize the faster local-exec access sequence when the TLS
3454 // variable has a size within the policy limit. We treat types that are
3455 // not sized or are empty as being over the policy size limit.
3456 if ((HasAIXSmallLocalExecTLS || HasAIXSmallTLSGlobalAttr) &&
3457 IsTLSLocalExecModel) {
3458 Type *GVType = GV->getValueType();
3459 if (GVType->isSized() && !GVType->isEmptyTy() &&
3460 GV->getDataLayout().getTypeAllocSize(GVType) <=
3462 return DAG.getNode(PPCISD::Lo, dl, PtrVT, VariableOffsetTGA, TLSReg);
3463 }
3464 } else {
3465 // For local-exec and initial-exec on AIX (32-bit), the sequence generated
3466 // involves loading the variable offset from the TOC, generating a call to
3467 // .__get_tpointer to get the thread pointer (which will be in R3), and
3468 // adding the two together:
3469 // lwz reg1,var[TC](2)
3470 // bla .__get_tpointer
3471 // add reg2, reg1, r3
3472 TLSReg = DAG.getNode(PPCISD::GET_TPOINTER, dl, PtrVT);
3473
3474 // We do not implement the 32-bit version of the faster access sequence
3475 // for local-exec that is controlled by the -maix-small-local-exec-tls
3476 // option, or the "aix-small-tls" global variable attribute.
3477 if (HasAIXSmallLocalExecTLS || HasAIXSmallTLSGlobalAttr)
3478 report_fatal_error("The small-local-exec TLS access sequence is "
3479 "currently only supported on AIX (64-bit mode).");
3480 }
3481 return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TLSReg, VariableOffset);
3482 }
3483
3484 if (Model == TLSModel::LocalDynamic) {
3485 bool HasAIXSmallLocalDynamicTLS = Subtarget.hasAIXSmallLocalDynamicTLS();
3486
3487 // We do not implement the 32-bit version of the faster access sequence
3488 // for local-dynamic that is controlled by -maix-small-local-dynamic-tls.
3489 if (!Is64Bit && HasAIXSmallLocalDynamicTLS)
3490 report_fatal_error("The small-local-dynamic TLS access sequence is "
3491 "currently only supported on AIX (64-bit mode).");
3492
3493 // For local-dynamic on AIX, we need to generate one TOC entry for each
3494 // variable offset, and a single module-handle TOC entry for the entire
3495 // file.
3496
3497 SDValue VariableOffsetTGA =
3498 DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, PPCII::MO_TLSLD_FLAG);
3499 SDValue VariableOffset = getTOCEntry(DAG, dl, VariableOffsetTGA);
3500
3502 GlobalVariable *TLSGV =
3503 dyn_cast_or_null<GlobalVariable>(M->getOrInsertGlobal(
3504 StringRef("_$TLSML"), PointerType::getUnqual(*DAG.getContext())));
3506 assert(TLSGV && "Not able to create GV for _$TLSML.");
3507 SDValue ModuleHandleTGA =
3508 DAG.getTargetGlobalAddress(TLSGV, dl, PtrVT, 0, PPCII::MO_TLSLDM_FLAG);
3509 SDValue ModuleHandleTOC = getTOCEntry(DAG, dl, ModuleHandleTGA);
3510 SDValue ModuleHandle =
3511 DAG.getNode(PPCISD::TLSLD_AIX, dl, PtrVT, ModuleHandleTOC);
3512
3513 // With the -maix-small-local-dynamic-tls option, produce a faster access
3514 // sequence for local-dynamic TLS variables where the offset from the
3515 // module-handle is encoded as an immediate operand.
3516 //
3517 // We only utilize the faster local-dynamic access sequence when the TLS
3518 // variable has a size within the policy limit. We treat types that are
3519 // not sized or are empty as being over the policy size limit.
3520 if (HasAIXSmallLocalDynamicTLS) {
3521 Type *GVType = GV->getValueType();
3522 if (GVType->isSized() && !GVType->isEmptyTy() &&
3523 GV->getDataLayout().getTypeAllocSize(GVType) <=
3525 return DAG.getNode(PPCISD::Lo, dl, PtrVT, VariableOffsetTGA,
3526 ModuleHandle);
3527 }
3528
3529 return DAG.getNode(ISD::ADD, dl, PtrVT, ModuleHandle, VariableOffset);
3530 }
3531
3532 // If Local- or Initial-exec or Local-dynamic is not possible or specified,
3533 // all GlobalTLSAddress nodes are lowered using the general-dynamic model. We
3534 // need to generate two TOC entries, one for the variable offset, one for the
3535 // region handle. The global address for the TOC entry of the region handle is
3536 // created with the MO_TLSGDM_FLAG flag and the global address for the TOC
3537 // entry of the variable offset is created with MO_TLSGD_FLAG.
3538 SDValue VariableOffsetTGA =
3539 DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, PPCII::MO_TLSGD_FLAG);
3540 SDValue RegionHandleTGA =
3541 DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, PPCII::MO_TLSGDM_FLAG);
3542 SDValue VariableOffset = getTOCEntry(DAG, dl, VariableOffsetTGA);
3543 SDValue RegionHandle = getTOCEntry(DAG, dl, RegionHandleTGA);
3544 return DAG.getNode(PPCISD::TLSGD_AIX, dl, PtrVT, VariableOffset,
3545 RegionHandle);
3546}
3547
3548SDValue PPCTargetLowering::LowerGlobalTLSAddressLinux(SDValue Op,
3549 SelectionDAG &DAG) const {
3550 // FIXME: TLS addresses currently use medium model code sequences,
3551 // which is the most useful form. Eventually support for small and
3552 // large models could be added if users need it, at the cost of
3553 // additional complexity.
3554 GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
3555 if (DAG.getTarget().useEmulatedTLS())
3556 return LowerToTLSEmulatedModel(GA, DAG);
3557
3558 SDLoc dl(GA);
3559 const GlobalValue *GV = GA->getGlobal();
3560 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3561 bool is64bit = Subtarget.isPPC64();
3562 const Module *M = DAG.getMachineFunction().getFunction().getParent();
3563 PICLevel::Level picLevel = M->getPICLevel();
3564
3565 const TargetMachine &TM = getTargetMachine();
3566 TLSModel::Model Model = TM.getTLSModel(GV);
3567
3568 if (Model == TLSModel::LocalExec) {
3569 if (Subtarget.isUsingPCRelativeCalls()) {
3570 SDValue TLSReg = DAG.getRegister(PPC::X13, MVT::i64);
3571 SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3572 PPCII::MO_TPREL_PCREL_FLAG);
3573 SDValue MatAddr =
3574 DAG.getNode(PPCISD::TLS_LOCAL_EXEC_MAT_ADDR, dl, PtrVT, TGA);
3575 return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TLSReg, MatAddr);
3576 }
3577
3578 SDValue TGAHi = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3579 PPCII::MO_TPREL_HA);
3580 SDValue TGALo = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3581 PPCII::MO_TPREL_LO);
3582 SDValue TLSReg = is64bit ? DAG.getRegister(PPC::X13, MVT::i64)
3583 : DAG.getRegister(PPC::R2, MVT::i32);
3584
3585 SDValue Hi = DAG.getNode(PPCISD::Hi, dl, PtrVT, TGAHi, TLSReg);
3586 return DAG.getNode(PPCISD::Lo, dl, PtrVT, TGALo, Hi);
3587 }
3588
3589 if (Model == TLSModel::InitialExec) {
3590 bool IsPCRel = Subtarget.isUsingPCRelativeCalls();
3591 SDValue TGA = DAG.getTargetGlobalAddress(
3592 GV, dl, PtrVT, 0, IsPCRel ? PPCII::MO_GOT_TPREL_PCREL_FLAG : 0);
3593 SDValue TGATLS = DAG.getTargetGlobalAddress(
3594 GV, dl, PtrVT, 0, IsPCRel ? PPCII::MO_TLS_PCREL_FLAG : PPCII::MO_TLS);
3595 SDValue TPOffset;
3596 if (IsPCRel) {
3597 SDValue MatPCRel = DAG.getNode(PPCISD::MAT_PCREL_ADDR, dl, PtrVT, TGA);
3598 TPOffset = DAG.getLoad(MVT::i64, dl, DAG.getEntryNode(), MatPCRel,
3599 MachinePointerInfo());
3600 } else {
3601 SDValue GOTPtr;
3602 if (is64bit) {
3603 setUsesTOCBasePtr(DAG);
3604 SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
3605 GOTPtr =
3606 DAG.getNode(PPCISD::ADDIS_GOT_TPREL_HA, dl, PtrVT, GOTReg, TGA);
3607 } else {
3608 if (!TM.isPositionIndependent())
3609 GOTPtr = DAG.getNode(PPCISD::PPC32_GOT, dl, PtrVT);
3610 else if (picLevel == PICLevel::SmallPIC)
3611 GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
3612 else
3613 GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
3614 }
3615 TPOffset = DAG.getNode(PPCISD::LD_GOT_TPREL_L, dl, PtrVT, TGA, GOTPtr);
3616 }
3617 return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TPOffset, TGATLS);
3618 }
3619
3620 if (Model == TLSModel::GeneralDynamic) {
3621 if (Subtarget.isUsingPCRelativeCalls()) {
3622 SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3623 PPCII::MO_GOT_TLSGD_PCREL_FLAG);
3624 return DAG.getNode(PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR, dl, PtrVT, TGA);
3625 }
3626
3627 SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
3628 SDValue GOTPtr;
3629 if (is64bit) {
3630 setUsesTOCBasePtr(DAG);
3631 SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
3632 GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSGD_HA, dl, PtrVT,
3633 GOTReg, TGA);
3634 } else {
3635 if (picLevel == PICLevel::SmallPIC)
3636 GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
3637 else
3638 GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
3639 }
3640 return DAG.getNode(PPCISD::ADDI_TLSGD_L_ADDR, dl, PtrVT,
3641 GOTPtr, TGA, TGA);
3642 }
3643
3644 if (Model == TLSModel::LocalDynamic) {
3645 if (Subtarget.isUsingPCRelativeCalls()) {
3646 SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3647 PPCII::MO_GOT_TLSLD_PCREL_FLAG);
3648 SDValue MatPCRel =
3649 DAG.getNode(PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR, dl, PtrVT, TGA);
3650 return DAG.getNode(PPCISD::PADDI_DTPREL, dl, PtrVT, MatPCRel, TGA);
3651 }
3652
3653 SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
3654 SDValue GOTPtr;
3655 if (is64bit) {
3656 setUsesTOCBasePtr(DAG);
3657 SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
3658 GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSLD_HA, dl, PtrVT,
3659 GOTReg, TGA);
3660 } else {
3661 if (picLevel == PICLevel::SmallPIC)
3662 GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
3663 else
3664 GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
3665 }
3666 SDValue TLSAddr = DAG.getNode(PPCISD::ADDI_TLSLD_L_ADDR, dl,
3667 PtrVT, GOTPtr, TGA, TGA);
3668 SDValue DtvOffsetHi = DAG.getNode(PPCISD::ADDIS_DTPREL_HA, dl,
3669 PtrVT, TLSAddr, TGA);
3670 return DAG.getNode(PPCISD::ADDI_DTPREL_L, dl, PtrVT, DtvOffsetHi, TGA);
3671 }
3672
3673 llvm_unreachable("Unknown TLS model!");
3674}
3675
3676SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op,
3677 SelectionDAG &DAG) const {
3678 EVT PtrVT = Op.getValueType();
3679 GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
3680 SDLoc DL(GSDN);
3681 const GlobalValue *GV = GSDN->getGlobal();
3682
3683 // 64-bit SVR4 ABI & AIX ABI code is always position-independent.
3684 // The actual address of the GlobalValue is stored in the TOC.
3685 if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
3686 if (Subtarget.isUsingPCRelativeCalls()) {
3687 EVT Ty = getPointerTy(DAG.getDataLayout());
3688 if (isAccessedAsGotIndirect(Op)) {
3689 SDValue GA = DAG.getTargetGlobalAddress(GV, DL, Ty, GSDN->getOffset(),
3690 PPCII::MO_GOT_PCREL_FLAG);
3691 SDValue MatPCRel = DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
3692 SDValue Load = DAG.getLoad(MVT::i64, DL, DAG.getEntryNode(), MatPCRel,
3693 MachinePointerInfo());
3694 return Load;
3695 } else {
3696 SDValue GA = DAG.getTargetGlobalAddress(GV, DL, Ty, GSDN->getOffset(),
3698 return DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
3699 }
3700 }
3701 setUsesTOCBasePtr(DAG);
3702 SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset());
3703 return getTOCEntry(DAG, DL, GA);
3704 }
3705
3706 unsigned MOHiFlag, MOLoFlag;
3707 bool IsPIC = isPositionIndependent();
3708 getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag, GV);
3709
3710 if (IsPIC && Subtarget.isSVR4ABI()) {
3711 SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT,
3712 GSDN->getOffset(),
3713 PPCII::MO_PIC_FLAG);
3714 return getTOCEntry(DAG, DL, GA);
3715 }
3716
3717 SDValue GAHi =
3718 DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOHiFlag);
3719 SDValue GALo =
3720 DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOLoFlag);
3721
3722 return LowerLabelRef(GAHi, GALo, IsPIC, DAG);
3723}
3724
3725SDValue PPCTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
3726 bool IsStrict = Op->isStrictFPOpcode();
3727 ISD::CondCode CC =
3728 cast<CondCodeSDNode>(Op.getOperand(IsStrict ? 3 : 2))->get();
3729 SDValue LHS = Op.getOperand(IsStrict ? 1 : 0);
3730 SDValue RHS = Op.getOperand(IsStrict ? 2 : 1);
3731 SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
3732 EVT LHSVT = LHS.getValueType();
3733 SDLoc dl(Op);
3734
3735 // Soften the setcc with libcall if it is fp128.
3736 if (LHSVT == MVT::f128) {
3737 assert(!Subtarget.hasP9Vector() &&
3738 "SETCC for f128 is already legal under Power9!");
3739 softenSetCCOperands(DAG, LHSVT, LHS, RHS, CC, dl, LHS, RHS, Chain,
3740 Op->getOpcode() == ISD::STRICT_FSETCCS);
3741 if (RHS.getNode())
3742 LHS = DAG.getNode(ISD::SETCC, dl, Op.getValueType(), LHS, RHS,
3743 DAG.getCondCode(CC));
3744 if (IsStrict)
3745 return DAG.getMergeValues({LHS, Chain}, dl);
3746 return LHS;
3747 }
3748
3749 assert(!IsStrict && "Don't know how to handle STRICT_FSETCC!");
3750
3751 if (Op.getValueType() == MVT::v2i64) {
3752 // When the operands themselves are v2i64 values, we need to do something
3753 // special because VSX has no underlying comparison operations for these.
3754 if (LHS.getValueType() == MVT::v2i64) {
3755 // Equality can be handled by casting to the legal type for Altivec
3756 // comparisons, everything else needs to be expanded.
3757 if (CC != ISD::SETEQ && CC != ISD::SETNE)
3758 return SDValue();
3759 SDValue SetCC32 = DAG.getSetCC(
3760 dl, MVT::v4i32, DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, LHS),
3761 DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, RHS), CC);
3762 int ShuffV[] = {1, 0, 3, 2};
3763 SDValue Shuff =
3764 DAG.getVectorShuffle(MVT::v4i32, dl, SetCC32, SetCC32, ShuffV);
3765 return DAG.getBitcast(MVT::v2i64,
3766 DAG.getNode(CC == ISD::SETEQ ? ISD::AND : ISD::OR,
3767 dl, MVT::v4i32, Shuff, SetCC32));
3768 }
3769
3770 // We handle most of these in the usual way.
3771 return Op;
3772 }
3773
3774 // If we're comparing for equality to zero, expose the fact that this is
3775 // implemented as a ctlz/srl pair on ppc, so that the dag combiner can
3776 // fold the new nodes.
3777 if (SDValue V = lowerCmpEqZeroToCtlzSrl(Op, DAG))
3778 return V;
3779
3780 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS)) {
3781 // Leave comparisons against 0 and -1 alone for now, since they're usually
3782 // optimized. FIXME: revisit this when we can custom lower all setcc
3783 // optimizations.
3784 if (C->isAllOnes() || C->isZero())
3785 return SDValue();
3786 }
3787
3788 // If we have an integer seteq/setne, turn it into a compare against zero
3789 // by xor'ing the rhs with the lhs, which is faster than setting a
3790 // condition register, reading it back out, and masking the correct bit. The
3791 // normal approach here uses sub to do this instead of xor. Using xor exposes
3792 // the result to other bit-twiddling opportunities.
3793 if (LHSVT.isInteger() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
3794 EVT VT = Op.getValueType();
3795 SDValue Sub = DAG.getNode(ISD::XOR, dl, LHSVT, LHS, RHS);
3796 return DAG.getSetCC(dl, VT, Sub, DAG.getConstant(0, dl, LHSVT), CC);
3797 }
3798 return SDValue();
3799}
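// Illustrative sketch (hypothetical helper, not part of the lowering): the
// integer seteq/setne rewrite above relies on the identity
// (a == b) <=> ((a ^ b) == 0), so the comparison can feed the
// compare-against-zero / ctlz-srl patterns handled earlier in this function.
static bool exampleXorEquality(uint64_t A, uint64_t B) {
  // Equivalent to (A == B); exposing the xor gives the DAG combiner more
  // bit-twiddling opportunities than a subtract would.
  return (A ^ B) == 0;
}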
3800
3801SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
3802 SDNode *Node = Op.getNode();
3803 EVT VT = Node->getValueType(0);
3804 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3805 SDValue InChain = Node->getOperand(0);
3806 SDValue VAListPtr = Node->getOperand(1);
3807 const Value *SV = cast<SrcValueSDNode>(Node->getOperand(2))->getValue();
3808 SDLoc dl(Node);
3809
3810 assert(!Subtarget.isPPC64() && "LowerVAARG is PPC32 only");
3811
3812 // gpr_index
3813 SDValue GprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain,
3814 VAListPtr, MachinePointerInfo(SV), MVT::i8);
3815 InChain = GprIndex.getValue(1);
3816
3817 if (VT == MVT::i64) {
3818 // Check if GprIndex is even
3819 SDValue GprAnd = DAG.getNode(ISD::AND, dl, MVT::i32, GprIndex,
3820 DAG.getConstant(1, dl, MVT::i32));
3821 SDValue CC64 = DAG.getSetCC(dl, MVT::i32, GprAnd,
3822 DAG.getConstant(0, dl, MVT::i32), ISD::SETNE);
3823 SDValue GprIndexPlusOne = DAG.getNode(ISD::ADD, dl, MVT::i32, GprIndex,
3824 DAG.getConstant(1, dl, MVT::i32));
3825 // Align GprIndex to be even if it isn't
3826 GprIndex = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC64, GprIndexPlusOne,
3827 GprIndex);
3828 }
3829
3830 // fpr index is 1 byte after gpr
3831 SDValue FprPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
3832 DAG.getConstant(1, dl, MVT::i32));
3833
3834 // fpr
3835 SDValue FprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain,
3836 FprPtr, MachinePointerInfo(SV), MVT::i8);
3837 InChain = FprIndex.getValue(1);
3838
3839 SDValue RegSaveAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
3840 DAG.getConstant(8, dl, MVT::i32));
3841
3842 SDValue OverflowAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
3843 DAG.getConstant(4, dl, MVT::i32));
3844
3845 // areas
3846 SDValue OverflowArea =
3847 DAG.getLoad(MVT::i32, dl, InChain, OverflowAreaPtr, MachinePointerInfo());
3848 InChain = OverflowArea.getValue(1);
3849
3850 SDValue RegSaveArea =
3851 DAG.getLoad(MVT::i32, dl, InChain, RegSaveAreaPtr, MachinePointerInfo());
3852 InChain = RegSaveArea.getValue(1);
3853
3854 // select overflow_area if index >= 8
3855 SDValue CC = DAG.getSetCC(dl, MVT::i32, VT.isInteger() ? GprIndex : FprIndex,
3856 DAG.getConstant(8, dl, MVT::i32), ISD::SETLT);
3857
3858 // adjustment constant gpr_index * 4/8
3859 SDValue RegConstant = DAG.getNode(ISD::MUL, dl, MVT::i32,
3860 VT.isInteger() ? GprIndex : FprIndex,
3861 DAG.getConstant(VT.isInteger() ? 4 : 8, dl,
3862 MVT::i32));
3863
3864 // OurReg = RegSaveArea + RegConstant
3865 SDValue OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, RegSaveArea,
3866 RegConstant);
3867
3868 // Floating types are 32 bytes into RegSaveArea
3869 if (VT.isFloatingPoint())
3870 OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, OurReg,
3871 DAG.getConstant(32, dl, MVT::i32));
3872
3873 // increase {f,g}pr_index by 1 (or 2 if VT is i64)
3874 SDValue IndexPlus1 = DAG.getNode(ISD::ADD, dl, MVT::i32,
3875 VT.isInteger() ? GprIndex : FprIndex,
3876 DAG.getConstant(VT == MVT::i64 ? 2 : 1, dl,
3877 MVT::i32));
3878
3879 InChain = DAG.getTruncStore(InChain, dl, IndexPlus1,
3880 VT.isInteger() ? VAListPtr : FprPtr,
3881 MachinePointerInfo(SV), MVT::i8);
3882
3883 // determine if we should load from reg_save_area or overflow_area
3884 SDValue Result = DAG.getNode(ISD::SELECT, dl, PtrVT, CC, OurReg, OverflowArea);
3885
3886 // increase overflow_area by 4/8 if gpr/fpr index >= 8
3887 SDValue OverflowAreaPlusN = DAG.getNode(ISD::ADD, dl, PtrVT, OverflowArea,
3888 DAG.getConstant(VT.isInteger() ? 4 : 8,
3889 dl, MVT::i32));
3890
3891 OverflowArea = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC, OverflowArea,
3892 OverflowAreaPlusN);
3893
3894 InChain = DAG.getTruncStore(InChain, dl, OverflowArea, OverflowAreaPtr,
3895 MachinePointerInfo(), MVT::i32);
3896
3897 return DAG.getLoad(VT, dl, InChain, Result, MachinePointerInfo());
3898}
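// Illustrative sketch (hypothetical helper, not used by LowerVAARG): the
// register save area addressing computed above. The eight GPR slots occupy
// the first 32 bytes (8 x 4) and the FPR slots follow at 32 bytes in
// (8 x 8 each), matching "RegSaveArea + gpr_index * 4" and
// "RegSaveArea + 32 + fpr_index * 8".
static unsigned exampleRegSaveAreaOffset(bool IsFloatingPoint, unsigned Index) {
  return IsFloatingPoint ? 32 + Index * 8 : Index * 4;
}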
3899
3900SDValue PPCTargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG) const {
3901 assert(!Subtarget.isPPC64() && "LowerVACOPY is PPC32 only");
3902
3903 // We have to copy the entire va_list struct:
3904 // 2*sizeof(char) + 2 bytes of padding + 2*sizeof(char*) = 12 bytes
3905 return DAG.getMemcpy(Op.getOperand(0), Op, Op.getOperand(1), Op.getOperand(2),
3906 DAG.getConstant(12, SDLoc(Op), MVT::i32), Align(8),
3907 false, true, /*CI=*/nullptr, std::nullopt,
3908 MachinePointerInfo(), MachinePointerInfo());
3909}
3910
3911SDValue PPCTargetLowering::LowerADJUST_TRAMPOLINE(SDValue Op,
3912 SelectionDAG &DAG) const {
3913 if (Subtarget.isAIXABI())
3914 report_fatal_error("ADJUST_TRAMPOLINE operation is not supported on AIX.");
3915
3916 return Op.getOperand(0);
3917}
3918
3919SDValue PPCTargetLowering::LowerINLINEASM(SDValue Op, SelectionDAG &DAG) const {
3920 MachineFunction &MF = DAG.getMachineFunction();
3921 PPCFunctionInfo &MFI = *MF.getInfo<PPCFunctionInfo>();
3922
3923 assert((Op.getOpcode() == ISD::INLINEASM ||
3924 Op.getOpcode() == ISD::INLINEASM_BR) &&
3925 "Expecting Inline ASM node.");
3926
3927 // If an LR store is already known to be required then there is no point in
3928 // checking this ASM as well.
3929 if (MFI.isLRStoreRequired())
3930 return Op;
3931
3932 // Inline ASM nodes have an optional last operand that is an incoming Flag of
3933 // type MVT::Glue. We want to ignore this last operand if that is the case.
3934 unsigned NumOps = Op.getNumOperands();
3935 if (Op.getOperand(NumOps - 1).getValueType() == MVT::Glue)
3936 --NumOps;
3937
3938 // Check all operands that may contain the LR.
3939 for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
3940 const InlineAsm::Flag Flags(Op.getConstantOperandVal(i));
3941 unsigned NumVals = Flags.getNumOperandRegisters();
3942 ++i; // Skip the ID value.
3943
3944 switch (Flags.getKind()) {
3945 default:
3946 llvm_unreachable("Bad flags!");
3950 i += NumVals;
3951 break;
3955 for (; NumVals; --NumVals, ++i) {
3956 Register Reg = cast<RegisterSDNode>(Op.getOperand(i))->getReg();
3957 if (Reg != PPC::LR && Reg != PPC::LR8)
3958 continue;
3959 MFI.setLRStoreRequired();
3960 return Op;
3961 }
3962 break;
3963 }
3964 }
3965 }
3966
3967 return Op;
3968}
3969
3970SDValue PPCTargetLowering::LowerINIT_TRAMPOLINE(SDValue Op,
3971 SelectionDAG &DAG) const {
3972 if (Subtarget.isAIXABI())
3973 report_fatal_error("INIT_TRAMPOLINE operation is not supported on AIX.");
3974
3975 SDValue Chain = Op.getOperand(0);
3976 SDValue Trmp = Op.getOperand(1); // trampoline
3977 SDValue FPtr = Op.getOperand(2); // nested function
3978 SDValue Nest = Op.getOperand(3); // 'nest' parameter value
3979 SDLoc dl(Op);
3980
3981 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3982 bool isPPC64 = (PtrVT == MVT::i64);
3983 Type *IntPtrTy = DAG.getDataLayout().getIntPtrType(*DAG.getContext());
3984
3985 TargetLowering::ArgListTy Args;
3986 TargetLowering::ArgListEntry Entry;
3987
3988 Entry.Ty = IntPtrTy;
3989 Entry.Node = Trmp; Args.push_back(Entry);
3990
3991 // TrampSize == (isPPC64 ? 48 : 40);
3992 Entry.Node = DAG.getConstant(isPPC64 ? 48 : 40, dl,
3993 isPPC64 ? MVT::i64 : MVT::i32);
3994 Args.push_back(Entry);
3995
3996 Entry.Node = FPtr; Args.push_back(Entry);
3997 Entry.Node = Nest; Args.push_back(Entry);
3998
3999 // Lower to a call to __trampoline_setup(Trmp, TrampSize, FPtr, ctx_reg)
4000 TargetLowering::CallLoweringInfo CLI(DAG);
4001 CLI.setDebugLoc(dl).setChain(Chain).setLibCallee(
4002 CallingConv::C, Type::getVoidTy(*DAG.getContext()),
4003 DAG.getExternalSymbol("__trampoline_setup", PtrVT), std::move(Args));
4004
4005 std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
4006 return CallResult.second;
4007}
4008
4009SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
4010 MachineFunction &MF = DAG.getMachineFunction();
4011 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
4012 EVT PtrVT = getPointerTy(MF.getDataLayout());
4013
4014 SDLoc dl(Op);
4015
4016 if (Subtarget.isPPC64() || Subtarget.isAIXABI()) {
4017 // vastart just stores the address of the VarArgsFrameIndex slot into the
4018 // memory location argument.
4019 SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
4020 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
4021 return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1),
4022 MachinePointerInfo(SV));
4023 }
4024
4025 // For the 32-bit SVR4 ABI we follow the layout of the va_list struct.
4026 // We suppose the given va_list is already allocated.
4027 //
4028 // typedef struct {
4029 // char gpr; /* index into the array of 8 GPRs
4030 // * stored in the register save area
4031 // * gpr=0 corresponds to r3,
4032 // * gpr=1 to r4, etc.
4033 // */
4034 // char fpr; /* index into the array of 8 FPRs
4035 // * stored in the register save area
4036 // * fpr=0 corresponds to f1,
4037 // * fpr=1 to f2, etc.
4038 // */
4039 // char *overflow_arg_area;
4040 // /* location on stack that holds
4041 // * the next overflow argument
4042 // */
4043 // char *reg_save_area;
4044 // /* where r3:r10 and f1:f8 (if saved)
4045 // * are stored
4046 // */
4047 // } va_list[1];
4048
4049 SDValue ArgGPR = DAG.getConstant(FuncInfo->getVarArgsNumGPR(), dl, MVT::i32);
4050 SDValue ArgFPR = DAG.getConstant(FuncInfo->getVarArgsNumFPR(), dl, MVT::i32);
4051 SDValue StackOffsetFI = DAG.getFrameIndex(FuncInfo->getVarArgsStackOffset(),
4052 PtrVT);
4053 SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
4054 PtrVT);
4055
4056 uint64_t FrameOffset = PtrVT.getSizeInBits()/8;
4057 SDValue ConstFrameOffset = DAG.getConstant(FrameOffset, dl, PtrVT);
4058
4059 uint64_t StackOffset = PtrVT.getSizeInBits()/8 - 1;
4060 SDValue ConstStackOffset = DAG.getConstant(StackOffset, dl, PtrVT);
4061
4062 uint64_t FPROffset = 1;
4063 SDValue ConstFPROffset = DAG.getConstant(FPROffset, dl, PtrVT);
4064
4065 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
4066
4067 // Store first byte : number of int regs
4068 SDValue firstStore =
4069 DAG.getTruncStore(Op.getOperand(0), dl, ArgGPR, Op.getOperand(1),
4070 MachinePointerInfo(SV), MVT::i8);
4071 uint64_t nextOffset = FPROffset;
4072 SDValue nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, Op.getOperand(1),
4073 ConstFPROffset);
4074
4075 // Store second byte : number of float regs
4076 SDValue secondStore =
4077 DAG.getTruncStore(firstStore, dl, ArgFPR, nextPtr,
4078 MachinePointerInfo(SV, nextOffset), MVT::i8);
4079 nextOffset += StackOffset;
4080 nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstStackOffset);
4081
4082 // Store second word : arguments given on stack
4083 SDValue thirdStore = DAG.getStore(secondStore, dl, StackOffsetFI, nextPtr,
4084 MachinePointerInfo(SV, nextOffset));
4085 nextOffset += FrameOffset;
4086 nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstFrameOffset);
4087
4088 // Store third word : arguments given in registers
4089 return DAG.getStore(thirdStore, dl, FR, nextPtr,
4090 MachinePointerInfo(SV, nextOffset));
4091}
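// Illustrative sketch of the 32-bit SVR4 va_list layout the four stores above
// assume (hypothetical mirror type, not used by the lowering): gpr at byte 0,
// fpr at byte 1, overflow_arg_area at byte 4 (FPROffset 1 plus StackOffset 3),
// and reg_save_area at byte 8 (plus FrameOffset 4), 12 bytes total as noted in
// LowerVACOPY.
namespace {
struct ExampleVAList32 {
  char GPR;              // byte 0: number of fixed GPR arguments used
  char FPR;              // byte 1: number of fixed FPR arguments used
  char *OverflowArgArea; // byte 4, after two bytes of padding
  char *RegSaveArea;     // byte 8
};
static_assert(sizeof(ExampleVAList32) == 12 || sizeof(void *) != 4,
              "the 12-byte figure only holds for 32-bit pointers");
} // end anonymous namespace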
4092
4093/// FPR - The set of FP registers that should be allocated for arguments
4094/// on Darwin and AIX.
4095static const MCPhysReg FPR[] = {PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5,
4096 PPC::F6, PPC::F7, PPC::F8, PPC::F9, PPC::F10,
4097 PPC::F11, PPC::F12, PPC::F13};
4098
4099/// CalculateStackSlotSize - Calculates the size reserved for this argument on
4100/// the stack.
4101static unsigned CalculateStackSlotSize(EVT ArgVT, ISD::ArgFlagsTy Flags,
4102 unsigned PtrByteSize) {
4103 unsigned ArgSize = ArgVT.getStoreSize();
4104 if (Flags.isByVal())
4105 ArgSize = Flags.getByValSize();
4106
4107 // Round up to multiples of the pointer size, except for array members,
4108 // which are always packed.
4109 if (!Flags.isInConsecutiveRegs())
4110 ArgSize = ((ArgSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
4111
4112 return ArgSize;
4113}
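// A minimal sketch of the rounding above (hypothetical helper): stack slot
// sizes are rounded up to the pointer size unless the argument is a packed
// array member, so e.g. a 5-byte byval argument occupies 8 bytes on a 64-bit
// target.
static unsigned exampleRoundToPtrSize(unsigned ArgSize, unsigned PtrByteSize) {
  return ((ArgSize + PtrByteSize - 1) / PtrByteSize) * PtrByteSize;
}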
4114
4115/// CalculateStackSlotAlignment - Calculates the alignment of this argument
4116/// on the stack.
4117 static Align CalculateStackSlotAlignment(EVT ArgVT, EVT OrigVT,
4118 ISD::ArgFlagsTy Flags,
4119 unsigned PtrByteSize) {
4120 Align Alignment(PtrByteSize);
4121
4122 // Altivec parameters are padded to a 16 byte boundary.
4123 if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
4124 ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
4125 ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64 ||
4126 ArgVT == MVT::v1i128 || ArgVT == MVT::f128)
4127 Alignment = Align(16);
4128
4129 // ByVal parameters are aligned as requested.
4130 if (Flags.isByVal()) {
4131 auto BVAlign = Flags.getNonZeroByValAlign();
4132 if (BVAlign > PtrByteSize) {
4133 if (BVAlign.value() % PtrByteSize != 0)
4135 "ByVal alignment is not a multiple of the pointer size");
4136
4137 Alignment = BVAlign;
4138 }
4139 }
4140
4141 // Array members are always packed to their original alignment.
4142 if (Flags.isInConsecutiveRegs()) {
4143 // If the array member was split into multiple registers, the first
4144 // needs to be aligned to the size of the full type. (Except for
4145 // ppcf128, which is only aligned as its f64 components.)
4146 if (Flags.isSplit() && OrigVT != MVT::ppcf128)
4147 Alignment = Align(OrigVT.getStoreSize());
4148 else
4149 Alignment = Align(ArgVT.getStoreSize());
4150 }
4151
4152 return Alignment;
4153}
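// Simplified sketch of the rules above (hypothetical helper; it ignores the
// split-array and ppcf128 special cases): vector and f128 arguments are padded
// to 16 bytes, byval arguments may request a larger pointer-size-multiple
// alignment, and everything else defaults to the pointer size.
static unsigned exampleStackSlotAlign(bool IsVectorOrF128, unsigned ByValAlign,
                                      unsigned PtrByteSize) {
  if (IsVectorOrF128)
    return 16;
  return std::max(ByValAlign, PtrByteSize);
}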
4154
4155/// CalculateStackSlotUsed - Return whether this argument will use its
4156/// stack slot (instead of being passed in registers). ArgOffset,
4157/// AvailableFPRs, and AvailableVRs must hold the current argument
4158/// position, and will be updated to account for this argument.
4159static bool CalculateStackSlotUsed(EVT ArgVT, EVT OrigVT, ISD::ArgFlagsTy Flags,
4160 unsigned PtrByteSize, unsigned LinkageSize,
4161 unsigned ParamAreaSize, unsigned &ArgOffset,
4162 unsigned &AvailableFPRs,
4163 unsigned &AvailableVRs) {
4164 bool UseMemory = false;
4165
4166 // Respect alignment of argument on the stack.
4167 Align Alignment =
4168 CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
4169 ArgOffset = alignTo(ArgOffset, Alignment);
4170 // If there's no space left in the argument save area, we must
4171 // use memory (this check also catches zero-sized arguments).
4172 if (ArgOffset >= LinkageSize + ParamAreaSize)
4173 UseMemory = true;
4174
4175 // Allocate argument on the stack.
4176 ArgOffset += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
4177 if (Flags.isInConsecutiveRegsLast())
4178 ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
4179 // If we overran the argument save area, we must use memory
4180 // (this check catches arguments passed partially in memory)
4181 if (ArgOffset > LinkageSize + ParamAreaSize)
4182 UseMemory = true;
4183
4184 // However, if the argument is actually passed in an FPR or a VR,
4185 // we don't use memory after all.
4186 if (!Flags.isByVal()) {
4187 if (ArgVT == MVT::f32 || ArgVT == MVT::f64)
4188 if (AvailableFPRs > 0) {
4189 --AvailableFPRs;
4190 return false;
4191 }
4192 if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
4193 ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
4194 ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64 ||
4195 ArgVT == MVT::v1i128 || ArgVT == MVT::f128)
4196 if (AvailableVRs > 0) {
4197 --AvailableVRs;
4198 return false;
4199 }
4200 }
4201
4202 return UseMemory;
4203}
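// Minimal sketch of the overflow checks above (hypothetical helper): an
// argument is considered to use its stack slot once its aligned offset
// reaches, or its end crosses, the LinkageSize + ParamAreaSize boundary
// (96 bytes for ELFv2: a 32-byte linkage area plus eight GPR doublewords),
// unless it still fits in an available FPR or VR as handled above.
static bool exampleOverflowsParamSaveArea(unsigned AlignedOffset,
                                          unsigned ArgSize,
                                          unsigned LinkageSize,
                                          unsigned ParamAreaSize) {
  return AlignedOffset >= LinkageSize + ParamAreaSize ||
         AlignedOffset + ArgSize > LinkageSize + ParamAreaSize;
}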
4204
4205/// EnsureStackAlignment - Round stack frame size up from NumBytes to
4206/// ensure minimum alignment required for target.
4207 static unsigned EnsureStackAlignment(const PPCFrameLowering *Lowering,
4208 unsigned NumBytes) {
4209 return alignTo(NumBytes, Lowering->getStackAlign());
4210}
4211
4212SDValue PPCTargetLowering::LowerFormalArguments(
4213 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
4214 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
4215 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
4216 if (Subtarget.isAIXABI())
4217 return LowerFormalArguments_AIX(Chain, CallConv, isVarArg, Ins, dl, DAG,
4218 InVals);
4219 if (Subtarget.is64BitELFABI())
4220 return LowerFormalArguments_64SVR4(Chain, CallConv, isVarArg, Ins, dl, DAG,
4221 InVals);
4222 assert(Subtarget.is32BitELFABI());
4223 return LowerFormalArguments_32SVR4(Chain, CallConv, isVarArg, Ins, dl, DAG,
4224 InVals);
4225}
4226
4227SDValue PPCTargetLowering::LowerFormalArguments_32SVR4(
4228 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
4229 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
4230 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
4231
4232 // 32-bit SVR4 ABI Stack Frame Layout:
4233 // +-----------------------------------+
4234 // +--> | Back chain |
4235 // | +-----------------------------------+
4236 // | | Floating-point register save area |
4237 // | +-----------------------------------+
4238 // | | General register save area |
4239 // | +-----------------------------------+
4240 // | | CR save word |
4241 // | +-----------------------------------+
4242 // | | VRSAVE save word |
4243 // | +-----------------------------------+
4244 // | | Alignment padding |
4245 // | +-----------------------------------+
4246 // | | Vector register save area |
4247 // | +-----------------------------------+
4248 // | | Local variable space |
4249 // | +-----------------------------------+
4250 // | | Parameter list area |
4251 // | +-----------------------------------+
4252 // | | LR save word |
4253 // | +-----------------------------------+
4254 // SP--> +--- | Back chain |
4255 // +-----------------------------------+
4256 //
4257 // Specifications:
4258 // System V Application Binary Interface PowerPC Processor Supplement
4259 // AltiVec Technology Programming Interface Manual
4260
4261 MachineFunction &MF = DAG.getMachineFunction();
4262 MachineFrameInfo &MFI = MF.getFrameInfo();
4263 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
4264
4265 EVT PtrVT = getPointerTy(MF.getDataLayout());
4266 // Potential tail calls could cause overwriting of argument stack slots.
4267 bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
4268 (CallConv == CallingConv::Fast));
4269 const Align PtrAlign(4);
4270
4271 // Assign locations to all of the incoming arguments.
4272 SmallVector<CCValAssign, 16> ArgLocs;
4273 PPCCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
4274 *DAG.getContext());
4275
4276 // Reserve space for the linkage area on the stack.
4277 unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
4278 CCInfo.AllocateStack(LinkageSize, PtrAlign);
4279 if (useSoftFloat())
4280 CCInfo.PreAnalyzeFormalArguments(Ins);
4281
4282 CCInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4);
4283 CCInfo.clearWasPPCF128();
4284
4285 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
4286 CCValAssign &VA = ArgLocs[i];
4287
4288 // Arguments stored in registers.
4289 if (VA.isRegLoc()) {
4290 const TargetRegisterClass *RC;
4291 EVT ValVT = VA.getValVT();
4292
4293 switch (ValVT.getSimpleVT().SimpleTy) {
4294 default:
4295 llvm_unreachable("ValVT not supported by formal arguments Lowering");
4296 case MVT::i1:
4297 case MVT::i32:
4298 RC = &PPC::GPRCRegClass;
4299 break;
4300 case MVT::f32:
4301 if (Subtarget.hasP8Vector())
4302 RC = &PPC::VSSRCRegClass;
4303 else if (Subtarget.hasSPE())
4304 RC = &PPC::GPRCRegClass;
4305 else
4306 RC = &PPC::F4RCRegClass;
4307 break;
4308 case MVT::f64:
4309 if (Subtarget.hasVSX())
4310 RC = &PPC::VSFRCRegClass;
4311 else if (Subtarget.hasSPE())
4312 // SPE passes doubles in GPR pairs.
4313 RC = &PPC::GPRCRegClass;
4314 else
4315 RC = &PPC::F8RCRegClass;
4316 break;
4317 case MVT::v16i8:
4318 case MVT::v8i16:
4319 case MVT::v4i32:
4320 RC = &PPC::VRRCRegClass;
4321 break;
4322 case MVT::v4f32:
4323 RC = &PPC::VRRCRegClass;
4324 break;
4325 case MVT::v2f64:
4326 case MVT::v2i64:
4327 RC = &PPC::VRRCRegClass;
4328 break;
4329 }
4330
4331 SDValue ArgValue;
4332 // Transform the arguments stored in physical registers into
4333 // virtual ones.
4334 if (VA.getLocVT() == MVT::f64 && Subtarget.hasSPE()) {
4335 assert(i + 1 < e && "No second half of double precision argument");
4336 Register RegLo = MF.addLiveIn(VA.getLocReg(), RC);
4337 Register RegHi = MF.addLiveIn(ArgLocs[++i].getLocReg(), RC);
4338 SDValue ArgValueLo = DAG.getCopyFromReg(Chain, dl, RegLo, MVT::i32);
4339 SDValue ArgValueHi = DAG.getCopyFromReg(Chain, dl, RegHi, MVT::i32);
4340 if (!Subtarget.isLittleEndian())
4341 std::swap (ArgValueLo, ArgValueHi);
4342 ArgValue = DAG.getNode(PPCISD::BUILD_SPE64, dl, MVT::f64, ArgValueLo,
4343 ArgValueHi);
4344 } else {
4345 Register Reg = MF.addLiveIn(VA.getLocReg(), RC);
4346 ArgValue = DAG.getCopyFromReg(Chain, dl, Reg,
4347 ValVT == MVT::i1 ? MVT::i32 : ValVT);
4348 if (ValVT == MVT::i1)
4349 ArgValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, ArgValue);
4350 }
4351
4352 InVals.push_back(ArgValue);
4353 } else {
4354 // Argument stored in memory.
4355 assert(VA.isMemLoc());
4356
4357 // Get the extended size of the argument type in stack
4358 unsigned ArgSize = VA.getLocVT().getStoreSize();
4359 // Get the actual size of the argument type
4360 unsigned ObjSize = VA.getValVT().getStoreSize();
4361 unsigned ArgOffset = VA.getLocMemOffset();
4362 // Stack objects in PPC32 are right justified.
4363 ArgOffset += ArgSize - ObjSize;
4364 int FI = MFI.CreateFixedObject(ArgSize, ArgOffset, isImmutable);
4365
4366 // Create load nodes to retrieve arguments from the stack.
4367 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4368 InVals.push_back(
4369 DAG.getLoad(VA.getValVT(), dl, Chain, FIN, MachinePointerInfo()));
4370 }
4371 }
4372
4373 // Assign locations to all of the incoming aggregate by value arguments.
4374 // Aggregates passed by value are stored in the local variable space of the
4375 // caller's stack frame, right above the parameter list area.
4376 SmallVector<CCValAssign, 16> ByValArgLocs;
4377 CCState CCByValInfo(CallConv, isVarArg, DAG.getMachineFunction(),
4378 ByValArgLocs, *DAG.getContext());
4379
4380 // Reserve stack space for the allocations in CCInfo.
4381 CCByValInfo.AllocateStack(CCInfo.getStackSize(), PtrAlign);
4382
4383 CCByValInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4_ByVal);
4384
4385 // Area that is at least reserved in the caller of this function.
4386 unsigned MinReservedArea = CCByValInfo.getStackSize();
4387 MinReservedArea = std::max(MinReservedArea, LinkageSize);
4388
4389 // Set the size that is at least reserved in caller of this function. Tail
4390 // call optimized function's reserved stack space needs to be aligned so that
4391 // taking the difference between two stack areas will result in an aligned
4392 // stack.
4393 MinReservedArea =
4394 EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
4395 FuncInfo->setMinReservedArea(MinReservedArea);
4396
4397 SmallVector<SDValue, 8> MemOps;
4398
4399 // If the function takes variable number of arguments, make a frame index for
4400 // the start of the first vararg value... for expansion of llvm.va_start.
4401 if (isVarArg) {
4402 static const MCPhysReg GPArgRegs[] = {
4403 PPC::R3, PPC::R4, PPC::R5, PPC::R6,
4404 PPC::R7, PPC::R8, PPC::R9, PPC::R10,
4405 };
4406 const unsigned NumGPArgRegs = std::size(GPArgRegs);
4407
4408 static const MCPhysReg FPArgRegs[] = {
4409 PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
4410 PPC::F8
4411 };
4412 unsigned NumFPArgRegs = std::size(FPArgRegs);
4413
4414 if (useSoftFloat() || hasSPE())
4415 NumFPArgRegs = 0;
4416
4417 FuncInfo->setVarArgsNumGPR(CCInfo.getFirstUnallocated(GPArgRegs));
4418 FuncInfo->setVarArgsNumFPR(CCInfo.getFirstUnallocated(FPArgRegs));
4419
4420 // Make room for NumGPArgRegs and NumFPArgRegs.
4421 int Depth = NumGPArgRegs * PtrVT.getSizeInBits()/8 +
4422 NumFPArgRegs * MVT(MVT::f64).getSizeInBits()/8;
4423
4424 FuncInfo->setVarArgsStackOffset(MFI.CreateFixedObject(
4425 PtrVT.getSizeInBits() / 8, CCInfo.getStackSize(), true));
4426
4427 FuncInfo->setVarArgsFrameIndex(
4428 MFI.CreateStackObject(Depth, Align(8), false));
4429 SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
4430
4431 // The fixed integer arguments of a variadic function are stored to the
4432 // VarArgsFrameIndex on the stack so that they may be loaded by
4433 // dereferencing the result of va_next.
4434 for (unsigned GPRIndex = 0; GPRIndex != NumGPArgRegs; ++GPRIndex) {
4435 // Get an existing live-in vreg, or add a new one.
4436 Register VReg = MF.getRegInfo().getLiveInVirtReg(GPArgRegs[GPRIndex]);
4437 if (!VReg)
4438 VReg = MF.addLiveIn(GPArgRegs[GPRIndex], &PPC::GPRCRegClass);
4439
4440 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4441 SDValue Store =
4442 DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
4443 MemOps.push_back(Store);
4444 // Increment the address by four for the next argument to store
4445 SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, dl, PtrVT);
4446 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
4447 }
4448
4449 // FIXME 32-bit SVR4: We only need to save FP argument registers if CR bit 6
4450 // is set.
4451 // The double arguments are stored to the VarArgsFrameIndex
4452 // on the stack.
4453 for (unsigned FPRIndex = 0; FPRIndex != NumFPArgRegs; ++FPRIndex) {
4454 // Get an existing live-in vreg, or add a new one.
4455 Register VReg = MF.getRegInfo().getLiveInVirtReg(FPArgRegs[FPRIndex]);
4456 if (!VReg)
4457 VReg = MF.addLiveIn(FPArgRegs[FPRIndex], &PPC::F8RCRegClass);
4458
4459 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::f64);
4460 SDValue Store =
4461 DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
4462 MemOps.push_back(Store);
4463 // Increment the address by eight for the next argument to store
4464 SDValue PtrOff = DAG.getConstant(MVT(MVT::f64).getSizeInBits()/8, dl,
4465 PtrVT);
4466 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
4467 }
4468 }
4469
4470 if (!MemOps.empty())
4471 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
4472
4473 return Chain;
4474}
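// Illustrative sketch of the SPE f64 reassembly above (hypothetical helper
// showing one plausible packing): the two 32-bit halves arrive in a GPR pair
// and are swapped on big-endian targets before being combined into a single
// 64-bit value, which is what PPCISD::BUILD_SPE64 models in the DAG.
static uint64_t exampleBuildSPE64(uint32_t Lo, uint32_t Hi,
                                  bool IsLittleEndian) {
  if (!IsLittleEndian)
    std::swap(Lo, Hi);
  return (uint64_t(Hi) << 32) | Lo;
}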
4475
4476// PPC64 passes i8, i16, and i32 values in i64 registers. Promote
4477// value to MVT::i64 and then truncate to the correct register size.
4478SDValue PPCTargetLowering::extendArgForPPC64(ISD::ArgFlagsTy Flags,
4479 EVT ObjectVT, SelectionDAG &DAG,
4480 SDValue ArgVal,
4481 const SDLoc &dl) const {
4482 if (Flags.isSExt())
4483 ArgVal = DAG.getNode(ISD::AssertSext, dl, MVT::i64, ArgVal,
4484 DAG.getValueType(ObjectVT));
4485 else if (Flags.isZExt())
4486 ArgVal = DAG.getNode(ISD::AssertZext, dl, MVT::i64, ArgVal,
4487 DAG.getValueType(ObjectVT));
4488
4489 return DAG.getNode(ISD::TRUNCATE, dl, ObjectVT, ArgVal);
4490}
4491
4492SDValue PPCTargetLowering::LowerFormalArguments_64SVR4(
4493 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
4494 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
4495 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
4496 // TODO: add description of PPC stack frame format, or at least some docs.
4497 //
4498 bool isELFv2ABI = Subtarget.isELFv2ABI();
4499 bool isLittleEndian = Subtarget.isLittleEndian();
4500 MachineFunction &MF = DAG.getMachineFunction();
4501 MachineFrameInfo &MFI = MF.getFrameInfo();
4502 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
4503
4504 assert(!(CallConv == CallingConv::Fast && isVarArg) &&
4505 "fastcc not supported on varargs functions");
4506
4507 EVT PtrVT = getPointerTy(MF.getDataLayout());
4508 // Potential tail calls could cause overwriting of argument stack slots.
4509 bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
4510 (CallConv == CallingConv::Fast));
4511 unsigned PtrByteSize = 8;
4512 unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
4513
4514 static const MCPhysReg GPR[] = {
4515 PPC::X3, PPC::X4, PPC::X5, PPC::X6,
4516 PPC::X7, PPC::X8, PPC::X9, PPC::X10,
4517 };
4518 static const MCPhysReg VR[] = {
4519 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
4520 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
4521 };
4522
4523 const unsigned Num_GPR_Regs = std::size(GPR);
4524 const unsigned Num_FPR_Regs = useSoftFloat() ? 0 : 13;
4525 const unsigned Num_VR_Regs = std::size(VR);
4526
4527 // Do a first pass over the arguments to determine whether the ABI
4528 // guarantees that our caller has allocated the parameter save area
4529 // on its stack frame. In the ELFv1 ABI, this is always the case;
4530 // in the ELFv2 ABI, it is true if this is a vararg function or if
4531 // any parameter is located in a stack slot.
4532
4533 bool HasParameterArea = !isELFv2ABI || isVarArg;
4534 unsigned ParamAreaSize = Num_GPR_Regs * PtrByteSize;
4535 unsigned NumBytes = LinkageSize;
4536 unsigned AvailableFPRs = Num_FPR_Regs;
4537 unsigned AvailableVRs = Num_VR_Regs;
4538 for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
4539 if (Ins[i].Flags.isNest())
4540 continue;
4541
4542 if (CalculateStackSlotUsed(Ins[i].VT, Ins[i].ArgVT, Ins[i].Flags,
4543 PtrByteSize, LinkageSize, ParamAreaSize,
4544 NumBytes, AvailableFPRs, AvailableVRs))
4545 HasParameterArea = true;
4546 }
4547
4548 // Add DAG nodes to load the arguments or copy them out of registers. On
4549 // entry to a function on PPC, the arguments start after the linkage area,
4550 // although the first ones are often in registers.
4551
4552 unsigned ArgOffset = LinkageSize;
4553 unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
4554 SmallVector<SDValue, 8> MemOps;
4555 Function::const_arg_iterator FuncArg = MF.getFunction().arg_begin();
4556 unsigned CurArgIdx = 0;
4557 for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
4558 SDValue ArgVal;
4559 bool needsLoad = false;
4560 EVT ObjectVT = Ins[ArgNo].VT;
4561 EVT OrigVT = Ins[ArgNo].ArgVT;
4562 unsigned ObjSize = ObjectVT.getStoreSize();
4563 unsigned ArgSize = ObjSize;
4564 ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
4565 if (Ins[ArgNo].isOrigArg()) {
4566 std::advance(FuncArg, Ins[ArgNo].getOrigArgIndex() - CurArgIdx);
4567 CurArgIdx = Ins[ArgNo].getOrigArgIndex();
4568 }
4569 // We re-align the argument offset for each argument, except when using the
4570 // fast calling convention, where we only do so when the argument will
4571 // actually use a stack slot.
4572 unsigned CurArgOffset;
4573 Align Alignment;
4574 auto ComputeArgOffset = [&]() {
4575 /* Respect alignment of argument on the stack. */
4576 Alignment =
4577 CalculateStackSlotAlignment(ObjectVT, OrigVT, Flags, PtrByteSize);
4578 ArgOffset = alignTo(ArgOffset, Alignment);
4579 CurArgOffset = ArgOffset;
4580 };
4581
4582 if (CallConv != CallingConv::Fast) {
4583 ComputeArgOffset();
4584
4585 /* Compute GPR index associated with argument offset. */
4586 GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
4587 GPR_idx = std::min(GPR_idx, Num_GPR_Regs);
4588 }
4589
4590 // FIXME the codegen can be much improved in some cases.
4591 // We do not have to keep everything in memory.
4592 if (Flags.isByVal()) {
4593 assert(Ins[ArgNo].isOrigArg() && "Byval arguments cannot be implicit");
4594
4595 if (CallConv == CallingConv::Fast)
4596 ComputeArgOffset();
4597
4598 // ObjSize is the true size; ArgSize is ObjSize rounded up to a multiple of registers.
4599 ObjSize = Flags.getByValSize();
4600 ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
4601 // Empty aggregate parameters do not take up registers. Examples:
4602 // struct { } a;
4603 // union { } b;
4604 // int c[0];
4605 // etc. However, we have to provide a place-holder in InVals, so
4606 // pretend we have an 8-byte item at the current address for that
4607 // purpose.
4608 if (!ObjSize) {
4609 int FI = MFI.CreateFixedObject(PtrByteSize, ArgOffset, true);
4610 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4611 InVals.push_back(FIN);
4612 continue;
4613 }
4614
4615 // Create a stack object covering all stack doublewords occupied
4616 // by the argument. If the argument is (fully or partially) on
4617 // the stack, or if the argument is fully in registers but the
4618 // caller has allocated the parameter save anyway, we can refer
4619 // directly to the caller's stack frame. Otherwise, create a
4620 // local copy in our own frame.
4621 int FI;
4622 if (HasParameterArea ||
4623 ArgSize + ArgOffset > LinkageSize + Num_GPR_Regs * PtrByteSize)
4624 FI = MFI.CreateFixedObject(ArgSize, ArgOffset, false, true);
4625 else
4626 FI = MFI.CreateStackObject(ArgSize, Alignment, false);
4627 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4628
4629 // Handle aggregates smaller than 8 bytes.
4630 if (ObjSize < PtrByteSize) {
4631 // The value of the object is its address, which differs from the
4632 // address of the enclosing doubleword on big-endian systems.
4633 SDValue Arg = FIN;
4634 if (!isLittleEndian) {
4635 SDValue ArgOff = DAG.getConstant(PtrByteSize - ObjSize, dl, PtrVT);
4636 Arg = DAG.getNode(ISD::ADD, dl, ArgOff.getValueType(), Arg, ArgOff);
4637 }
4638 InVals.push_back(Arg);
4639
4640 if (GPR_idx != Num_GPR_Regs) {
4641 Register VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
4642 FuncInfo->addLiveInAttr(VReg, Flags);
4643 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4644 EVT ObjType = EVT::getIntegerVT(*DAG.getContext(), ObjSize * 8);
4645 SDValue Store =
4646 DAG.getTruncStore(Val.getValue(1), dl, Val, Arg,
4647 MachinePointerInfo(&*FuncArg), ObjType);
4648 MemOps.push_back(Store);
4649 }
4650 // Whether we copied from a register or not, advance the offset
4651 // into the parameter save area by a full doubleword.
4652 ArgOffset += PtrByteSize;
4653 continue;
4654 }
4655
4656 // The value of the object is its address, which is the address of
4657 // its first stack doubleword.
4658 InVals.push_back(FIN);
4659
4660 // Store whatever pieces of the object are in registers to memory.
4661 for (unsigned j = 0; j < ArgSize; j += PtrByteSize) {
4662 if (GPR_idx == Num_GPR_Regs)
4663 break;
4664
4665 Register VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
4666 FuncInfo->addLiveInAttr(VReg, Flags);
4667 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4668 SDValue Addr = FIN;
4669 if (j) {
4670 SDValue Off = DAG.getConstant(j, dl, PtrVT);
4671 Addr = DAG.getNode(ISD::ADD, dl, Off.getValueType(), Addr, Off);
4672 }
4673 unsigned StoreSizeInBits = std::min(PtrByteSize, (ObjSize - j)) * 8;
4674 EVT ObjType = EVT::getIntegerVT(*DAG.getContext(), StoreSizeInBits);
4675 SDValue Store =
4676 DAG.getTruncStore(Val.getValue(1), dl, Val, Addr,
4677 MachinePointerInfo(&*FuncArg, j), ObjType);
4678 MemOps.push_back(Store);
4679 ++GPR_idx;
4680 }
4681 ArgOffset += ArgSize;
4682 continue;
4683 }
4684
4685 switch (ObjectVT.getSimpleVT().SimpleTy) {
4686 default: llvm_unreachable("Unhandled argument type!");
4687 case MVT::i1:
4688 case MVT::i32:
4689 case MVT::i64:
4690 if (Flags.isNest()) {
4691 // The 'nest' parameter, if any, is passed in R11.
4692 Register VReg = MF.addLiveIn(PPC::X11, &PPC::G8RCRegClass);
4693 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
4694
4695 if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
4696 ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
4697
4698 break;
4699 }
4700
4701 // These can be scalar arguments or elements of an integer array type
4702 // passed directly. Clang may use those instead of "byval" aggregate
4703 // types to avoid forcing arguments to memory unnecessarily.
4704 if (GPR_idx != Num_GPR_Regs) {
4705 Register VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
4706 FuncInfo->addLiveInAttr(VReg, Flags);
4707 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
4708
4709 if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
4710 // PPC64 passes i8, i16, and i32 values in i64 registers. Promote
4711 // value to MVT::i64 and then truncate to the correct register size.
4712 ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
4713 } else {
4714 if (CallConv == CallingConv::Fast)
4715 ComputeArgOffset();
4716
4717 needsLoad = true;
4718 ArgSize = PtrByteSize;
4719 }
4720 if (CallConv != CallingConv::Fast || needsLoad)
4721 ArgOffset += 8;
4722 break;
4723
4724 case MVT::f32:
4725 case MVT::f64:
4726 // These can be scalar arguments or elements of a float array type
4727 // passed directly. The latter are used to implement ELFv2 homogenous
4728 // float aggregates.
4729 if (FPR_idx != Num_FPR_Regs) {
4730 unsigned VReg;
4731
4732 if (ObjectVT == MVT::f32)
4733 VReg = MF.addLiveIn(FPR[FPR_idx],
4734 Subtarget.hasP8Vector()
4735 ? &PPC::VSSRCRegClass
4736 : &PPC::F4RCRegClass);
4737 else
4738 VReg = MF.addLiveIn(FPR[FPR_idx], Subtarget.hasVSX()
4739 ? &PPC::VSFRCRegClass
4740 : &PPC::F8RCRegClass);
4741
4742 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
4743 ++FPR_idx;
4744 } else if (GPR_idx != Num_GPR_Regs && CallConv != CallingConv::Fast) {
4745 // FIXME: We may want to re-enable this for CallingConv::Fast on the P8
4746 // once we support fp <-> gpr moves.
4747
4748 // This can only ever happen in the presence of f32 array types,
4749 // since otherwise we never run out of FPRs before running out
4750 // of GPRs.
4751 Register VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
4752 FuncInfo->addLiveInAttr(VReg, Flags);
4753 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
4754
4755 if (ObjectVT == MVT::f32) {
4756 if ((ArgOffset % PtrByteSize) == (isLittleEndian ? 4 : 0))
4757 ArgVal = DAG.getNode(ISD::SRL, dl, MVT::i64, ArgVal,
4758 DAG.getConstant(32, dl, MVT::i32));
4759 ArgVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, ArgVal);
4760 }
4761
4762 ArgVal = DAG.getNode(ISD::BITCAST, dl, ObjectVT, ArgVal);
4763 } else {
4764 if (CallConv == CallingConv::Fast)
4765 ComputeArgOffset();
4766
4767 needsLoad = true;
4768 }
4769
4770 // When passing an array of floats, the array occupies consecutive
4771 // space in the argument area; only round up to the next doubleword
4772 // at the end of the array. Otherwise, each float takes 8 bytes.
4773 if (CallConv != CallingConv::Fast || needsLoad) {
4774 ArgSize = Flags.isInConsecutiveRegs() ? ObjSize : PtrByteSize;
4775 ArgOffset += ArgSize;
4776 if (Flags.isInConsecutiveRegsLast())
4777 ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
4778 }
4779 break;
4780 case MVT::v4f32:
4781 case MVT::v4i32:
4782 case MVT::v8i16:
4783 case MVT::v16i8:
4784 case MVT::v2f64:
4785 case MVT::v2i64:
4786 case MVT::v1i128:
4787 case MVT::f128:
4788 // These can be scalar arguments or elements of a vector array type
4789 // passed directly. The latter are used to implement ELFv2 homogenous
4790 // vector aggregates.
4791 if (VR_idx != Num_VR_Regs) {
4792 Register VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass);
4793 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
4794 ++VR_idx;
4795 } else {
4796 if (CallConv == CallingConv::Fast)
4797 ComputeArgOffset();
4798 needsLoad = true;
4799 }
4800 if (CallConv != CallingConv::Fast || needsLoad)
4801 ArgOffset += 16;
4802 break;
4803 }
4804
4805 // We need to load the argument to a virtual register if we determined
4806 // above that we ran out of physical registers of the appropriate type.
4807 if (needsLoad) {
4808 if (ObjSize < ArgSize && !isLittleEndian)
4809 CurArgOffset += ArgSize - ObjSize;
4810 int FI = MFI.CreateFixedObject(ObjSize, CurArgOffset, isImmutable);
4811 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4812 ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo());
4813 }
4814
4815 InVals.push_back(ArgVal);
4816 }
4817
4818 // Area that is at least reserved in the caller of this function.
4819 unsigned MinReservedArea;
4820 if (HasParameterArea)
4821 MinReservedArea = std::max(ArgOffset, LinkageSize + 8 * PtrByteSize);
4822 else
4823 MinReservedArea = LinkageSize;
4824
4825 // Set the size that is at least reserved in caller of this function. Tail
4826 // call optimized functions' reserved stack space needs to be aligned so that
4827 // taking the difference between two stack areas will result in an aligned
4828 // stack.
4829 MinReservedArea =
4830 EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
4831 FuncInfo->setMinReservedArea(MinReservedArea);
4832
4833 // If the function takes variable number of arguments, make a frame index for
4834 // the start of the first vararg value... for expansion of llvm.va_start.
4835 // The ELFv2 ABI spec says:
4836 // C programs that are intended to be *portable* across different compilers
4837 // and architectures must use the header file <stdarg.h> to deal with variable
4838 // argument lists.
4839 if (isVarArg && MFI.hasVAStart()) {
4840 int Depth = ArgOffset;
4841
4842 FuncInfo->setVarArgsFrameIndex(
4843 MFI.CreateFixedObject(PtrByteSize, Depth, true));
4844 SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
4845
4846 // If this function is vararg, store any remaining integer argument regs
4847 // to their spots on the stack so that they may be loaded by dereferencing
4848 // the result of va_next.
4849 for (GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
4850 GPR_idx < Num_GPR_Regs; ++GPR_idx) {
4851 Register VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
4852 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4853 SDValue Store =
4854 DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
4855 MemOps.push_back(Store);
4856 // Increment the address by four for the next argument to store
4857 SDValue PtrOff = DAG.getConstant(PtrByteSize, dl, PtrVT);
4858 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
4859 }
4860 }
4861
4862 if (!MemOps.empty())
4863 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
4864
4865 return Chain;
4866}
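// Minimal sketch of the f32-in-GPR extraction used above (hypothetical
// helper): when an f32 array element is passed in a GPR, the 32 interesting
// bits sit in one half of the doubleword depending on the offset parity and
// endianness, so the lowering shifts them down before truncating and
// bitcasting to f32.
static uint32_t exampleExtractF32Bits(uint64_t GprBits, bool InHighHalf) {
  return InHighHalf ? uint32_t(GprBits >> 32) : uint32_t(GprBits);
}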
4867
4868/// CalculateTailCallSPDiff - Get the amount the stack pointer has to be
4869/// adjusted to accommodate the arguments for the tailcall.
4870static int CalculateTailCallSPDiff(SelectionDAG& DAG, bool isTailCall,
4871 unsigned ParamSize) {
4872
4873 if (!isTailCall) return 0;
4874
4876 unsigned CallerMinReservedArea = FI->getMinReservedArea();
4877 int SPDiff = (int)CallerMinReservedArea - (int)ParamSize;
4878 // Remember only if the new adjustment is bigger.
4879 if (SPDiff < FI->getTailCallSPDelta())
4880 FI->setTailCallSPDelta(SPDiff);
4881
4882 return SPDiff;
4883}
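// Worked example for the computation above (assumed numbers, hypothetical
// helper): if the caller reserved 112 bytes of argument area and the tail
// call needs 176, SPDiff is 112 - 176 = -64, meaning the callee needs 64
// bytes more than the caller reserved; only the most negative adjustment
// seen so far is remembered in the function info.
static int exampleTailCallSPDiff(unsigned CallerMinReservedArea,
                                 unsigned ParamSize) {
  return (int)CallerMinReservedArea - (int)ParamSize;
}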
4884
4885static bool isFunctionGlobalAddress(const GlobalValue *CalleeGV);
4886
4887static bool callsShareTOCBase(const Function *Caller,
4888 const GlobalValue *CalleeGV,
4889 const TargetMachine &TM) {
4890 // It does not make sense to call callsShareTOCBase() with a caller that
4891 // is PC Relative since PC Relative callers do not have a TOC.
4892#ifndef NDEBUG
4893 const PPCSubtarget *STICaller = &TM.getSubtarget<PPCSubtarget>(*Caller);
4894 assert(!STICaller->isUsingPCRelativeCalls() &&
4895 "PC Relative callers do not have a TOC and cannot share a TOC Base");
4896#endif
4897
4898 // Callee is either a GlobalAddress or an ExternalSymbol. ExternalSymbols
4899 // don't have enough information to determine if the caller and callee share
4900 // the same TOC base, so we have to pessimistically assume they don't for
4901 // correctness.
4902 if (!CalleeGV)
4903 return false;
4904
4905 // If the callee is preemptable, then the static linker will use a plt-stub
4906 // which saves the toc to the stack, and needs a nop after the call
4907 // instruction to convert to a toc-restore.
4908 if (!TM.shouldAssumeDSOLocal(CalleeGV))
4909 return false;
4910
4911 // Functions with PC Relative enabled may clobber the TOC in the same DSO.
4912 // We may need a TOC restore in the situation where the caller requires a
4913 // valid TOC but the callee is PC Relative and does not.
4914 const Function *F = dyn_cast<Function>(CalleeGV);
4915 const GlobalAlias *Alias = dyn_cast<GlobalAlias>(CalleeGV);
4916
4917 // If we have an Alias we can try to get the function from there.
4918 if (Alias) {
4919 const GlobalObject *GlobalObj = Alias->getAliaseeObject();
4920 F = dyn_cast<Function>(GlobalObj);
4921 }
4922
4923 // If we still have no valid function pointer we do not have enough
4924 // information to determine if the callee uses PC Relative calls so we must
4925 // assume that it does.
4926 if (!F)
4927 return false;
4928
4929 // If the callee uses PC Relative we cannot guarantee that the callee won't
4930 // clobber the TOC of the caller and so we must assume that the two
4931 // functions do not share a TOC base.
4932 const PPCSubtarget *STICallee = &TM.getSubtarget<PPCSubtarget>(*F);
4933 if (STICallee->isUsingPCRelativeCalls())
4934 return false;
4935
4936 // If the GV is not a strong definition then we need to assume it can be
4937 // replaced by another function at link time. The function that replaces
4938 // it may not share the same TOC as the caller since the callee may be
4939 // replaced by a PC Relative version of the same function.
4940 if (!CalleeGV->isStrongDefinitionForLinker())
4941 return false;
4942
4943 // The medium and large code models are expected to provide a sufficiently
4944 // large TOC to satisfy all data addressing needs of a module with a
4945 // single TOC.
4946 if (CodeModel::Medium == TM.getCodeModel() ||
4947 CodeModel::Large == TM.getCodeModel())
4948 return true;
4949
4950 // Any explicitly-specified sections and section prefixes must also match.
4951 // Also, if we're using -ffunction-sections, then each function is always in
4952 // a different section (the same is true for COMDAT functions).
4953 if (TM.getFunctionSections() || CalleeGV->hasComdat() ||
4954 Caller->hasComdat() || CalleeGV->getSection() != Caller->getSection())
4955 return false;
4956 if (const auto *F = dyn_cast<Function>(CalleeGV)) {
4957 if (F->getSectionPrefix() != Caller->getSectionPrefix())
4958 return false;
4959 }
4960
4961 return true;
4962}
4963
4964static bool
4965 needStackSlotPassParameters(const PPCSubtarget &Subtarget,
4966 const SmallVectorImpl<ISD::OutputArg> &Outs) {
4967 assert(Subtarget.is64BitELFABI());
4968
4969 const unsigned PtrByteSize = 8;
4970 const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
4971
4972 static const MCPhysReg GPR[] = {
4973 PPC::X3, PPC::X4, PPC::X5, PPC::X6,
4974 PPC::X7, PPC::X8, PPC::X9, PPC::X10,
4975 };
4976 static const MCPhysReg VR[] = {
4977 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
4978 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
4979 };
4980
4981 const unsigned NumGPRs = std::size(GPR);
4982 const unsigned NumFPRs = 13;
4983 const unsigned NumVRs = std::size(VR);
4984 const unsigned ParamAreaSize = NumGPRs * PtrByteSize;
4985
4986 unsigned NumBytes = LinkageSize;
4987 unsigned AvailableFPRs = NumFPRs;
4988 unsigned AvailableVRs = NumVRs;
4989
4990 for (const ISD::OutputArg& Param : Outs) {
4991 if (Param.Flags.isNest()) continue;
4992
4993 if (CalculateStackSlotUsed(Param.VT, Param.ArgVT, Param.Flags, PtrByteSize,
4994 LinkageSize, ParamAreaSize, NumBytes,
4995 AvailableFPRs, AvailableVRs))
4996 return true;
4997 }
4998 return false;
4999}
5000
5001static bool hasSameArgumentList(const Function *CallerFn, const CallBase &CB) {
5002 if (CB.arg_size() != CallerFn->arg_size())
5003 return false;
5004
5005 auto CalleeArgIter = CB.arg_begin();
5006 auto CalleeArgEnd = CB.arg_end();
5007 Function::const_arg_iterator CallerArgIter = CallerFn->arg_begin();
5008
5009 for (; CalleeArgIter != CalleeArgEnd; ++CalleeArgIter, ++CallerArgIter) {
5010 const Value* CalleeArg = *CalleeArgIter;
5011 const Value* CallerArg = &(*CallerArgIter);
5012 if (CalleeArg == CallerArg)
5013 continue;
5014
5015 // e.g. @caller([4 x i64] %a, [4 x i64] %b) {
5016 // tail call @callee([4 x i64] undef, [4 x i64] %b)
5017 // }
5018 // 1st argument of callee is undef and has the same type as caller.
5019 if (CalleeArg->getType() == CallerArg->getType() &&
5020 isa<UndefValue>(CalleeArg))
5021 continue;
5022
5023 return false;
5024 }
5025
5026 return true;
5027}
5028
5029// Returns true if TCO is possible between the callers and callees
5030// calling conventions.
5031static bool
5032 areCallingConvEligibleForTCO_64SVR4(CallingConv::ID CallerCC,
5033 CallingConv::ID CalleeCC) {
5034 // Tail calls are possible with fastcc and ccc.
5035 auto isTailCallableCC = [] (CallingConv::ID CC){
5036 return CC == CallingConv::C || CC == CallingConv::Fast;
5037 };
5038 if (!isTailCallableCC(CallerCC) || !isTailCallableCC(CalleeCC))
5039 return false;
5040
5041 // We can safely tail call both fastcc and ccc callees from a c calling
5042 // convention caller. If the caller is fastcc, we may have less stack space
5043 // than a non-fastcc caller with the same signature so disable tail-calls in
5044 // that case.
5045 return CallerCC == CallingConv::C || CallerCC == CalleeCC;
5046}
5047
5048bool PPCTargetLowering::IsEligibleForTailCallOptimization_64SVR4(
5049 const GlobalValue *CalleeGV, CallingConv::ID CalleeCC,
5050 CallingConv::ID CallerCC, const CallBase *CB, bool isVarArg,
5051 const SmallVectorImpl<ISD::OutputArg> &Outs,
5052 const SmallVectorImpl<ISD::InputArg> &Ins, const Function *CallerFunc,
5053 bool isCalleeExternalSymbol) const {
5054 bool TailCallOpt = getTargetMachine().Options.GuaranteedTailCallOpt;
5055
5056 if (DisableSCO && !TailCallOpt) return false;
5057
5058 // Variadic argument functions are not supported.
5059 if (isVarArg) return false;
5060
5061 // Check that the calling conventions are compatible for tco.
5062 if (!areCallingConvEligibleForTCO_64SVR4(CallerCC, CalleeCC))
5063 return false;
5064
5065 // A caller that contains any byval parameter is not supported.
5066 if (any_of(Ins, [](const ISD::InputArg &IA) { return IA.Flags.isByVal(); }))
5067 return false;
5068
5069 // A callee that contains any byval parameter is not supported either.
5070 // Note: This is a quick work around, because in some cases, e.g.
5071 // caller's stack size > callee's stack size, we are still able to apply
5072 // sibling call optimization. For example, gcc is able to do SCO for caller1
5073 // in the following example, but not for caller2.
5074 // struct test {
5075 // long int a;
5076 // char ary[56];
5077 // } gTest;
5078 // __attribute__((noinline)) int callee(struct test v, struct test *b) {
5079 // b->a = v.a;
5080 // return 0;
5081 // }
5082 // void caller1(struct test a, struct test c, struct test *b) {
5083 // callee(gTest, b); }
5084 // void caller2(struct test *b) { callee(gTest, b); }
5085 if (any_of(Outs, [](const ISD::OutputArg& OA) { return OA.Flags.isByVal(); }))
5086 return false;
5087
5088 // If callee and caller use different calling conventions, we cannot pass
5089 // parameters on stack since offsets for the parameter area may be different.
5090 if (CallerCC != CalleeCC && needStackSlotPassParameters(Subtarget, Outs))
5091 return false;
5092
5093 // All variants of 64-bit ELF ABIs without PC-Relative addressing require that
5094 // the caller and callee share the same TOC for TCO/SCO. If the caller and
5095 // callee potentially have different TOC bases then we cannot tail call since
5096 // we need to restore the TOC pointer after the call.
5097 // ref: https://bugzilla.mozilla.org/show_bug.cgi?id=973977
5098 // We cannot guarantee this for indirect calls or calls to external functions.
5099 // When PC-Relative addressing is used, the concept of the TOC is no longer
5100 // applicable so this check is not required.
5101 // Check first for indirect calls.
5102 if (!Subtarget.isUsingPCRelativeCalls() &&
5103 !isFunctionGlobalAddress(CalleeGV) && !isCalleeExternalSymbol)
5104 return false;
5105
5106 // Check if we share the TOC base.
5107 if (!Subtarget.isUsingPCRelativeCalls() &&
5108 !callsShareTOCBase(CallerFunc, CalleeGV, getTargetMachine()))
5109 return false;
5110
5111 // TCO allows altering callee ABI, so we don't have to check further.
5112 if (CalleeCC == CallingConv::Fast && TailCallOpt)
5113 return true;
5114
5115 if (DisableSCO) return false;
5116
5117 // If the callee uses the same argument list that the caller is using, we can
5118 // apply SCO on this case. If it is not, then we need to check if callee needs
5119 // stack for passing arguments.
5120 // PC Relative tail calls may not have a CallBase.
5121 // If there is no CallBase we cannot verify if we have the same argument
5122 // list so assume that we don't have the same argument list.
5123 if (CB && !hasSameArgumentList(CallerFunc, *CB) &&
5124 needStackSlotPassParameters(Subtarget, Outs))
5125 return false;
5126 else if (!CB && needStackSlotPassParameters(Subtarget, Outs))
5127 return false;
5128
5129 return true;
5130}
5131
5132/// IsEligibleForTailCallOptimization - Check whether the call is eligible
5133/// for tail call optimization. Targets which want to do tail call
5134/// optimization should implement this function.
5135bool PPCTargetLowering::IsEligibleForTailCallOptimization(
5136 const GlobalValue *CalleeGV, CallingConv::ID CalleeCC,
5137 CallingConv::ID CallerCC, bool isVarArg,
5138 const SmallVectorImpl<ISD::InputArg> &Ins) const {
5139 if (!getTargetMachine().Options.GuaranteedTailCallOpt)
5140 return false;
5141
5142 // Variable argument functions are not supported.
5143 if (isVarArg)
5144 return false;
5145
5146 if (CalleeCC == CallingConv::Fast && CallerCC == CalleeCC) {
5147 // Functions containing by val parameters are not supported.
5148 if (any_of(Ins, [](const ISD::InputArg &IA) { return IA.Flags.isByVal(); }))
5149 return false;
5150
5151 // Non-PIC/GOT tail calls are supported.
5152 if (getTargetMachine().getRelocationModel() != Reloc::PIC_)
5153 return true;
5154
5155 // At the moment we can only do local tail calls (in same module, hidden
5156 // or protected) if we are generating PIC.
5157 if (CalleeGV)
5158 return CalleeGV->hasHiddenVisibility() ||
5159 CalleeGV->hasProtectedVisibility();
5160 }
5161
5162 return false;
5163}
5164
5165/// isCallCompatibleAddress - Return the immediate to use if the specified
5166/// 32-bit value is representable in the immediate field of a BxA instruction.
5167 static SDNode *isBLACompatibleAddress(SDValue Op, SelectionDAG &DAG) {
5168 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
5169 if (!C) return nullptr;
5170
5171 int Addr = C->getZExtValue();
5172 if ((Addr & 3) != 0 || // Low 2 bits are implicitly zero.
5173 SignExtend32<26>(Addr) != Addr)
5174 return nullptr; // Top 6 bits have to be sext of immediate.
5175
5176 return DAG
5177 .getConstant(
5178 (int)C->getZExtValue() >> 2, SDLoc(Op),
5180 .getNode();
5181}
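// Editorial sketch: the encodability test used above, restated on a plain
// integer so the constraint is explicit. An absolute branch-and-link (BLA)
// target must be word aligned and must survive sign extension from the 26-bit
// LI field; the value actually encoded is the address shifted right by two.
// The helper name is illustrative and is not used by the lowering.
static inline bool isEncodableAbsoluteBranchTargetSketch(int32_t Addr) {
  if ((Addr & 3) != 0)                    // low 2 bits are implicitly zero
    return false;
  return SignExtend32<26>(Addr) == Addr;  // top 6 bits must be sign bits
}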
5182
5183namespace {
5184
5185struct TailCallArgumentInfo {
5186 SDValue Arg;
5187 SDValue FrameIdxOp;
5188 int FrameIdx = 0;
5189
5190 TailCallArgumentInfo() = default;
5191};
5192
5193} // end anonymous namespace
5194
5195/// StoreTailCallArgumentsToStackSlot - Stores arguments to their stack slot.
5197 SelectionDAG &DAG, SDValue Chain,
5198 const SmallVectorImpl<TailCallArgumentInfo> &TailCallArgs,
5199 SmallVectorImpl<SDValue> &MemOpChains, const SDLoc &dl) {
5200 for (unsigned i = 0, e = TailCallArgs.size(); i != e; ++i) {
5201 SDValue Arg = TailCallArgs[i].Arg;
5202 SDValue FIN = TailCallArgs[i].FrameIdxOp;
5203 int FI = TailCallArgs[i].FrameIdx;
5204    // Store relative to the frame pointer.
5205 MemOpChains.push_back(DAG.getStore(
5206 Chain, dl, Arg, FIN,
5208 }
5209}
5210
5211/// EmitTailCallStoreFPAndRetAddr - Move the frame pointer and return address to
5212/// the appropriate stack slot for the tail call optimized function call.
5214 SDValue OldRetAddr, SDValue OldFP,
5215 int SPDiff, const SDLoc &dl) {
5216 if (SPDiff) {
5217 // Calculate the new stack slot for the return address.
5219 const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
5220 const PPCFrameLowering *FL = Subtarget.getFrameLowering();
5221 bool isPPC64 = Subtarget.isPPC64();
5222 int SlotSize = isPPC64 ? 8 : 4;
5223 int NewRetAddrLoc = SPDiff + FL->getReturnSaveOffset();
5224 int NewRetAddr = MF.getFrameInfo().CreateFixedObject(SlotSize,
5225 NewRetAddrLoc, true);
5226 EVT VT = isPPC64 ? MVT::i64 : MVT::i32;
5227 SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewRetAddr, VT);
5228 Chain = DAG.getStore(Chain, dl, OldRetAddr, NewRetAddrFrIdx,
5229 MachinePointerInfo::getFixedStack(MF, NewRetAddr));
5230 }
5231 return Chain;
5232}
5233
5234/// CalculateTailCallArgDest - Remember the argument for later processing and
5235/// calculate its position.
5236static void
5238 SDValue Arg, int SPDiff, unsigned ArgOffset,
5239 SmallVectorImpl<TailCallArgumentInfo>& TailCallArguments) {
5240 int Offset = ArgOffset + SPDiff;
5241 uint32_t OpSize = (Arg.getValueSizeInBits() + 7) / 8;
5242 int FI = MF.getFrameInfo().CreateFixedObject(OpSize, Offset, true);
5243 EVT VT = isPPC64 ? MVT::i64 : MVT::i32;
5244 SDValue FIN = DAG.getFrameIndex(FI, VT);
5245 TailCallArgumentInfo Info;
5246 Info.Arg = Arg;
5247 Info.FrameIdxOp = FIN;
5248 Info.FrameIdx = FI;
5249 TailCallArguments.push_back(Info);
5250}
5251
5252/// EmitTailCallLoadFPAndRetAddr - Emit loads from the frame pointer and return
5253/// address stack slots. Returns the chain as result and the loaded values in
5254/// LROpOut/FPOpOut. Used when tail calling.
5255SDValue PPCTargetLowering::EmitTailCallLoadFPAndRetAddr(
5256 SelectionDAG &DAG, int SPDiff, SDValue Chain, SDValue &LROpOut,
5257 SDValue &FPOpOut, const SDLoc &dl) const {
5258 if (SPDiff) {
5259 // Load the LR and FP stack slot for later adjusting.
5260 EVT VT = Subtarget.isPPC64() ? MVT::i64 : MVT::i32;
5261 LROpOut = getReturnAddrFrameIndex(DAG);
5262 LROpOut = DAG.getLoad(VT, dl, Chain, LROpOut, MachinePointerInfo());
5263 Chain = SDValue(LROpOut.getNode(), 1);
5264 }
5265 return Chain;
5266}
5267
5268/// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified
5269/// by "Src" to address "Dst" of size "Size". Alignment information is
5270/// specified by the specific parameter attribute. The copy will be passed as
5271/// a byval function parameter.
5272/// Sometimes what we are copying is the end of a larger object, the part that
5273/// does not fit in registers.
5275 SDValue Chain, ISD::ArgFlagsTy Flags,
5276 SelectionDAG &DAG, const SDLoc &dl) {
5277 SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), dl, MVT::i32);
5278 return DAG.getMemcpy(
5279 Chain, dl, Dst, Src, SizeNode, Flags.getNonZeroByValAlign(), false, false,
5280 /*CI=*/nullptr, std::nullopt, MachinePointerInfo(), MachinePointerInfo());
5281}
5282
5283/// LowerMemOpCallTo - Store the argument to the stack or remember it in case of
5284/// tail calls.
5286 SelectionDAG &DAG, MachineFunction &MF, SDValue Chain, SDValue Arg,
5287 SDValue PtrOff, int SPDiff, unsigned ArgOffset, bool isPPC64,
5288 bool isTailCall, bool isVector, SmallVectorImpl<SDValue> &MemOpChains,
5289 SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments, const SDLoc &dl) {
5291 if (!isTailCall) {
5292 if (isVector) {
5293 SDValue StackPtr;
5294 if (isPPC64)
5295 StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
5296 else
5297 StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
5298 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr,
5299 DAG.getConstant(ArgOffset, dl, PtrVT));
5300 }
5301 MemOpChains.push_back(
5302 DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));
5303 // Calculate and remember argument location.
5304 } else CalculateTailCallArgDest(DAG, MF, isPPC64, Arg, SPDiff, ArgOffset,
5305 TailCallArguments);
5306}
5307
5308static void
5310 const SDLoc &dl, int SPDiff, unsigned NumBytes, SDValue LROp,
5311 SDValue FPOp,
5312 SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments) {
5313 // Emit a sequence of copyto/copyfrom virtual registers for arguments that
5314 // might overwrite each other in case of tail call optimization.
5315 SmallVector<SDValue, 8> MemOpChains2;
5316  // Do not glue the preceding CopyToReg nodes together with the nodes that follow.
5317 InGlue = SDValue();
5318 StoreTailCallArgumentsToStackSlot(DAG, Chain, TailCallArguments,
5319 MemOpChains2, dl);
5320 if (!MemOpChains2.empty())
5321 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains2);
5322
5323 // Store the return address to the appropriate stack slot.
5324 Chain = EmitTailCallStoreFPAndRetAddr(DAG, Chain, LROp, FPOp, SPDiff, dl);
5325
5326 // Emit callseq_end just before tailcall node.
5327 Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, InGlue, dl);
5328 InGlue = Chain.getValue(1);
5329}
5330
5331// Is this global address that of a function that can be called by name? (as
5332// opposed to something that must hold a descriptor for an indirect call).
5333static bool isFunctionGlobalAddress(const GlobalValue *GV) {
5334 if (GV) {
5335 if (GV->isThreadLocal())
5336 return false;
5337
5338 return GV->getValueType()->isFunctionTy();
5339 }
5340
5341 return false;
5342}
5343
5344SDValue PPCTargetLowering::LowerCallResult(
5345 SDValue Chain, SDValue InGlue, CallingConv::ID CallConv, bool isVarArg,
5346 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
5347 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
5349 CCState CCRetInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
5350 *DAG.getContext());
5351
5352 CCRetInfo.AnalyzeCallResult(
5353 Ins, (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold)
5355 : RetCC_PPC);
5356
5357 // Copy all of the result registers out of their specified physreg.
5358 for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
5359 CCValAssign &VA = RVLocs[i];
5360 assert(VA.isRegLoc() && "Can only return in registers!");
5361
5362 SDValue Val;
5363
5364 if (Subtarget.hasSPE() && VA.getLocVT() == MVT::f64) {
5365 SDValue Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
5366 InGlue);
5367 Chain = Lo.getValue(1);
5368 InGlue = Lo.getValue(2);
5369 VA = RVLocs[++i]; // skip ahead to next loc
5370 SDValue Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
5371 InGlue);
5372 Chain = Hi.getValue(1);
5373 InGlue = Hi.getValue(2);
5374 if (!Subtarget.isLittleEndian())
5375 std::swap (Lo, Hi);
5376 Val = DAG.getNode(PPCISD::BUILD_SPE64, dl, MVT::f64, Lo, Hi);
5377 } else {
5378 Val = DAG.getCopyFromReg(Chain, dl,
5379 VA.getLocReg(), VA.getLocVT(), InGlue);
5380 Chain = Val.getValue(1);
5381 InGlue = Val.getValue(2);
5382 }
5383
5384 switch (VA.getLocInfo()) {
5385 default: llvm_unreachable("Unknown loc info!");
5386 case CCValAssign::Full: break;
5387 case CCValAssign::AExt:
5388 Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
5389 break;
5390 case CCValAssign::ZExt:
5391 Val = DAG.getNode(ISD::AssertZext, dl, VA.getLocVT(), Val,
5392 DAG.getValueType(VA.getValVT()));
5393 Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
5394 break;
5395 case CCValAssign::SExt:
5396 Val = DAG.getNode(ISD::AssertSext, dl, VA.getLocVT(), Val,
5397 DAG.getValueType(VA.getValVT()));
5398 Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
5399 break;
5400 }
5401
5402 InVals.push_back(Val);
5403 }
5404
5405 return Chain;
5406}
5407
5408static bool isIndirectCall(const SDValue &Callee, SelectionDAG &DAG,
5409 const PPCSubtarget &Subtarget, bool isPatchPoint) {
5410 auto *G = dyn_cast<GlobalAddressSDNode>(Callee);
5411 const GlobalValue *GV = G ? G->getGlobal() : nullptr;
5412
5413 // PatchPoint calls are not indirect.
5414 if (isPatchPoint)
5415 return false;
5416
5417 if (isFunctionGlobalAddress(GV) || isa<ExternalSymbolSDNode>(Callee))
5418 return false;
5419
5420  // Darwin and 32-bit ELF can use a BLA. The descriptor-based ABIs cannot
5421  // because the immediate function pointer points to a descriptor instead of
5422  // a function entry point. The ELFv2 ABI cannot use a BLA because the function
5423  // pointer immediate points to the global entry point, while the BLA would
5424  // need to jump to the local entry point (see rL211174).
5425 if (!Subtarget.usesFunctionDescriptors() && !Subtarget.isELFv2ABI() &&
5426 isBLACompatibleAddress(Callee, DAG))
5427 return false;
5428
5429 return true;
5430}
5431
5432// AIX and 64-bit ELF ABIs w/o PCRel require a TOC save/restore around calls.
5433static inline bool isTOCSaveRestoreRequired(const PPCSubtarget &Subtarget) {
5434 return Subtarget.isAIXABI() ||
5435 (Subtarget.is64BitELFABI() && !Subtarget.isUsingPCRelativeCalls());
5436}
5437
5439 const Function &Caller, const SDValue &Callee,
5440 const PPCSubtarget &Subtarget,
5441 const TargetMachine &TM,
5442 bool IsStrictFPCall = false) {
5443 if (CFlags.IsTailCall)
5444 return PPCISD::TC_RETURN;
5445
5446 unsigned RetOpc = 0;
5447 // This is a call through a function pointer.
5448 if (CFlags.IsIndirect) {
5449    // AIX and the 64-bit ELF ABIs need to maintain the TOC pointer across
5450 // indirect calls. The save of the caller's TOC pointer to the stack will be
5451 // inserted into the DAG as part of call lowering. The restore of the TOC
5452 // pointer is modeled by using a pseudo instruction for the call opcode that
5453 // represents the 2 instruction sequence of an indirect branch and link,
5454 // immediately followed by a load of the TOC pointer from the stack save
5455 // slot into gpr2. For 64-bit ELFv2 ABI with PCRel, do not restore the TOC
5456 // as it is not saved or used.
5458 : PPCISD::BCTRL;
5459 } else if (Subtarget.isUsingPCRelativeCalls()) {
5460 assert(Subtarget.is64BitELFABI() && "PC Relative is only on ELF ABI.");
5461 RetOpc = PPCISD::CALL_NOTOC;
5462 } else if (Subtarget.isAIXABI() || Subtarget.is64BitELFABI()) {
5463    // The ABIs that maintain a TOC pointer across calls need to have a nop
5464    // immediately following the call instruction if the caller and callee may
5465    // have different TOC bases. At link time, if the linker determines the calls
5466    // may not share a TOC base, the call is redirected to a trampoline inserted
5467    // by the linker. The trampoline will (among other things) save the caller's
5468 // TOC pointer at an ABI designated offset in the linkage area and the
5469 // linker will rewrite the nop to be a load of the TOC pointer from the
5470 // linkage area into gpr2.
5471 auto *G = dyn_cast<GlobalAddressSDNode>(Callee);
5472 const GlobalValue *GV = G ? G->getGlobal() : nullptr;
5473 RetOpc =
5475 } else
5476 RetOpc = PPCISD::CALL;
5477 if (IsStrictFPCall) {
5478 switch (RetOpc) {
5479 default:
5480 llvm_unreachable("Unknown call opcode");
5483 break;
5484 case PPCISD::BCTRL:
5485 RetOpc = PPCISD::BCTRL_RM;
5486 break;
5487 case PPCISD::CALL_NOTOC:
5488 RetOpc = PPCISD::CALL_NOTOC_RM;
5489 break;
5490 case PPCISD::CALL:
5491 RetOpc = PPCISD::CALL_RM;
5492 break;
5493 case PPCISD::CALL_NOP:
5494 RetOpc = PPCISD::CALL_NOP_RM;
5495 break;
5496 }
5497 }
5498 return RetOpc;
5499}
5500
5501static SDValue transformCallee(const SDValue &Callee, SelectionDAG &DAG,
5502 const SDLoc &dl, const PPCSubtarget &Subtarget) {
5503 if (!Subtarget.usesFunctionDescriptors() && !Subtarget.isELFv2ABI())
5504 if (SDNode *Dest = isBLACompatibleAddress(Callee, DAG))
5505 return SDValue(Dest, 0);
5506
5507 // Returns true if the callee is local, and false otherwise.
5508 auto isLocalCallee = [&]() {
5509 const GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
5510 const GlobalValue *GV = G ? G->getGlobal() : nullptr;
5511
5512 return DAG.getTarget().shouldAssumeDSOLocal(GV) &&
5513 !isa_and_nonnull<GlobalIFunc>(GV);
5514 };
5515
5516 // The PLT is only used in 32-bit ELF PIC mode. Attempting to use the PLT in
5517 // a static relocation model causes some versions of GNU LD (2.17.50, at
5518 // least) to force BSS-PLT, instead of secure-PLT, even if all objects are
5519 // built with secure-PLT.
5520 bool UsePlt =
5521 Subtarget.is32BitELFABI() && !isLocalCallee() &&
5523
5524 const auto getAIXFuncEntryPointSymbolSDNode = [&](const GlobalValue *GV) {
5525 const TargetMachine &TM = Subtarget.getTargetMachine();
5526 const TargetLoweringObjectFile *TLOF = TM.getObjFileLowering();
5527 MCSymbolXCOFF *S =
5528 cast<MCSymbolXCOFF>(TLOF->getFunctionEntryPointSymbol(GV, TM));
5529
5531 return DAG.getMCSymbol(S, PtrVT);
5532 };
5533
5534 auto *G = dyn_cast<GlobalAddressSDNode>(Callee);
5535 const GlobalValue *GV = G ? G->getGlobal() : nullptr;
5536 if (isFunctionGlobalAddress(GV)) {
5537 const GlobalValue *GV = cast<GlobalAddressSDNode>(Callee)->getGlobal();
5538
5539 if (Subtarget.isAIXABI()) {
5540 assert(!isa<GlobalIFunc>(GV) && "IFunc is not supported on AIX.");
5541 return getAIXFuncEntryPointSymbolSDNode(GV);
5542 }
5543 return DAG.getTargetGlobalAddress(GV, dl, Callee.getValueType(), 0,
5544 UsePlt ? PPCII::MO_PLT : 0);
5545 }
5546
5547 if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
5548 const char *SymName = S->getSymbol();
5549 if (Subtarget.isAIXABI()) {
5550 // If there exists a user-declared function whose name is the same as the
5551 // ExternalSymbol's, then we pick up the user-declared version.
5553 if (const Function *F =
5554 dyn_cast_or_null<Function>(Mod->getNamedValue(SymName)))
5555 return getAIXFuncEntryPointSymbolSDNode(F);
5556
5557 // On AIX, direct function calls reference the symbol for the function's
5558 // entry point, which is named by prepending a "." before the function's
5559 // C-linkage name. A Qualname is returned here because an external
5560 // function entry point is a csect with XTY_ER property.
5561 const auto getExternalFunctionEntryPointSymbol = [&](StringRef SymName) {
5562 auto &Context = DAG.getMachineFunction().getContext();
5563 MCSectionXCOFF *Sec = Context.getXCOFFSection(
5564 (Twine(".") + Twine(SymName)).str(), SectionKind::getMetadata(),
5566 return Sec->getQualNameSymbol();
5567 };
5568
5569 SymName = getExternalFunctionEntryPointSymbol(SymName)->getName().data();
5570 }
5571 return DAG.getTargetExternalSymbol(SymName, Callee.getValueType(),
5572 UsePlt ? PPCII::MO_PLT : 0);
5573 }
5574
5575 // No transformation needed.
5576 assert(Callee.getNode() && "What no callee?");
5577 return Callee;
5578}
5579
5581 assert(CallSeqStart.getOpcode() == ISD::CALLSEQ_START &&
5582 "Expected a CALLSEQ_STARTSDNode.");
5583
5584  // The last value is the chain, except when the node has glue. If the node
5585  // has glue, then the last value is the glue, and the chain is the second to
5586  // last value.
5587 SDValue LastValue = CallSeqStart.getValue(CallSeqStart->getNumValues() - 1);
5588 if (LastValue.getValueType() != MVT::Glue)
5589 return LastValue;
5590
5591 return CallSeqStart.getValue(CallSeqStart->getNumValues() - 2);
5592}
5593
5594// Creates the node that moves a function's address into the count register
5595// to prepare for an indirect call instruction.
5596static void prepareIndirectCall(SelectionDAG &DAG, SDValue &Callee,
5597 SDValue &Glue, SDValue &Chain,
5598 const SDLoc &dl) {
5599 SDValue MTCTROps[] = {Chain, Callee, Glue};
5600 EVT ReturnTypes[] = {MVT::Other, MVT::Glue};
5601 Chain = DAG.getNode(PPCISD::MTCTR, dl, ReturnTypes,
5602 ArrayRef(MTCTROps, Glue.getNode() ? 3 : 2));
5603 // The glue is the second value produced.
5604 Glue = Chain.getValue(1);
5605}
5606
5608 SDValue &Glue, SDValue &Chain,
5609 SDValue CallSeqStart,
5610 const CallBase *CB, const SDLoc &dl,
5611 bool hasNest,
5612 const PPCSubtarget &Subtarget) {
5613 // Function pointers in the 64-bit SVR4 ABI do not point to the function
5614 // entry point, but to the function descriptor (the function entry point
5615 // address is part of the function descriptor though).
5616 // The function descriptor is a three doubleword structure with the
5617 // following fields: function entry point, TOC base address and
5618 // environment pointer.
5619 // Thus for a call through a function pointer, the following actions need
5620 // to be performed:
5621 // 1. Save the TOC of the caller in the TOC save area of its stack
5622  //    frame (this is done in LowerCall_64SVR4() or LowerCall_AIX()).
5623 // 2. Load the address of the function entry point from the function
5624 // descriptor.
5625 // 3. Load the TOC of the callee from the function descriptor into r2.
5626 // 4. Load the environment pointer from the function descriptor into
5627 // r11.
5628 // 5. Branch to the function entry point address.
5629 // 6. On return of the callee, the TOC of the caller needs to be
5630 // restored (this is done in FinishCall()).
5631 //
5632 // The loads are scheduled at the beginning of the call sequence, and the
5633 // register copies are flagged together to ensure that no other
5634 // operations can be scheduled in between. E.g. without flagging the
5635 // copies together, a TOC access in the caller could be scheduled between
5636 // the assignment of the callee TOC and the branch to the callee, which leads
5637 // to incorrect code.
5638
5639 // Start by loading the function address from the descriptor.
5640 SDValue LDChain = getOutputChainFromCallSeq(CallSeqStart);
5641 auto MMOFlags = Subtarget.hasInvariantFunctionDescriptors()
5645
5646 MachinePointerInfo MPI(CB ? CB->getCalledOperand() : nullptr);
5647
5648 // Registers used in building the DAG.
5649 const MCRegister EnvPtrReg = Subtarget.getEnvironmentPointerRegister();
5650 const MCRegister TOCReg = Subtarget.getTOCPointerRegister();
5651
5652 // Offsets of descriptor members.
5653 const unsigned TOCAnchorOffset = Subtarget.descriptorTOCAnchorOffset();
5654 const unsigned EnvPtrOffset = Subtarget.descriptorEnvironmentPointerOffset();
5655
5656 const MVT RegVT = Subtarget.isPPC64() ? MVT::i64 : MVT::i32;
5657 const Align Alignment = Subtarget.isPPC64() ? Align(8) : Align(4);
5658
5659  // One load for the function's entry point address.
5660 SDValue LoadFuncPtr = DAG.getLoad(RegVT, dl, LDChain, Callee, MPI,
5661 Alignment, MMOFlags);
5662
5663 // One for loading the TOC anchor for the module that contains the called
5664 // function.
5665 SDValue TOCOff = DAG.getIntPtrConstant(TOCAnchorOffset, dl);
5666 SDValue AddTOC = DAG.getNode(ISD::ADD, dl, RegVT, Callee, TOCOff);
5667 SDValue TOCPtr =
5668 DAG.getLoad(RegVT, dl, LDChain, AddTOC,
5669 MPI.getWithOffset(TOCAnchorOffset), Alignment, MMOFlags);
5670
5671 // One for loading the environment pointer.
5672 SDValue PtrOff = DAG.getIntPtrConstant(EnvPtrOffset, dl);
5673 SDValue AddPtr = DAG.getNode(ISD::ADD, dl, RegVT, Callee, PtrOff);
5674 SDValue LoadEnvPtr =
5675 DAG.getLoad(RegVT, dl, LDChain, AddPtr,
5676 MPI.getWithOffset(EnvPtrOffset), Alignment, MMOFlags);
5677
5678
5679 // Then copy the newly loaded TOC anchor to the TOC pointer.
5680 SDValue TOCVal = DAG.getCopyToReg(Chain, dl, TOCReg, TOCPtr, Glue);
5681 Chain = TOCVal.getValue(0);
5682 Glue = TOCVal.getValue(1);
5683
5684 // If the function call has an explicit 'nest' parameter, it takes the
5685 // place of the environment pointer.
5686 assert((!hasNest || !Subtarget.isAIXABI()) &&
5687 "Nest parameter is not supported on AIX.");
5688 if (!hasNest) {
5689 SDValue EnvVal = DAG.getCopyToReg(Chain, dl, EnvPtrReg, LoadEnvPtr, Glue);
5690 Chain = EnvVal.getValue(0);
5691 Glue = EnvVal.getValue(1);
5692 }
5693
5694 // The rest of the indirect call sequence is the same as the non-descriptor
5695 // DAG.
5696 prepareIndirectCall(DAG, LoadFuncPtr, Glue, Chain, dl);
5697}
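// Editorial sketch: the three-doubleword function descriptor that
// prepareDescriptorIndirectCall walks. The concrete offsets come from the
// subtarget (descriptorTOCAnchorOffset / descriptorEnvironmentPointerOffset);
// for the 64-bit ELFv1 and AIX ABIs they correspond to the layout below. The
// struct is illustrative only and is not used by the lowering.
struct OPDFunctionDescriptorSketch {
  uint64_t EntryPoint;  // +0:  address moved into CTR and branched to
  uint64_t TOCBase;     // +8:  loaded into the TOC register (r2) before the call
  uint64_t EnvPointer;  // +16: loaded into r11 unless a 'nest' argument is used
};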
5698
5699static void
5701 PPCTargetLowering::CallFlags CFlags, const SDLoc &dl,
5702 SelectionDAG &DAG,
5703 SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass,
5704 SDValue Glue, SDValue Chain, SDValue &Callee, int SPDiff,
5705 const PPCSubtarget &Subtarget) {
5706 const bool IsPPC64 = Subtarget.isPPC64();
5707 // MVT for a general purpose register.
5708 const MVT RegVT = IsPPC64 ? MVT::i64 : MVT::i32;
5709
5710 // First operand is always the chain.
5711 Ops.push_back(Chain);
5712
5713  // If it's a direct call, pass the callee as the second operand.
5714 if (!CFlags.IsIndirect)
5715 Ops.push_back(Callee);
5716 else {
5717 assert(!CFlags.IsPatchPoint && "Patch point calls are not indirect.");
5718
5719 // For the TOC based ABIs, we have saved the TOC pointer to the linkage area
5720 // on the stack (this would have been done in `LowerCall_64SVR4` or
5721 // `LowerCall_AIX`). The call instruction is a pseudo instruction that
5722 // represents both the indirect branch and a load that restores the TOC
5723 // pointer from the linkage area. The operand for the TOC restore is an add
5724 // of the TOC save offset to the stack pointer. This must be the second
5725 // operand: after the chain input but before any other variadic arguments.
5726 // For 64-bit ELFv2 ABI with PCRel, do not restore the TOC as it is not
5727 // saved or used.
5728 if (isTOCSaveRestoreRequired(Subtarget)) {
5729 const MCRegister StackPtrReg = Subtarget.getStackPointerRegister();
5730
5731 SDValue StackPtr = DAG.getRegister(StackPtrReg, RegVT);
5732 unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
5733 SDValue TOCOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
5734 SDValue AddTOC = DAG.getNode(ISD::ADD, dl, RegVT, StackPtr, TOCOff);
5735 Ops.push_back(AddTOC);
5736 }
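      // For reference (values assumed, not queried here): getTOCSaveOffset()
      // is the ABI-designated TOC save doubleword in the caller's linkage
      // area, e.g. SP + 24 on ELFv2 and SP + 40 on ELFv1 and 64-bit AIX.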
5737
5738 // Add the register used for the environment pointer.
5739 if (Subtarget.usesFunctionDescriptors() && !CFlags.HasNest)
5741 RegVT));
5742
5743
5744 // Add CTR register as callee so a bctr can be emitted later.
5745 if (CFlags.IsTailCall)
5746 Ops.push_back(DAG.getRegister(IsPPC64 ? PPC::CTR8 : PPC::CTR, RegVT));
5747 }
5748
5749  // If this is a tail call, add the stack pointer delta.
5750 if (CFlags.IsTailCall)
5751 Ops.push_back(DAG.getConstant(SPDiff, dl, MVT::i32));
5752
5753 // Add argument registers to the end of the list so that they are known live
5754 // into the call.
5755 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
5756 Ops.push_back(DAG.getRegister(RegsToPass[i].first,
5757 RegsToPass[i].second.getValueType()));
5758
5759 // We cannot add R2/X2 as an operand here for PATCHPOINT, because there is
5760 // no way to mark dependencies as implicit here.
5761 // We will add the R2/X2 dependency in EmitInstrWithCustomInserter.
5762 if ((Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) &&
5763 !CFlags.IsPatchPoint && !Subtarget.isUsingPCRelativeCalls())
5764 Ops.push_back(DAG.getRegister(Subtarget.getTOCPointerRegister(), RegVT));
5765
5766 // Add implicit use of CR bit 6 for 32-bit SVR4 vararg calls
5767 if (CFlags.IsVarArg && Subtarget.is32BitELFABI())
5768 Ops.push_back(DAG.getRegister(PPC::CR1EQ, MVT::i32));
5769
5770 // Add a register mask operand representing the call-preserved registers.
5771 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
5772 const uint32_t *Mask =
5773 TRI->getCallPreservedMask(DAG.getMachineFunction(), CFlags.CallConv);
5774 assert(Mask && "Missing call preserved mask for calling convention");
5775 Ops.push_back(DAG.getRegisterMask(Mask));
5776
5777 // If the glue is valid, it is the last operand.
5778 if (Glue.getNode())
5779 Ops.push_back(Glue);
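  // For reference, the operand list assembled above for an indirect call on a
  // TOC-based ABI is: chain, [SP + TOC-save-offset add], [environment-pointer
  // register when function descriptors are used], [CTR register for tail
  // calls], [SPDiff for tail calls], argument registers, [TOC register],
  // [CR1EQ for 32-bit SVR4 vararg calls], register mask, [glue].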
5780}
5781
5782SDValue PPCTargetLowering::FinishCall(
5783 CallFlags CFlags, const SDLoc &dl, SelectionDAG &DAG,
5784 SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass, SDValue Glue,
5785 SDValue Chain, SDValue CallSeqStart, SDValue &Callee, int SPDiff,
5786 unsigned NumBytes, const SmallVectorImpl<ISD::InputArg> &Ins,
5787 SmallVectorImpl<SDValue> &InVals, const CallBase *CB) const {
5788
5789 if ((Subtarget.is64BitELFABI() && !Subtarget.isUsingPCRelativeCalls()) ||
5790 Subtarget.isAIXABI())
5791 setUsesTOCBasePtr(DAG);
5792
5793 unsigned CallOpc =
5794 getCallOpcode(CFlags, DAG.getMachineFunction().getFunction(), Callee,
5795 Subtarget, DAG.getTarget(), CB ? CB->isStrictFP() : false);
5796
5797 if (!CFlags.IsIndirect)
5798 Callee = transformCallee(Callee, DAG, dl, Subtarget);
5799 else if (Subtarget.usesFunctionDescriptors())
5800 prepareDescriptorIndirectCall(DAG, Callee, Glue, Chain, CallSeqStart, CB,
5801 dl, CFlags.HasNest, Subtarget);
5802 else
5803 prepareIndirectCall(DAG, Callee, Glue, Chain, dl);
5804
5805 // Build the operand list for the call instruction.
5807 buildCallOperands(Ops, CFlags, dl, DAG, RegsToPass, Glue, Chain, Callee,
5808 SPDiff, Subtarget);
5809
5810 // Emit tail call.
5811 if (CFlags.IsTailCall) {
5812    // Indirect tail calls when using PC Relative calls do not have the same
5813    // constraints.
5814 assert(((Callee.getOpcode() == ISD::Register &&
5815 cast<RegisterSDNode>(Callee)->getReg() == PPC::CTR) ||
5816 Callee.getOpcode() == ISD::TargetExternalSymbol ||
5817 Callee.getOpcode() == ISD::TargetGlobalAddress ||
5818 isa<ConstantSDNode>(Callee) ||
5819 (CFlags.IsIndirect && Subtarget.isUsingPCRelativeCalls())) &&
5820 "Expecting a global address, external symbol, absolute value, "
5821 "register or an indirect tail call when PC Relative calls are "
5822 "used.");
5823 // PC Relative calls also use TC_RETURN as the way to mark tail calls.
5824 assert(CallOpc == PPCISD::TC_RETURN &&
5825 "Unexpected call opcode for a tail call.");
5827 SDValue Ret = DAG.getNode(CallOpc, dl, MVT::Other, Ops);
5828 DAG.addNoMergeSiteInfo(Ret.getNode(), CFlags.NoMerge);
5829 return Ret;
5830 }
5831
5832 std::array<EVT, 2> ReturnTypes = {{MVT::Other, MVT::Glue}};
5833 Chain = DAG.getNode(CallOpc, dl, ReturnTypes, Ops);
5834 DAG.addNoMergeSiteInfo(Chain.getNode(), CFlags.NoMerge);
5835 Glue = Chain.getValue(1);
5836
5837 // When performing tail call optimization the callee pops its arguments off
5838 // the stack. Account for this here so these bytes can be pushed back on in
5839 // PPCFrameLowering::eliminateCallFramePseudoInstr.
5840 int BytesCalleePops = (CFlags.CallConv == CallingConv::Fast &&
5842 ? NumBytes
5843 : 0;
5844
5845 Chain = DAG.getCALLSEQ_END(Chain, NumBytes, BytesCalleePops, Glue, dl);
5846 Glue = Chain.getValue(1);
5847
5848 return LowerCallResult(Chain, Glue, CFlags.CallConv, CFlags.IsVarArg, Ins, dl,
5849 DAG, InVals);
5850}
5851
5853 CallingConv::ID CalleeCC = CB->getCallingConv();
5854 const Function *CallerFunc = CB->getCaller();
5855 CallingConv::ID CallerCC = CallerFunc->getCallingConv();
5856 const Function *CalleeFunc = CB->getCalledFunction();
5857 if (!CalleeFunc)
5858 return false;
5859 const GlobalValue *CalleeGV = dyn_cast<GlobalValue>(CalleeFunc);
5860
5863
5864 GetReturnInfo(CalleeCC, CalleeFunc->getReturnType(),
5865 CalleeFunc->getAttributes(), Outs, *this,
5866 CalleeFunc->getDataLayout());
5867
5868 return isEligibleForTCO(CalleeGV, CalleeCC, CallerCC, CB,
5869 CalleeFunc->isVarArg(), Outs, Ins, CallerFunc,
5870 false /*isCalleeExternalSymbol*/);
5871}
5872
5873bool PPCTargetLowering::isEligibleForTCO(
5874 const GlobalValue *CalleeGV, CallingConv::ID CalleeCC,
5875 CallingConv::ID CallerCC, const CallBase *CB, bool isVarArg,
5877 const SmallVectorImpl<ISD::InputArg> &Ins, const Function *CallerFunc,
5878 bool isCalleeExternalSymbol) const {
5879 if (Subtarget.useLongCalls() && !(CB && CB->isMustTailCall()))
5880 return false;
5881
5882 if (Subtarget.isSVR4ABI() && Subtarget.isPPC64())
5883 return IsEligibleForTailCallOptimization_64SVR4(
5884 CalleeGV, CalleeCC, CallerCC, CB, isVarArg, Outs, Ins, CallerFunc,
5885 isCalleeExternalSymbol);
5886 else
5887 return IsEligibleForTailCallOptimization(CalleeGV, CalleeCC, CallerCC,
5888 isVarArg, Ins);
5889}
5890
5891SDValue
5892PPCTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
5893 SmallVectorImpl<SDValue> &InVals) const {
5894 SelectionDAG &DAG = CLI.DAG;
5895 SDLoc &dl = CLI.DL;
5897 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
5899 SDValue Chain = CLI.Chain;
5900 SDValue Callee = CLI.Callee;
5901 bool &isTailCall = CLI.IsTailCall;
5902 CallingConv::ID CallConv = CLI.CallConv;
5903 bool isVarArg = CLI.IsVarArg;
5904 bool isPatchPoint = CLI.IsPatchPoint;
5905 const CallBase *CB = CLI.CB;
5906
5907 if (isTailCall) {
5909 CallingConv::ID CallerCC = MF.getFunction().getCallingConv();
5910 auto *G = dyn_cast<GlobalAddressSDNode>(Callee);
5911 const GlobalValue *GV = G ? G->getGlobal() : nullptr;
5912 bool IsCalleeExternalSymbol = isa<ExternalSymbolSDNode>(Callee);
5913
5914 isTailCall =
5915 isEligibleForTCO(GV, CallConv, CallerCC, CB, isVarArg, Outs, Ins,
5916 &(MF.getFunction()), IsCalleeExternalSymbol);
5917 if (isTailCall) {
5918 ++NumTailCalls;
5919 if (!getTargetMachine().Options.GuaranteedTailCallOpt)
5920 ++NumSiblingCalls;
5921
5922 // PC Relative calls no longer guarantee that the callee is a Global
5923 // Address Node. The callee could be an indirect tail call in which
5924 // case the SDValue for the callee could be a load (to load the address
5925 // of a function pointer) or it may be a register copy (to move the
5926 // address of the callee from a function parameter into a virtual
5927      // register). It may also be an ExternalSymbolSDNode (e.g. memcpy).
5928 assert((Subtarget.isUsingPCRelativeCalls() ||
5929 isa<GlobalAddressSDNode>(Callee)) &&
5930 "Callee should be an llvm::Function object.");
5931
5932 LLVM_DEBUG(dbgs() << "TCO caller: " << DAG.getMachineFunction().getName()
5933 << "\nTCO callee: ");
5934 LLVM_DEBUG(Callee.dump());
5935 }
5936 }
5937
5938 if (!isTailCall && CB && CB->isMustTailCall())
5939 report_fatal_error("failed to perform tail call elimination on a call "
5940 "site marked musttail");
5941
5942 // When long calls (i.e. indirect calls) are always used, calls are always
5943 // made via function pointer. If we have a function name, first translate it
5944 // into a pointer.
5945 if (Subtarget.useLongCalls() && isa<GlobalAddressSDNode>(Callee) &&
5946 !isTailCall)
5947 Callee = LowerGlobalAddress(Callee, DAG);
5948
5949 CallFlags CFlags(
5950 CallConv, isTailCall, isVarArg, isPatchPoint,
5951 isIndirectCall(Callee, DAG, Subtarget, isPatchPoint),
5952 // hasNest
5953 Subtarget.is64BitELFABI() &&
5954 any_of(Outs, [](ISD::OutputArg Arg) { return Arg.Flags.isNest(); }),
5955 CLI.NoMerge);
5956
5957 if (Subtarget.isAIXABI())
5958 return LowerCall_AIX(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
5959 InVals, CB);
5960
5961 assert(Subtarget.isSVR4ABI());
5962 if (Subtarget.isPPC64())
5963 return LowerCall_64SVR4(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
5964 InVals, CB);
5965 return LowerCall_32SVR4(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
5966 InVals, CB);
5967}
5968
5969SDValue PPCTargetLowering::LowerCall_32SVR4(
5970 SDValue Chain, SDValue Callee, CallFlags CFlags,
5972 const SmallVectorImpl<SDValue> &OutVals,
5973 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
5975 const CallBase *CB) const {
5976 // See PPCTargetLowering::LowerFormalArguments_32SVR4() for a description
5977 // of the 32-bit SVR4 ABI stack frame layout.
5978
5979 const CallingConv::ID CallConv = CFlags.CallConv;
5980 const bool IsVarArg = CFlags.IsVarArg;
5981 const bool IsTailCall = CFlags.IsTailCall;
5982
5983 assert((CallConv == CallingConv::C ||
5984 CallConv == CallingConv::Cold ||
5985 CallConv == CallingConv::Fast) && "Unknown calling convention!");
5986
5987 const Align PtrAlign(4);
5988
5990
5991  // Mark this function as potentially containing a function that contains a
5992  // tail call. As a consequence the frame pointer will be used for dynamic
5993  // allocas and for restoring the caller's stack pointer in this function's
5994  // epilog. This is done because the tail-called function might overwrite the
5995  // value in this function's (MF) stack pointer stack slot 0(SP).
5996 if (getTargetMachine().Options.GuaranteedTailCallOpt &&
5997 CallConv == CallingConv::Fast)
5998 MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
5999
6000 // Count how many bytes are to be pushed on the stack, including the linkage
6001 // area, parameter list area and the part of the local variable space which
6002 // contains copies of aggregates which are passed by value.
6003
6004 // Assign locations to all of the outgoing arguments.
6006 PPCCCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
6007
6008 // Reserve space for the linkage area on the stack.
6009 CCInfo.AllocateStack(Subtarget.getFrameLowering()->getLinkageSize(),
6010 PtrAlign);
6011 if (useSoftFloat())
6012 CCInfo.PreAnalyzeCallOperands(Outs);
6013
6014 if (IsVarArg) {
6015 // Handle fixed and variable vector arguments differently.
6016 // Fixed vector arguments go into registers as long as registers are
6017 // available. Variable vector arguments always go into memory.
6018 unsigned NumArgs = Outs.size();
6019
6020 for (unsigned i = 0; i != NumArgs; ++i) {
6021 MVT ArgVT = Outs[i].VT;
6022 ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
6023 bool Result;
6024
6025 if (Outs[i].IsFixed) {
6026 Result = CC_PPC32_SVR4(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags,
6027 CCInfo);
6028 } else {
6030 ArgFlags, CCInfo);
6031 }
6032
6033 if (Result) {
6034#ifndef NDEBUG
6035 errs() << "Call operand #" << i << " has unhandled type "
6036 << ArgVT << "\n";
6037#endif
6038 llvm_unreachable(nullptr);
6039 }
6040 }
6041 } else {
6042 // All arguments are treated the same.
6043 CCInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4);
6044 }
6045 CCInfo.clearWasPPCF128();
6046
6047 // Assign locations to all of the outgoing aggregate by value arguments.
6048 SmallVector<CCValAssign, 16> ByValArgLocs;
6049 CCState CCByValInfo(CallConv, IsVarArg, MF, ByValArgLocs, *DAG.getContext());
6050
6051 // Reserve stack space for the allocations in CCInfo.
6052 CCByValInfo.AllocateStack(CCInfo.getStackSize(), PtrAlign);
6053
6054 CCByValInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4_ByVal);
6055
6056  // Size of the linkage area, parameter list area and the part of the local
6057  // variable space where copies of aggregates which are passed by value are
6058  // stored.
6059 unsigned NumBytes = CCByValInfo.getStackSize();
6060
6061 // Calculate by how many bytes the stack has to be adjusted in case of tail
6062 // call optimization.
6063 int SPDiff = CalculateTailCallSPDiff(DAG, IsTailCall, NumBytes);
6064
6065 // Adjust the stack pointer for the new arguments...
6066 // These operations are automatically eliminated by the prolog/epilog pass
6067 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
6068 SDValue CallSeqStart = Chain;
6069
6070  // Load the return address and frame pointer so they can be moved somewhere
6071  // else later.
6072 SDValue LROp, FPOp;
6073 Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl);
6074
6075 // Set up a copy of the stack pointer for use loading and storing any
6076 // arguments that may not fit in the registers available for argument
6077 // passing.
6078 SDValue StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
6079
6081 SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
6082 SmallVector<SDValue, 8> MemOpChains;
6083
6084 bool seenFloatArg = false;
6085 // Walk the register/memloc assignments, inserting copies/loads.
6086 // i - Tracks the index into the list of registers allocated for the call
6087 // RealArgIdx - Tracks the index into the list of actual function arguments
6088 // j - Tracks the index into the list of byval arguments
6089 for (unsigned i = 0, RealArgIdx = 0, j = 0, e = ArgLocs.size();
6090 i != e;
6091 ++i, ++RealArgIdx) {
6092 CCValAssign &VA = ArgLocs[i];
6093 SDValue Arg = OutVals[RealArgIdx];
6094 ISD::ArgFlagsTy Flags = Outs[RealArgIdx].Flags;
6095
6096 if (Flags.isByVal()) {
6097 // Argument is an aggregate which is passed by value, thus we need to
6098 // create a copy of it in the local variable space of the current stack
6099 // frame (which is the stack frame of the caller) and pass the address of
6100 // this copy to the callee.
6101 assert((j < ByValArgLocs.size()) && "Index out of bounds!");
6102 CCValAssign &ByValVA = ByValArgLocs[j++];
6103 assert((VA.getValNo() == ByValVA.getValNo()) && "ValNo mismatch!");
6104
6105      // Memory reserved in the local variable space of the caller's stack frame.
6106 unsigned LocMemOffset = ByValVA.getLocMemOffset();
6107
6108 SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
6109 PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(MF.getDataLayout()),
6110 StackPtr, PtrOff);
6111
6112 // Create a copy of the argument in the local area of the current
6113 // stack frame.
6114 SDValue MemcpyCall =
6115 CreateCopyOfByValArgument(Arg, PtrOff,
6116 CallSeqStart.getNode()->getOperand(0),
6117 Flags, DAG, dl);
6118
6119 // This must go outside the CALLSEQ_START..END.
6120 SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall, NumBytes, 0,
6121 SDLoc(MemcpyCall));
6122 DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
6123 NewCallSeqStart.getNode());
6124 Chain = CallSeqStart = NewCallSeqStart;
6125
6126 // Pass the address of the aggregate copy on the stack either in a
6127 // physical register or in the parameter list area of the current stack
6128 // frame to the callee.
6129 Arg = PtrOff;
6130 }
6131
6132    // When useCRBits() is true, there can be i1 arguments.
6133    // This is because getRegisterType(MVT::i1) => MVT::i1,
6134    // while for other integer types getRegisterType() => MVT::i32.
6135    // Extend i1 values here to ensure the callee gets an i32.
6136 if (Arg.getValueType() == MVT::i1)
6137 Arg = DAG.getNode(Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND,
6138 dl, MVT::i32, Arg);
6139
6140 if (VA.isRegLoc()) {
6141 seenFloatArg |= VA.getLocVT().isFloatingPoint();
6142 // Put argument in a physical register.
6143 if (Subtarget.hasSPE() && Arg.getValueType() == MVT::f64) {
6144 bool IsLE = Subtarget.isLittleEndian();
6145 SDValue SVal = DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
6146 DAG.getIntPtrConstant(IsLE ? 0 : 1, dl));
6147 RegsToPass.push_back(std::make_pair(VA.getLocReg(), SVal.getValue(0)));
6148 SVal = DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
6149 DAG.getIntPtrConstant(IsLE ? 1 : 0, dl));
6150 RegsToPass.push_back(std::make_pair(ArgLocs[++i].getLocReg(),
6151 SVal.getValue(0)));
6152 } else
6153 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
6154 } else {
6155 // Put argument in the parameter list area of the current stack frame.
6156 assert(VA.isMemLoc());
6157 unsigned LocMemOffset = VA.getLocMemOffset();
6158
6159 if (!IsTailCall) {
6160 SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
6161 PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(MF.getDataLayout()),
6162 StackPtr, PtrOff);
6163
6164 MemOpChains.push_back(
6165 DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));
6166 } else {
6167 // Calculate and remember argument location.
6168 CalculateTailCallArgDest(DAG, MF, false, Arg, SPDiff, LocMemOffset,
6169 TailCallArguments);
6170 }
6171 }
6172 }
6173
6174 if (!MemOpChains.empty())
6175 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
6176
6177 // Build a sequence of copy-to-reg nodes chained together with token chain
6178 // and flag operands which copy the outgoing args into the appropriate regs.
6179 SDValue InGlue;
6180 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
6181 Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
6182 RegsToPass[i].second, InGlue);
6183 InGlue = Chain.getValue(1);
6184 }
6185
6186 // Set CR bit 6 to true if this is a vararg call with floating args passed in
6187 // registers.
6188 if (IsVarArg) {
6189 SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
6190 SDValue Ops[] = { Chain, InGlue };
6191
6192 Chain = DAG.getNode(seenFloatArg ? PPCISD::CR6SET : PPCISD::CR6UNSET, dl,
6193 VTs, ArrayRef(Ops, InGlue.getNode() ? 2 : 1));
6194
6195 InGlue = Chain.getValue(1);
6196 }
6197
6198 if (IsTailCall)
6199 PrepareTailCall(DAG, InGlue, Chain, dl, SPDiff, NumBytes, LROp, FPOp,
6200 TailCallArguments);
6201
6202 return FinishCall(CFlags, dl, DAG, RegsToPass, InGlue, Chain, CallSeqStart,
6203 Callee, SPDiff, NumBytes, Ins, InVals, CB);
6204}
6205
6206// Copy an argument into memory, being careful to do this outside the
6207// call sequence for the call to which the argument belongs.
6208SDValue PPCTargetLowering::createMemcpyOutsideCallSeq(
6209 SDValue Arg, SDValue PtrOff, SDValue CallSeqStart, ISD::ArgFlagsTy Flags,
6210 SelectionDAG &DAG, const SDLoc &dl) const {
6211 SDValue MemcpyCall = CreateCopyOfByValArgument(Arg, PtrOff,
6212 CallSeqStart.getNode()->getOperand(0),
6213 Flags, DAG, dl);
6214 // The MEMCPY must go outside the CALLSEQ_START..END.
6215 int64_t FrameSize = CallSeqStart.getConstantOperandVal(1);
6216 SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall, FrameSize, 0,
6217 SDLoc(MemcpyCall));
6218 DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
6219 NewCallSeqStart.getNode());
6220 return NewCallSeqStart;
6221}
6222
6223SDValue PPCTargetLowering::LowerCall_64SVR4(
6224 SDValue Chain, SDValue Callee, CallFlags CFlags,
6226 const SmallVectorImpl<SDValue> &OutVals,
6227 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
6229 const CallBase *CB) const {
6230 bool isELFv2ABI = Subtarget.isELFv2ABI();
6231 bool isLittleEndian = Subtarget.isLittleEndian();
6232 unsigned NumOps = Outs.size();
6233 bool IsSibCall = false;
6234 bool IsFastCall = CFlags.CallConv == CallingConv::Fast;
6235
6236 EVT PtrVT = getPointerTy(DAG.getDataLayout());
6237 unsigned PtrByteSize = 8;
6238
6240
6241 if (CFlags.IsTailCall && !getTargetMachine().Options.GuaranteedTailCallOpt)
6242 IsSibCall = true;
6243
6244  // Mark this function as potentially containing a function that contains a
6245  // tail call. As a consequence the frame pointer will be used for dynamic
6246  // allocas and for restoring the caller's stack pointer in this function's
6247  // epilog. This is done because the tail-called function might overwrite the
6248  // value in this function's (MF) stack pointer stack slot 0(SP).
6249 if (getTargetMachine().Options.GuaranteedTailCallOpt && IsFastCall)
6250 MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
6251
6252 assert(!(IsFastCall && CFlags.IsVarArg) &&
6253 "fastcc not supported on varargs functions");
6254
6255 // Count how many bytes are to be pushed on the stack, including the linkage
6256 // area, and parameter passing area. On ELFv1, the linkage area is 48 bytes
6257 // reserved space for [SP][CR][LR][2 x unused][TOC]; on ELFv2, the linkage
6258 // area is 32 bytes reserved space for [SP][CR][LR][TOC].
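  // Illustrative byte offsets for the two layouts above (assumed here for
  // exposition; the code below only uses getLinkageSize()):
  //   ELFv1: 0 back chain, 8 CR save, 16 LR save, 24/32 reserved, 40 TOC -> 48
  //   ELFv2: 0 back chain, 8 CR save, 16 LR save, 24 TOC                 -> 32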
6259 unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
6260 unsigned NumBytes = LinkageSize;
6261 unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
6262
6263 static const MCPhysReg GPR[] = {
6264 PPC::X3, PPC::X4, PPC::X5, PPC::X6,
6265 PPC::X7, PPC::X8, PPC::X9, PPC::X10,
6266 };
6267 static const MCPhysReg VR[] = {
6268 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
6269 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
6270 };
6271
6272 const unsigned NumGPRs = std::size(GPR);
6273 const unsigned NumFPRs = useSoftFloat() ? 0 : 13;
6274 const unsigned NumVRs = std::size(VR);
6275
6276 // On ELFv2, we can avoid allocating the parameter area if all the arguments
6277 // can be passed to the callee in registers.
6278 // For the fast calling convention, there is another check below.
6279 // Note: We should keep consistent with LowerFormalArguments_64SVR4()
6280 bool HasParameterArea = !isELFv2ABI || CFlags.IsVarArg || IsFastCall;
6281 if (!HasParameterArea) {
6282 unsigned ParamAreaSize = NumGPRs * PtrByteSize;
6283 unsigned AvailableFPRs = NumFPRs;
6284 unsigned AvailableVRs = NumVRs;
6285 unsigned NumBytesTmp = NumBytes;
6286 for (unsigned i = 0; i != NumOps; ++i) {
6287 if (Outs[i].Flags.isNest()) continue;
6288 if (CalculateStackSlotUsed(Outs[i].VT, Outs[i].ArgVT, Outs[i].Flags,
6289 PtrByteSize, LinkageSize, ParamAreaSize,
6290 NumBytesTmp, AvailableFPRs, AvailableVRs))
6291 HasParameterArea = true;
6292 }
6293 }
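  // For instance, a non-variadic ELFv2 call such as f(long, double, vector int)
  // consumes one GPR, one FPR and one VR, so no argument needs a stack slot and
  // the parameter save area can be omitted entirely (illustrative example).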
6294
6295 // When using the fast calling convention, we don't provide backing for
6296 // arguments that will be in registers.
6297 unsigned NumGPRsUsed = 0, NumFPRsUsed = 0, NumVRsUsed = 0;
6298
6299 // Avoid allocating parameter area for fastcc functions if all the arguments
6300 // can be passed in the registers.
6301 if (IsFastCall)
6302 HasParameterArea = false;
6303
6304 // Add up all the space actually used.
6305 for (unsigned i = 0; i != NumOps; ++i) {
6306 ISD::ArgFlagsTy Flags = Outs[i].Flags;
6307 EVT ArgVT = Outs[i].VT;
6308 EVT OrigVT = Outs[i].ArgVT;
6309
6310 if (Flags.isNest())
6311 continue;
6312
6313 if (IsFastCall) {
6314 if (Flags.isByVal()) {
6315 NumGPRsUsed += (Flags.getByValSize()+7)/8;
6316 if (NumGPRsUsed > NumGPRs)
6317 HasParameterArea = true;
6318 } else {
6319 switch (ArgVT.getSimpleVT().SimpleTy) {
6320 default: llvm_unreachable("Unexpected ValueType for argument!");
6321 case MVT::i1:
6322 case MVT::i32:
6323 case MVT::i64:
6324 if (++NumGPRsUsed <= NumGPRs)
6325 continue;
6326 break;
6327 case MVT::v4i32:
6328 case MVT::v8i16:
6329 case MVT::v16i8:
6330 case MVT::v2f64:
6331 case MVT::v2i64:
6332 case MVT::v1i128:
6333 case MVT::f128:
6334 if (++NumVRsUsed <= NumVRs)
6335 continue;
6336 break;
6337 case MVT::v4f32:
6338 if (++NumVRsUsed <= NumVRs)
6339 continue;
6340 break;
6341 case MVT::f32:
6342 case MVT::f64:
6343 if (++NumFPRsUsed <= NumFPRs)
6344 continue;
6345 break;
6346 }
6347 HasParameterArea = true;
6348 }
6349 }
6350
6351 /* Respect alignment of argument on the stack. */
6352    auto Alignment =
6353        CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
6354    NumBytes = alignTo(NumBytes, Alignment);
6355
6356 NumBytes += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
6357 if (Flags.isInConsecutiveRegsLast())
6358 NumBytes = ((NumBytes + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
6359 }
6360
6361 unsigned NumBytesActuallyUsed = NumBytes;
6362
6363 // In the old ELFv1 ABI,
6364 // the prolog code of the callee may store up to 8 GPR argument registers to
6365  // the stack, allowing va_start to index over them in memory if it is varargs.
6366 // Because we cannot tell if this is needed on the caller side, we have to
6367 // conservatively assume that it is needed. As such, make sure we have at
6368 // least enough stack space for the caller to store the 8 GPRs.
6369 // In the ELFv2 ABI, we allocate the parameter area iff a callee
6370 // really requires memory operands, e.g. a vararg function.
6371 if (HasParameterArea)
6372 NumBytes = std::max(NumBytes, LinkageSize + 8 * PtrByteSize);
6373 else
6374 NumBytes = LinkageSize;
6375
6376 // Tail call needs the stack to be aligned.
6377 if (getTargetMachine().Options.GuaranteedTailCallOpt && IsFastCall)
6378 NumBytes = EnsureStackAlignment(Subtarget.getFrameLowering(), NumBytes);
6379
6380 int SPDiff = 0;
6381
6382 // Calculate by how many bytes the stack has to be adjusted in case of tail
6383 // call optimization.
6384 if (!IsSibCall)
6385 SPDiff = CalculateTailCallSPDiff(DAG, CFlags.IsTailCall, NumBytes);
6386
6387 // To protect arguments on the stack from being clobbered in a tail call,
6388 // force all the loads to happen before doing any other lowering.
6389 if (CFlags.IsTailCall)
6390 Chain = DAG.getStackArgumentTokenFactor(Chain);
6391
6392 // Adjust the stack pointer for the new arguments...
6393 // These operations are automatically eliminated by the prolog/epilog pass
6394 if (!IsSibCall)
6395 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
6396 SDValue CallSeqStart = Chain;
6397
6398  // Load the return address and frame pointer so they can be moved somewhere
6399  // else later.
6400 SDValue LROp, FPOp;
6401 Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl);
6402
6403 // Set up a copy of the stack pointer for use loading and storing any
6404 // arguments that may not fit in the registers available for argument
6405 // passing.
6406 SDValue StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
6407
6408 // Figure out which arguments are going to go in registers, and which in
6409 // memory. Also, if this is a vararg function, floating point operations
6410 // must be stored to our stack, and loaded into integer regs as well, if
6411 // any integer regs are available for argument passing.
6412 unsigned ArgOffset = LinkageSize;
6413
6415 SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
6416
6417 SmallVector<SDValue, 8> MemOpChains;
6418 for (unsigned i = 0; i != NumOps; ++i) {
6419 SDValue Arg = OutVals[i];
6420 ISD::ArgFlagsTy Flags = Outs[i].Flags;
6421 EVT ArgVT = Outs[i].VT;
6422 EVT OrigVT = Outs[i].ArgVT;
6423
6424 // PtrOff will be used to store the current argument to the stack if a
6425 // register cannot be found for it.
6426 SDValue PtrOff;
6427
6428 // We re-align the argument offset for each argument, except when using the
6429 // fast calling convention, when we need to make sure we do that only when
6430 // we'll actually use a stack slot.
6431 auto ComputePtrOff = [&]() {
6432 /* Respect alignment of argument on the stack. */
6433 auto Alignment =
6434 CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
6435 ArgOffset = alignTo(ArgOffset, Alignment);
6436
6437 PtrOff = DAG.getConstant(ArgOffset, dl, StackPtr.getValueType());
6438
6439 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
6440 };
6441
6442 if (!IsFastCall) {
6443 ComputePtrOff();
6444
6445 /* Compute GPR index associated with argument offset. */
6446 GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
6447 GPR_idx = std::min(GPR_idx, NumGPRs);
6448 }
6449
6450 // Promote integers to 64-bit values.
6451 if (Arg.getValueType() == MVT::i32 || Arg.getValueType() == MVT::i1) {
6452 // FIXME: Should this use ANY_EXTEND if neither sext nor zext?
6453 unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
6454 Arg = DAG.getNode(ExtOp, dl, MVT::i64, Arg);
6455 }
6456
6457 // FIXME memcpy is used way more than necessary. Correctness first.
6458 // Note: "by value" is code for passing a structure by value, not
6459 // basic types.
6460 if (Flags.isByVal()) {
6461 // Note: Size includes alignment padding, so
6462 // struct x { short a; char b; }
6463 // will have Size = 4. With #pragma pack(1), it will have Size = 3.
6464 // These are the proper values we need for right-justifying the
6465 // aggregate in a parameter register.
6466 unsigned Size = Flags.getByValSize();
6467
6468 // An empty aggregate parameter takes up no storage and no
6469 // registers.
6470 if (Size == 0)
6471 continue;
6472
6473 if (IsFastCall)
6474 ComputePtrOff();
6475
6476 // All aggregates smaller than 8 bytes must be passed right-justified.
6477 if (Size==1 || Size==2 || Size==4) {
6478 EVT VT = (Size==1) ? MVT::i8 : ((Size==2) ? MVT::i16 : MVT::i32);
6479 if (GPR_idx != NumGPRs) {
6480 SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg,
6481 MachinePointerInfo(), VT);
6482 MemOpChains.push_back(Load.getValue(1));
6483 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6484
6485 ArgOffset += PtrByteSize;
6486 continue;
6487 }
6488 }
6489
6490 if (GPR_idx == NumGPRs && Size < 8) {
6491 SDValue AddPtr = PtrOff;
6492 if (!isLittleEndian) {
6493 SDValue Const = DAG.getConstant(PtrByteSize - Size, dl,
6494 PtrOff.getValueType());
6495 AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
6496 }
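        // For example (big endian): a 3-byte aggregate is copied to PtrOff + 5
        // so it occupies the rightmost bytes of its 8-byte parameter slot.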
6497 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
6498 CallSeqStart,
6499 Flags, DAG, dl);
6500 ArgOffset += PtrByteSize;
6501 continue;
6502 }
6503      // Copy the object to the parameter save area if it cannot be entirely
6504      // passed in registers.
6505 // FIXME: we only need to copy the parts which need to be passed in
6506 // parameter save area. For the parts passed by registers, we don't need
6507 // to copy them to the stack although we need to allocate space for them
6508 // in parameter save area.
6509 if ((NumGPRs - GPR_idx) * PtrByteSize < Size)
6510 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, PtrOff,
6511 CallSeqStart,
6512 Flags, DAG, dl);
6513
6514 // When a register is available, pass a small aggregate right-justified.
6515 if (Size < 8 && GPR_idx != NumGPRs) {
6516 // The easiest way to get this right-justified in a register
6517 // is to copy the structure into the rightmost portion of a
6518 // local variable slot, then load the whole slot into the
6519 // register.
6520 // FIXME: The memcpy seems to produce pretty awful code for
6521 // small aggregates, particularly for packed ones.
6522 // FIXME: It would be preferable to use the slot in the
6523 // parameter save area instead of a new local variable.
6524 SDValue AddPtr = PtrOff;
6525 if (!isLittleEndian) {
6526 SDValue Const = DAG.getConstant(8 - Size, dl, PtrOff.getValueType());
6527 AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
6528 }
6529 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
6530 CallSeqStart,
6531 Flags, DAG, dl);
6532
6533 // Load the slot into the register.
6534 SDValue Load =
6535 DAG.getLoad(PtrVT, dl, Chain, PtrOff, MachinePointerInfo());
6536 MemOpChains.push_back(Load.getValue(1));
6537 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6538
6539 // Done with this argument.
6540 ArgOffset += PtrByteSize;
6541 continue;
6542 }
6543
6544 // For aggregates larger than PtrByteSize, copy the pieces of the
6545 // object that fit into registers from the parameter save area.
6546 for (unsigned j=0; j<Size; j+=PtrByteSize) {
6547 SDValue Const = DAG.getConstant(j, dl, PtrOff.getValueType());
6548 SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
6549 if (GPR_idx != NumGPRs) {
6550 unsigned LoadSizeInBits = std::min(PtrByteSize, (Size - j)) * 8;
6551 EVT ObjType = EVT::getIntegerVT(*DAG.getContext(), LoadSizeInBits);
6552 SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, AddArg,
6553 MachinePointerInfo(), ObjType);
6554
6555 MemOpChains.push_back(Load.getValue(1));
6556 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6557 ArgOffset += PtrByteSize;
6558 } else {
6559 ArgOffset += ((Size - j + PtrByteSize-1)/PtrByteSize)*PtrByteSize;
6560 break;
6561 }
6562 }
6563 continue;
6564 }
6565
6566 switch (Arg.getSimpleValueType().SimpleTy) {
6567 default: llvm_unreachable("Unexpected ValueType for argument!");
6568 case MVT::i1:
6569 case MVT::i32:
6570 case MVT::i64:
6571 if (Flags.isNest()) {
6572 // The 'nest' parameter, if any, is passed in R11.
6573 RegsToPass.push_back(std::make_pair(PPC::X11, Arg));
6574 break;
6575 }
6576
6577 // These can be scalar arguments or elements of an integer array type
6578 // passed directly. Clang may use those instead of "byval" aggregate
6579 // types to avoid forcing arguments to memory unnecessarily.
6580 if (GPR_idx != NumGPRs) {
6581 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg));
6582 } else {
6583 if (IsFastCall)
6584 ComputePtrOff();
6585
6586 assert(HasParameterArea &&
6587 "Parameter area must exist to pass an argument in memory.");
6588 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6589 true, CFlags.IsTailCall, false, MemOpChains,
6590 TailCallArguments, dl);
6591 if (IsFastCall)
6592 ArgOffset += PtrByteSize;
6593 }
6594 if (!IsFastCall)
6595 ArgOffset += PtrByteSize;
6596 break;
6597 case MVT::f32:
6598 case MVT::f64: {
6599 // These can be scalar arguments or elements of a float array type
6600 // passed directly. The latter are used to implement ELFv2 homogenous
6601 // float aggregates.
6602
6603 // Named arguments go into FPRs first, and once they overflow, the
6604 // remaining arguments go into GPRs and then the parameter save area.
6605 // Unnamed arguments for vararg functions always go to GPRs and
6606 // then the parameter save area. For now, put all arguments to vararg
6607 // routines always in both locations (FPR *and* GPR or stack slot).
6608 bool NeedGPROrStack = CFlags.IsVarArg || FPR_idx == NumFPRs;
6609 bool NeededLoad = false;
6610
6611 // First load the argument into the next available FPR.
6612 if (FPR_idx != NumFPRs)
6613 RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg));
6614
6615 // Next, load the argument into GPR or stack slot if needed.
6616 if (!NeedGPROrStack)
6617 ;
6618 else if (GPR_idx != NumGPRs && !IsFastCall) {
6619 // FIXME: We may want to re-enable this for CallingConv::Fast on the P8
6620 // once we support fp <-> gpr moves.
6621
6622 // In the non-vararg case, this can only ever happen in the
6623 // presence of f32 array types, since otherwise we never run
6624 // out of FPRs before running out of GPRs.
6625 SDValue ArgVal;
6626
6627 // Double values are always passed in a single GPR.
6628 if (Arg.getValueType() != MVT::f32) {
6629 ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i64, Arg);
6630
6631 // Non-array float values are extended and passed in a GPR.
6632 } else if (!Flags.isInConsecutiveRegs()) {
6633 ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
6634 ArgVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, ArgVal);
6635
6636 // If we have an array of floats, we collect every odd element
6637 // together with its predecessor into one GPR.
6638 } else if (ArgOffset % PtrByteSize != 0) {
6639 SDValue Lo, Hi;
6640 Lo = DAG.getNode(ISD::BITCAST, dl, MVT::i32, OutVals[i - 1]);
6641 Hi = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
6642 if (!isLittleEndian)
6643 std::swap(Lo, Hi);
6644 ArgVal = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
6645
6646 // The final element, if even, goes into the first half of a GPR.
6647 } else if (Flags.isInConsecutiveRegsLast()) {
6648 ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
6649 ArgVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, ArgVal);
6650 if (!isLittleEndian)
6651 ArgVal = DAG.getNode(ISD::SHL, dl, MVT::i64, ArgVal,
6652 DAG.getConstant(32, dl, MVT::i32));
6653
6654 // Non-final even elements are skipped; they will be handled
6655 // together with the subsequent argument on the next go-around.
6656 } else
6657 ArgVal = SDValue();
6658
6659 if (ArgVal.getNode())
6660 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], ArgVal));
6661 } else {
6662 if (IsFastCall)
6663 ComputePtrOff();
6664
6665 // Single-precision floating-point values are mapped to the
6666 // second (rightmost) word of the stack doubleword.
6667 if (Arg.getValueType() == MVT::f32 &&
6668 !isLittleEndian && !Flags.isInConsecutiveRegs()) {
6669 SDValue ConstFour = DAG.getConstant(4, dl, PtrOff.getValueType());
6670 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour);
6671 }
6672
6673 assert(HasParameterArea &&
6674 "Parameter area must exist to pass an argument in memory.");
6675 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6676 true, CFlags.IsTailCall, false, MemOpChains,
6677 TailCallArguments, dl);
6678
6679 NeededLoad = true;
6680 }
6681 // When passing an array of floats, the array occupies consecutive
6682 // space in the argument area; only round up to the next doubleword
6683 // at the end of the array. Otherwise, each float takes 8 bytes.
6684 if (!IsFastCall || NeededLoad) {
6685 ArgOffset += (Arg.getValueType() == MVT::f32 &&
6686 Flags.isInConsecutiveRegs()) ? 4 : 8;
6687 if (Flags.isInConsecutiveRegsLast())
6688 ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
6689 }
6690 break;
6691 }
6692 case MVT::v4f32:
6693 case MVT::v4i32:
6694 case MVT::v8i16:
6695 case MVT::v16i8:
6696 case MVT::v2f64:
6697 case MVT::v2i64:
6698 case MVT::v1i128:
6699 case MVT::f128:
6700 // These can be scalar arguments or elements of a vector array type
6701 // passed directly. The latter are used to implement ELFv2 homogeneous
6702 // vector aggregates.
6703
6704 // For a varargs call, named arguments go into VRs or on the stack as
6705 // usual; unnamed arguments always go to the stack or the corresponding
6706 // GPRs when within range. For now, we always put the value in both
6707 // locations (or even all three).
6708 if (CFlags.IsVarArg) {
6709 assert(HasParameterArea &&
6710 "Parameter area must exist if we have a varargs call.");
6711 // We could elide this store in the case where the object fits
6712 // entirely in R registers. Maybe later.
6713 SDValue Store =
6714 DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo());
6715 MemOpChains.push_back(Store);
6716 if (VR_idx != NumVRs) {
6717 SDValue Load =
6718 DAG.getLoad(MVT::v4f32, dl, Store, PtrOff, MachinePointerInfo());
6719 MemOpChains.push_back(Load.getValue(1));
6720 RegsToPass.push_back(std::make_pair(VR[VR_idx++], Load));
6721 }
6722 ArgOffset += 16;
6723 for (unsigned i=0; i<16; i+=PtrByteSize) {
6724 if (GPR_idx == NumGPRs)
6725 break;
6726 SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
6727 DAG.getConstant(i, dl, PtrVT));
6728 SDValue Load =
6729 DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo());
6730 MemOpChains.push_back(Load.getValue(1));
6731 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6732 }
6733 break;
6734 }
6735
6736 // Non-varargs Altivec params go into VRs or on the stack.
6737 if (VR_idx != NumVRs) {
6738 RegsToPass.push_back(std::make_pair(VR[VR_idx++], Arg));
6739 } else {
6740 if (IsFastCall)
6741 ComputePtrOff();
6742
6743 assert(HasParameterArea &&
6744 "Parameter area must exist to pass an argument in memory.");
6745 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6746 true, CFlags.IsTailCall, true, MemOpChains,
6747 TailCallArguments, dl);
6748 if (IsFastCall)
6749 ArgOffset += 16;
6750 }
6751
6752 if (!IsFastCall)
6753 ArgOffset += 16;
6754 break;
6755 }
6756 }
6757
6758 assert((!HasParameterArea || NumBytesActuallyUsed == ArgOffset) &&
6759 "mismatch in size of parameter area");
6760 (void)NumBytesActuallyUsed;
6761
6762 if (!MemOpChains.empty())
6763 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
6764
6765 // Check if this is an indirect call (MTCTR/BCTRL).
6766 // See prepareDescriptorIndirectCall and buildCallOperands for more
6767 // information about calls through function pointers in the 64-bit SVR4 ABI.
6768 if (CFlags.IsIndirect) {
6769 // For 64-bit ELFv2 ABI with PCRel, do not save the TOC of the
6770 // caller in the TOC save area.
6771 if (isTOCSaveRestoreRequired(Subtarget)) {
6772 assert(!CFlags.IsTailCall && "Indirect tail calls not supported");
6773 // Load r2 into a virtual register and store it to the TOC save area.
6774 setUsesTOCBasePtr(DAG);
6775 SDValue Val = DAG.getCopyFromReg(Chain, dl, PPC::X2, MVT::i64);
6776 // TOC save area offset.
6777 unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
6778 SDValue PtrOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
6779 SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
6780 Chain = DAG.getStore(Val.getValue(1), dl, Val, AddPtr,
6781 MachinePointerInfo::getStack(
6782 DAG.getMachineFunction(), TOCSaveOffset));
6783 }
6784 // In the ELFv2 ABI, R12 must contain the address of an indirect callee.
6785 // This does not mean the MTCTR instruction must use R12; it's easier
6786 // to model this as an extra parameter, so do that.
6787 if (isELFv2ABI && !CFlags.IsPatchPoint)
6788 RegsToPass.push_back(std::make_pair((unsigned)PPC::X12, Callee));
6789 }
6790
6791 // Build a sequence of copy-to-reg nodes chained together with token chain
6792 // and flag operands which copy the outgoing args into the appropriate regs.
6793 SDValue InGlue;
6794 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
6795 Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
6796 RegsToPass[i].second, InGlue);
6797 InGlue = Chain.getValue(1);
6798 }
6799
6800 if (CFlags.IsTailCall && !IsSibCall)
6801 PrepareTailCall(DAG, InGlue, Chain, dl, SPDiff, NumBytes, LROp, FPOp,
6802 TailCallArguments);
6803
6804 return FinishCall(CFlags, dl, DAG, RegsToPass, InGlue, Chain, CallSeqStart,
6805 Callee, SPDiff, NumBytes, Ins, InVals, CB);
6806}
6807
6808// Returns true when the shadow of a general purpose argument register
6809// in the parameter save area is aligned to at least 'RequiredAlign'.
6810static bool isGPRShadowAligned(MCPhysReg Reg, Align RequiredAlign) {
6811 assert(RequiredAlign.value() <= 16 &&
6812 "Required alignment greater than stack alignment.");
6813 switch (Reg) {
6814 default:
6815 report_fatal_error("called on invalid register.");
6816 case PPC::R5:
6817 case PPC::R9:
6818 case PPC::X3:
6819 case PPC::X5:
6820 case PPC::X7:
6821 case PPC::X9:
6822 // These registers are 16-byte aligned, which is the strictest alignment
6823 // we can support.
6824 return true;
6825 case PPC::R3:
6826 case PPC::R7:
6827 case PPC::X4:
6828 case PPC::X6:
6829 case PPC::X8:
6830 case PPC::X10:
6831 // The shadow of these registers in the PSA is 8 byte aligned.
6832 return RequiredAlign <= 8;
6833 case PPC::R4:
6834 case PPC::R6:
6835 case PPC::R8:
6836 case PPC::R10:
6837 return RequiredAlign <= 4;
6838 }
6839}
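// Worked example (editorial note, not part of the upstream source): the
// grouping above follows from where each register's shadow lands in the
// parameter save area. On 64-bit the PSA begins 16-byte aligned after the
// 48-byte linkage area, so X3/X5/X7/X9 shadow offsets 0/16/32/48 (16-byte
// aligned) while X4/X6/X8/X10 shadow offsets 8/24/40/56 (only 8-byte
// aligned). On 32-bit the 24-byte linkage area leaves R5 and R9 on 16-byte
// boundaries, R3 and R7 on 8-byte boundaries, and R4/R6/R8/R10 on 4-byte
// boundaries.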
6840
6841static bool CC_AIX(unsigned ValNo, MVT ValVT, MVT LocVT,
6842 CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
6843 CCState &S) {
6844 AIXCCState &State = static_cast<AIXCCState &>(S);
6845 const PPCSubtarget &Subtarget = static_cast<const PPCSubtarget &>(
6846 State.getMachineFunction().getSubtarget());
6847 const bool IsPPC64 = Subtarget.isPPC64();
6848 const unsigned PtrSize = IsPPC64 ? 8 : 4;
6849 const Align PtrAlign(PtrSize);
6850 const Align StackAlign(16);
6851 const MVT RegVT = IsPPC64 ? MVT::i64 : MVT::i32;
6852
6853 if (ValVT == MVT::f128)
6854 report_fatal_error("f128 is unimplemented on AIX.");
6855
6856 if (ArgFlags.isNest())
6857 report_fatal_error("Nest arguments are unimplemented.");
6858
6859 static const MCPhysReg GPR_32[] = {// 32-bit registers.
6860 PPC::R3, PPC::R4, PPC::R5, PPC::R6,
6861 PPC::R7, PPC::R8, PPC::R9, PPC::R10};
6862 static const MCPhysReg GPR_64[] = {// 64-bit registers.
6863 PPC::X3, PPC::X4, PPC::X5, PPC::X6,
6864 PPC::X7, PPC::X8, PPC::X9, PPC::X10};
6865
6866 static const MCPhysReg VR[] = {// Vector registers.
6867 PPC::V2, PPC::V3, PPC::V4, PPC::V5,
6868 PPC::V6, PPC::V7, PPC::V8, PPC::V9,
6869 PPC::V10, PPC::V11, PPC::V12, PPC::V13};
6870
6871 const ArrayRef<MCPhysReg> GPRs = IsPPC64 ? GPR_64 : GPR_32;
6872
6873 if (ArgFlags.isByVal()) {
6874 const Align ByValAlign(ArgFlags.getNonZeroByValAlign());
6875 if (ByValAlign > StackAlign)
6876 report_fatal_error("Pass-by-value arguments with alignment greater than "
6877 "16 are not supported.");
6878
6879 const unsigned ByValSize = ArgFlags.getByValSize();
6880 const Align ObjAlign = ByValAlign > PtrAlign ? ByValAlign : PtrAlign;
6881
6882 // An empty aggregate parameter takes up no storage and no registers,
6883 // but needs a MemLoc for a stack slot for the formal arguments side.
6884 if (ByValSize == 0) {
6885 State.addLoc(CCValAssign::getMem(ValNo, MVT::INVALID_SIMPLE_VALUE_TYPE,
6886 State.getStackSize(), RegVT, LocInfo));
6887 return false;
6888 }
6889
6890 // Shadow allocate any registers that are not properly aligned.
6891 unsigned NextReg = State.getFirstUnallocated(GPRs);
6892 while (NextReg != GPRs.size() &&
6893 !isGPRShadowAligned(GPRs[NextReg], ObjAlign)) {
6894 // Shadow allocate the next register since its alignment is not strict enough.
6895 unsigned Reg = State.AllocateReg(GPRs);
6896 // Allocate the stack space shadowed by said register.
6897 State.AllocateStack(PtrSize, PtrAlign);
6898 assert(Reg && "Allocating register unexpectedly failed.");
6899 (void)Reg;
6900 NextReg = State.getFirstUnallocated(GPRs);
6901 }
6902
6903 const unsigned StackSize = alignTo(ByValSize, ObjAlign);
6904 unsigned Offset = State.AllocateStack(StackSize, ObjAlign);
6905 for (const unsigned E = Offset + StackSize; Offset < E; Offset += PtrSize) {
6906 if (unsigned Reg = State.AllocateReg(GPRs))
6907 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, RegVT, LocInfo));
6908 else {
6909 State.addLoc(CCValAssign::getMem(ValNo, MVT::INVALID_SIMPLE_VALUE_TYPE,
6910 Offset, MVT::INVALID_SIMPLE_VALUE_TYPE,
6911 LocInfo));
6912 break;
6913 }
6914 }
6915 return false;
6916 }
6917
6918 // Arguments always reserve parameter save area.
6919 switch (ValVT.SimpleTy) {
6920 default:
6921 report_fatal_error("Unhandled value type for argument.");
6922 case MVT::i64:
6923 // i64 arguments should have been split to i32 for PPC32.
6924 assert(IsPPC64 && "PPC32 should have split i64 values.");
6925 [[fallthrough]];
6926 case MVT::i1:
6927 case MVT::i32: {
6928 const unsigned Offset = State.AllocateStack(PtrSize, PtrAlign);
6929 // AIX integer arguments are always passed in register width.
6930 if (ValVT.getFixedSizeInBits() < RegVT.getFixedSizeInBits())
6931 LocInfo = ArgFlags.isSExt() ? CCValAssign::LocInfo::SExt
6932 : CCValAssign::LocInfo::ZExt;
6933 if (unsigned Reg = State.AllocateReg(GPRs))
6934 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, RegVT, LocInfo));
6935 else
6936 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, RegVT, LocInfo));
6937
6938 return false;
6939 }
6940 case MVT::f32:
6941 case MVT::f64: {
6942 // Parameter save area (PSA) is reserved even if the float is passed in an FPR.
6943 const unsigned StoreSize = LocVT.getStoreSize();
6944 // Floats are always 4-byte aligned in the PSA on AIX.
6945 // This includes f64 in 64-bit mode for ABI compatibility.
6946 const unsigned Offset =
6947 State.AllocateStack(IsPPC64 ? 8 : StoreSize, Align(4));
6948 unsigned FReg = State.AllocateReg(FPR);
6949 if (FReg)
6950 State.addLoc(CCValAssign::getReg(ValNo, ValVT, FReg, LocVT, LocInfo));
6951
6952 // Reserve and initialize GPRs or initialize the PSA as required.
6953 for (unsigned I = 0; I < StoreSize; I += PtrSize) {
6954 if (unsigned Reg = State.AllocateReg(GPRs)) {
6955 assert(FReg && "An FPR should be available when a GPR is reserved.");
6956 if (State.isVarArg()) {
6957 // Successfully reserved GPRs are only initialized for vararg calls.
6958 // Custom handling is required for:
6959 // f64 in PPC32 needs to be split into 2 GPRs.
6960 // f32 in PPC64 needs to occupy only lower 32 bits of 64-bit GPR.
6961 State.addLoc(
6962 CCValAssign::getCustomReg(ValNo, ValVT, Reg, RegVT, LocInfo));
6963 }
6964 } else {
6965 // If there are insufficient GPRs, the PSA needs to be initialized.
6966 // Initialization occurs even if an FPR was initialized for
6967 // compatibility with the AIX XL compiler. The full memory for the
6968 // argument will be initialized even if a prior word is saved in GPR.
6969 // A custom memLoc is used when the argument also passes in FPR so
6970 // that the callee handling can skip over it easily.
6971 State.addLoc(
6972 FReg ? CCValAssign::getCustomMem(ValNo, ValVT, Offset, LocVT,
6973 LocInfo)
6974 : CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
6975 break;
6976 }
6977 }
6978
6979 return false;
6980 }
6981 case MVT::v4f32:
6982 case MVT::v4i32:
6983 case MVT::v8i16:
6984 case MVT::v16i8:
6985 case MVT::v2i64:
6986 case MVT::v2f64:
6987 case MVT::v1i128: {
6988 const unsigned VecSize = 16;
6989 const Align VecAlign(VecSize);
6990
6991 if (!State.isVarArg()) {
6992 // If there are vector registers remaining we don't consume any stack
6993 // space.
6994 if (unsigned VReg = State.AllocateReg(VR)) {
6995 State.addLoc(CCValAssign::getReg(ValNo, ValVT, VReg, LocVT, LocInfo));
6996 return false;
6997 }
6998 // Vectors passed on the stack do not shadow GPRs or FPRs even though they
6999 // might be allocated in the portion of the PSA that is shadowed by the
7000 // GPRs.
7001 const unsigned Offset = State.AllocateStack(VecSize, VecAlign);
7002 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
7003 return false;
7004 }
7005
7006 unsigned NextRegIndex = State.getFirstUnallocated(GPRs);
7007 // Burn any underaligned registers and their shadowed stack space until
7008 // we reach the required alignment.
7009 while (NextRegIndex != GPRs.size() &&
7010 !isGPRShadowAligned(GPRs[NextRegIndex], VecAlign)) {
7011 // Shadow allocate register and its stack shadow.
7012 unsigned Reg = State.AllocateReg(GPRs);
7013 State.AllocateStack(PtrSize, PtrAlign);
7014 assert(Reg && "Allocating register unexpectedly failed.");
7015 (void)Reg;
7016 NextRegIndex = State.getFirstUnallocated(GPRs);
7017 }
7018
7019 // Vectors that are passed as fixed arguments are handled differently.
7020 // They are passed in VRs if any are available (unlike arguments passed
7021 // through ellipses) and shadow GPRs (unlike arguments to non-vararg
7022 // functions).
7023 if (State.isFixed(ValNo)) {
7024 if (unsigned VReg = State.AllocateReg(VR)) {
7025 State.addLoc(CCValAssign::getReg(ValNo, ValVT, VReg, LocVT, LocInfo));
7026 // Shadow allocate GPRs and stack space even though we pass in a VR.
7027 for (unsigned I = 0; I != VecSize; I += PtrSize)
7028 State.AllocateReg(GPRs);
7029 State.AllocateStack(VecSize, VecAlign);
7030 return false;
7031 }
7032 // No vector registers remain so pass on the stack.
7033 const unsigned Offset = State.AllocateStack(VecSize, VecAlign);
7034 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
7035 return false;
7036 }
7037
7038 // If all GPRs are consumed then we pass the argument fully on the stack.
7039 if (NextRegIndex == GPRs.size()) {
7040 const unsigned Offset = State.AllocateStack(VecSize, VecAlign);
7041 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
7042 return false;
7043 }
7044
7045 // Corner case for 32-bit codegen. We have 2 registers to pass the first
7046 // half of the argument, and then need to pass the remaining half on the
7047 // stack.
7048 if (GPRs[NextRegIndex] == PPC::R9) {
7049 const unsigned Offset = State.AllocateStack(VecSize, VecAlign);
7050 State.addLoc(
7051 CCValAssign::getCustomMem(ValNo, ValVT, Offset, LocVT, LocInfo));
7052
7053 const unsigned FirstReg = State.AllocateReg(PPC::R9);
7054 const unsigned SecondReg = State.AllocateReg(PPC::R10);
7055 assert(FirstReg && SecondReg &&
7056 "Allocating R9 or R10 unexpectedly failed.");
7057 State.addLoc(
7058 CCValAssign::getCustomReg(ValNo, ValVT, FirstReg, RegVT, LocInfo));
7059 State.addLoc(
7060 CCValAssign::getCustomReg(ValNo, ValVT, SecondReg, RegVT, LocInfo));
7061 return false;
7062 }
7063
7064 // We have enough GPRs to fully pass the vector argument, and we have
7065 // already consumed any underaligned registers. Start with the custom
7066 // MemLoc and then the custom RegLocs.
7067 const unsigned Offset = State.AllocateStack(VecSize, VecAlign);
7068 State.addLoc(
7069 CCValAssign::getCustomMem(ValNo, ValVT, Offset, LocVT, LocInfo));
7070 for (unsigned I = 0; I != VecSize; I += PtrSize) {
7071 const unsigned Reg = State.AllocateReg(GPRs);
7072 assert(Reg && "Failed to allocated register for vararg vector argument");
7073 State.addLoc(
7074 CCValAssign::getCustomReg(ValNo, ValVT, Reg, RegVT, LocInfo));
7075 }
7076 return false;
7077 }
7078 }
7079 return true;
7080}
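// Worked example (editorial note, not part of the upstream source): for a
// 16-byte vector passed through the ellipsis of a vararg call on 64-bit
// AIX while X3 and X4 are still free, CC_AIX produces one custom MemLoc
// (the 16-byte PSA slot) followed by two custom RegLocs (X3 and X4); the
// call lowering then stores the vector to the slot and reloads each
// doubleword into its GPR. A vararg f64 on 32-bit likewise receives two
// custom RegLocs so the value can be split across a GPR pair.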
7081
7082// So far, this function is only used by LowerFormalArguments_AIX()
7083 static const TargetRegisterClass *getRegClassForSVT(MVT::SimpleValueType SVT,
7084 bool IsPPC64,
7085 bool HasP8Vector,
7086 bool HasVSX) {
7087 assert((IsPPC64 || SVT != MVT::i64) &&
7088 "i64 should have been split for 32-bit codegen.");
7089
7090 switch (SVT) {
7091 default:
7092 report_fatal_error("Unexpected value type for formal argument");
7093 case MVT::i1:
7094 case MVT::i32:
7095 case MVT::i64:
7096 return IsPPC64 ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
7097 case MVT::f32:
7098 return HasP8Vector ? &PPC::VSSRCRegClass : &PPC::F4RCRegClass;
7099 case MVT::f64:
7100 return HasVSX ? &PPC::VSFRCRegClass : &PPC::F8RCRegClass;
7101 case MVT::v4f32:
7102 case MVT::v4i32:
7103 case MVT::v8i16:
7104 case MVT::v16i8:
7105 case MVT::v2i64:
7106 case MVT::v2f64:
7107 case MVT::v1i128:
7108 return &PPC::VRRCRegClass;
7109 }
7110}
7111
7112 static SDValue truncateScalarIntegerArg(ISD::ArgFlagsTy Flags, EVT ValVT,
7113 SelectionDAG &DAG, SDValue ArgValue,
7114 MVT LocVT, const SDLoc &dl) {
7115 assert(ValVT.isScalarInteger() && LocVT.isScalarInteger());
7116 assert(ValVT.getFixedSizeInBits() < LocVT.getFixedSizeInBits());
7117
7118 if (Flags.isSExt())
7119 ArgValue = DAG.getNode(ISD::AssertSext, dl, LocVT, ArgValue,
7120 DAG.getValueType(ValVT));
7121 else if (Flags.isZExt())
7122 ArgValue = DAG.getNode(ISD::AssertZext, dl, LocVT, ArgValue,
7123 DAG.getValueType(ValVT));
7124
7125 return DAG.getNode(ISD::TRUNCATE, dl, ValVT, ArgValue);
7126}
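// Minimal usage sketch (editorial note, not part of the upstream source):
// for an i32 formal argument that arrives widened in a 64-bit GPR, the
// helper above wraps the incoming i64 in AssertSext or AssertZext (keyed
// off the argument's extension flag) and then truncates it back to i32, so
// later combines know the upper bits already hold the extended value.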
7127
7128static unsigned mapArgRegToOffsetAIX(unsigned Reg, const PPCFrameLowering *FL) {
7129 const unsigned LASize = FL->getLinkageSize();
7130
7131 if (PPC::GPRCRegClass.contains(Reg)) {
7132 assert(Reg >= PPC::R3 && Reg <= PPC::R10 &&
7133 "Reg must be a valid argument register!");
7134 return LASize + 4 * (Reg - PPC::R3);
7135 }
7136
7137 if (PPC::G8RCRegClass.contains(Reg)) {
7138 assert(Reg >= PPC::X3 && Reg <= PPC::X10 &&
7139 "Reg must be a valid argument register!");
7140 return LASize + 8 * (Reg - PPC::X3);
7141 }
7142
7143 llvm_unreachable("Only general purpose registers expected.");
7144}
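// Worked example (editorial note, not part of the upstream source): with
// the 48-byte 64-bit AIX linkage area, X3 maps to offset 48 and X5 maps to
// 48 + 8 * (X5 - X3) = 64; on 32-bit the 24-byte linkage area puts R4 at
// 24 + 4 * (R4 - R3) = 28.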
7145
7146// AIX ABI Stack Frame Layout:
7147//
7148// Low Memory +--------------------------------------------+
7149// SP +---> | Back chain | ---+
7150// | +--------------------------------------------+ |
7151// | | Saved Condition Register | |
7152// | +--------------------------------------------+ |
7153// | | Saved Linkage Register | |
7154// | +--------------------------------------------+ | Linkage Area
7155// | | Reserved for compilers | |
7156// | +--------------------------------------------+ |
7157// | | Reserved for binders | |
7158// | +--------------------------------------------+ |
7159// | | Saved TOC pointer | ---+
7160// | +--------------------------------------------+
7161// | | Parameter save area |
7162// | +--------------------------------------------+
7163// | | Alloca space |
7164// | +--------------------------------------------+
7165// | | Local variable space |
7166// | +--------------------------------------------+
7167// | | Float/int conversion temporary |
7168// | +--------------------------------------------+
7169// | | Save area for AltiVec registers |
7170// | +--------------------------------------------+
7171// | | AltiVec alignment padding |
7172// | +--------------------------------------------+
7173// | | Save area for VRSAVE register |
7174// | +--------------------------------------------+
7175// | | Save area for General Purpose registers |
7176// | +--------------------------------------------+
7177// | | Save area for Floating Point registers |
7178// | +--------------------------------------------+
7179// +---- | Back chain |
7180// High Memory +--------------------------------------------+
7181//
7182// Specifications:
7183// AIX 7.2 Assembler Language Reference
7184// Subroutine linkage convention
7185
7186SDValue PPCTargetLowering::LowerFormalArguments_AIX(
7187 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
7188 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
7189 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
7190
7191 assert((CallConv == CallingConv::C || CallConv == CallingConv::Cold ||
7192 CallConv == CallingConv::Fast) &&
7193 "Unexpected calling convention!");
7194
7195 if (getTargetMachine().Options.GuaranteedTailCallOpt)
7196 report_fatal_error("Tail call support is unimplemented on AIX.");
7197
7198 if (useSoftFloat())
7199 report_fatal_error("Soft float support is unimplemented on AIX.");
7200
7201 const PPCSubtarget &Subtarget = DAG.getSubtarget<PPCSubtarget>();
7202
7203 const bool IsPPC64 = Subtarget.isPPC64();
7204 const unsigned PtrByteSize = IsPPC64 ? 8 : 4;
7205
7206 // Assign locations to all of the incoming arguments.
7207 SmallVector<CCValAssign, 16> ArgLocs;
7208 MachineFunction &MF = DAG.getMachineFunction();
7209 MachineFrameInfo &MFI = MF.getFrameInfo();
7210 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
7211 AIXCCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());
7212
7213 const EVT PtrVT = getPointerTy(MF.getDataLayout());
7214 // Reserve space for the linkage area on the stack.
7215 const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
7216 CCInfo.AllocateStack(LinkageSize, Align(PtrByteSize));
7217 CCInfo.AnalyzeFormalArguments(Ins, CC_AIX);
7218
7220 SmallVector<SDValue, 8> MemOps;
7221 for (size_t I = 0, End = ArgLocs.size(); I != End; /* No increment here */) {
7222 CCValAssign &VA = ArgLocs[I++];
7223 MVT LocVT = VA.getLocVT();
7224 MVT ValVT = VA.getValVT();
7225 ISD::ArgFlagsTy Flags = Ins[VA.getValNo()].Flags;
7226 // For compatibility with the AIX XL compiler, the float args in the
7227 // parameter save area are initialized even if the argument is available
7228 // in register. The caller is required to initialize both the register
7229 // and memory, however, the callee can choose to expect it in either.
7230 // The memloc is dismissed here because the argument is retrieved from
7231 // the register.
7232 if (VA.isMemLoc() && VA.needsCustom() && ValVT.isFloatingPoint())
7233 continue;
7234
7235 auto HandleMemLoc = [&]() {
7236 const unsigned LocSize = LocVT.getStoreSize();
7237 const unsigned ValSize = ValVT.getStoreSize();
7238 assert((ValSize <= LocSize) &&
7239 "Object size is larger than size of MemLoc");
7240 int CurArgOffset = VA.getLocMemOffset();
7241 // Objects are right-justified because AIX is big-endian.
7242 if (LocSize > ValSize)
7243 CurArgOffset += LocSize - ValSize;
7244 // Potential tail calls could cause overwriting of argument stack slots.
7245 const bool IsImmutable =
7246 !(getTargetMachine().Options.GuaranteedTailCallOpt ||
7247 (CallConv == CallingConv::Fast));
7248 int FI = MFI.CreateFixedObject(ValSize, CurArgOffset, IsImmutable);
7249 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
7250 SDValue ArgValue =
7251 DAG.getLoad(ValVT, dl, Chain, FIN, MachinePointerInfo());
7252 InVals.push_back(ArgValue);
7253 };
7254
7255 // Vector arguments to VaArg functions are passed both on the stack, and
7256 // in any available GPRs. Load the value from the stack and add the GPRs
7257 // as live ins.
7258 if (VA.isMemLoc() && VA.needsCustom()) {
7259 assert(ValVT.isVector() && "Unexpected Custom MemLoc type.");
7260 assert(isVarArg && "Only use custom memloc for vararg.");
7261 // ValNo of the custom MemLoc, so we can compare it to the ValNo of the
7262 // matching custom RegLocs.
7263 const unsigned OriginalValNo = VA.getValNo();
7264 (void)OriginalValNo;
7265
7266 auto HandleCustomVecRegLoc = [&]() {
7267 assert(I != End && ArgLocs[I].isRegLoc() && ArgLocs[I].needsCustom() &&
7268 "Missing custom RegLoc.");
7269 VA = ArgLocs[I++];
7270 assert(VA.getValVT().isVector() &&
7271 "Unexpected Val type for custom RegLoc.");
7272 assert(VA.getValNo() == OriginalValNo &&
7273 "ValNo mismatch between custom MemLoc and RegLoc.");
7274 MVT::SimpleValueType SVT = VA.getLocVT().SimpleTy;
7275 MF.addLiveIn(VA.getLocReg(),
7276 getRegClassForSVT(SVT, IsPPC64, Subtarget.hasP8Vector(),
7277 Subtarget.hasVSX()));
7278 };
7279
7280 HandleMemLoc();
7281 // In 64-bit there will be exactly 2 custom RegLocs that follow, and in
7282 // 32-bit there will be 2 custom RegLocs if we are passing in R9 and
7283 // R10.
7284 HandleCustomVecRegLoc();
7285 HandleCustomVecRegLoc();
7286
7287 // If we are targeting 32-bit, there might be 2 extra custom RegLocs if
7288 // we passed the vector in R5, R6, R7 and R8.
7289 if (I != End && ArgLocs[I].isRegLoc() && ArgLocs[I].needsCustom()) {
7290 assert(!IsPPC64 &&
7291 "Only 2 custom RegLocs expected for 64-bit codegen.");
7292 HandleCustomVecRegLoc();
7293 HandleCustomVecRegLoc();
7294 }
7295
7296 continue;
7297 }
7298
7299 if (VA.isRegLoc()) {
7300 if (VA.getValVT().isScalarInteger())
7301 FuncInfo->appendParameterType(PPCFunctionInfo::FixedType);
7302 else if (VA.getValVT().isFloatingPoint() && !VA.getValVT().isVector()) {
7303 switch (VA.getValVT().SimpleTy) {
7304 default:
7305 report_fatal_error("Unhandled value type for argument.");
7306 case MVT::f32:
7307 FuncInfo->appendParameterType(PPCFunctionInfo::ShortFloatingPoint);
7308 break;
7309 case MVT::f64:
7310 FuncInfo->appendParameterType(PPCFunctionInfo::LongFloatingPoint);
7311 break;
7312 }
7313 } else if (VA.getValVT().isVector()) {
7314 switch (VA.getValVT().SimpleTy) {
7315 default:
7316 report_fatal_error("Unhandled value type for argument.");
7317 case MVT::v16i8:
7318 FuncInfo->appendParameterType(PPCFunctionInfo::VectorChar);
7319 break;
7320 case MVT::v8i16:
7321 FuncInfo->appendParameterType(PPCFunctionInfo::VectorShort);
7322 break;
7323 case MVT::v4i32:
7324 case MVT::v2i64:
7325 case MVT::v1i128:
7326 FuncInfo->appendParameterType(PPCFunctionInfo::VectorInt);
7327 break;
7328 case MVT::v4f32:
7329 case MVT::v2f64:
7330 FuncInfo->appendParameterType(PPCFunctionInfo::VectorFloat);
7331 break;
7332 }
7333 }
7334 }
7335
7336 if (Flags.isByVal() && VA.isMemLoc()) {
7337 const unsigned Size =
7338 alignTo(Flags.getByValSize() ? Flags.getByValSize() : PtrByteSize,
7339 PtrByteSize);
7340 const int FI = MF.getFrameInfo().CreateFixedObject(
7341 Size, VA.getLocMemOffset(), /* IsImmutable */ false,
7342 /* IsAliased */ true);
7343 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
7344 InVals.push_back(FIN);
7345
7346 continue;
7347 }
7348
7349 if (Flags.isByVal()) {
7350 assert(VA.isRegLoc() && "MemLocs should already be handled.");
7351
7352 const MCPhysReg ArgReg = VA.getLocReg();
7353 const PPCFrameLowering *FL = Subtarget.getFrameLowering();
7354
7355 const unsigned StackSize = alignTo(Flags.getByValSize(), PtrByteSize);
7356 const int FI = MF.getFrameInfo().CreateFixedObject(
7357 StackSize, mapArgRegToOffsetAIX(ArgReg, FL), /* IsImmutable */ false,
7358 /* IsAliased */ true);
7359 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
7360 InVals.push_back(FIN);
7361
7362 // Add live ins for all the RegLocs for the same ByVal.
7363 const TargetRegisterClass *RegClass =
7364 IsPPC64 ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
7365
7366 auto HandleRegLoc = [&, RegClass, LocVT](const MCPhysReg PhysReg,
7367 unsigned Offset) {
7368 const Register VReg = MF.addLiveIn(PhysReg, RegClass);
7369 // Since the caller's side has left-justified the aggregate in the
7370 // register, we can simply store the entire register into the stack
7371 // slot.
7372 SDValue CopyFrom = DAG.getCopyFromReg(Chain, dl, VReg, LocVT);
7373 // The store to the fixed stack object is needed because accessing a
7374 // field of the ByVal will use a gep and load. Ideally we will optimize
7375 // to extracting the value from the register directly, and elide the
7376 // stores when the argument's address is not taken, but that will need to
7377 // be future work.
7378 SDValue Store = DAG.getStore(
7379 CopyFrom.getValue(1), dl, CopyFrom,
7380 DAG.getObjectPtrOffset(dl, FIN, TypeSize::getFixed(Offset)),
7381 MachinePointerInfo::getFixedStack(MF, FI, Offset));
7382
7383 MemOps.push_back(Store);
7384 };
7385
7386 unsigned Offset = 0;
7387 HandleRegLoc(VA.getLocReg(), Offset);
7388 Offset += PtrByteSize;
7389 for (; Offset != StackSize && ArgLocs[I].isRegLoc();
7390 Offset += PtrByteSize) {
7391 assert(ArgLocs[I].getValNo() == VA.getValNo() &&
7392 "RegLocs should be for ByVal argument.");
7393
7394 const CCValAssign RL = ArgLocs[I++];
7395 HandleRegLoc(RL.getLocReg(), Offset);
7397 }
7398
7399 if (Offset != StackSize) {
7400 assert(ArgLocs[I].getValNo() == VA.getValNo() &&
7401 "Expected MemLoc for remaining bytes.");
7402 assert(ArgLocs[I].isMemLoc() && "Expected MemLoc for remaining bytes.");
7403 // Consume the MemLoc. The InVal has already been emitted, so nothing
7404 // more needs to be done.
7405 ++I;
7406 }
7407
7408 continue;
7409 }
7410
7411 if (VA.isRegLoc() && !VA.needsCustom()) {
7412 MVT::SimpleValueType SVT = ValVT.SimpleTy;
7413 Register VReg =
7414 MF.addLiveIn(VA.getLocReg(),
7415 getRegClassForSVT(SVT, IsPPC64, Subtarget.hasP8Vector(),
7416 Subtarget.hasVSX()));
7417 SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, VReg, LocVT);
7418 if (ValVT.isScalarInteger() &&
7419 (ValVT.getFixedSizeInBits() < LocVT.getFixedSizeInBits())) {
7420 ArgValue =
7421 truncateScalarIntegerArg(Flags, ValVT, DAG, ArgValue, LocVT, dl);
7422 }
7423 InVals.push_back(ArgValue);
7424 continue;
7425 }
7426 if (VA.isMemLoc()) {
7427 HandleMemLoc();
7428 continue;
7429 }
7430 }
7431
7432 // On AIX a minimum of 8 words is saved to the parameter save area.
7433 const unsigned MinParameterSaveArea = 8 * PtrByteSize;
7434 // Area that is at least reserved in the caller of this function.
7435 unsigned CallerReservedArea = std::max<unsigned>(
7436 CCInfo.getStackSize(), LinkageSize + MinParameterSaveArea);
7437
7438 // Set the size that is at least reserved in caller of this function. Tail
7439 // call optimized function's reserved stack space needs to be aligned so
7440 // that taking the difference between two stack areas will result in an
7441 // aligned stack.
7442 CallerReservedArea =
7443 EnsureStackAlignment(Subtarget.getFrameLowering(), CallerReservedArea);
7444 FuncInfo->setMinReservedArea(CallerReservedArea);
7445
7446 if (isVarArg) {
7447 FuncInfo->setVarArgsFrameIndex(
7448 MFI.CreateFixedObject(PtrByteSize, CCInfo.getStackSize(), true));
7449 SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
7450
7451 static const MCPhysReg GPR_32[] = {PPC::R3, PPC::R4, PPC::R5, PPC::R6,
7452 PPC::R7, PPC::R8, PPC::R9, PPC::R10};
7453
7454 static const MCPhysReg GPR_64[] = {PPC::X3, PPC::X4, PPC::X5, PPC::X6,
7455 PPC::X7, PPC::X8, PPC::X9, PPC::X10};
7456 const unsigned NumGPArgRegs = std::size(IsPPC64 ? GPR_64 : GPR_32);
7457
7458 // The fixed integer arguments of a variadic function are stored to the
7459 // VarArgsFrameIndex on the stack so that they may be loaded by
7460 // dereferencing the result of va_next.
7461 for (unsigned GPRIndex =
7462 (CCInfo.getStackSize() - LinkageSize) / PtrByteSize;
7463 GPRIndex < NumGPArgRegs; ++GPRIndex) {
7464
7465 const Register VReg =
7466 IsPPC64 ? MF.addLiveIn(GPR_64[GPRIndex], &PPC::G8RCRegClass)
7467 : MF.addLiveIn(GPR_32[GPRIndex], &PPC::GPRCRegClass);
7468
7469 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
7470 SDValue Store =
7471 DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
7472 MemOps.push_back(Store);
7473 // Increment the address for the next argument to store.
7474 SDValue PtrOff = DAG.getConstant(PtrByteSize, dl, PtrVT);
7475 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
7476 }
7477 }
7478
7479 if (!MemOps.empty())
7480 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
7481
7482 return Chain;
7483}
7484
7485SDValue PPCTargetLowering::LowerCall_AIX(
7486 SDValue Chain, SDValue Callee, CallFlags CFlags,
7487 const SmallVectorImpl<ISD::OutputArg> &Outs,
7488 const SmallVectorImpl<SDValue> &OutVals,
7489 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
7490 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
7491 const CallBase *CB) const {
7492 // See PPCTargetLowering::LowerFormalArguments_AIX() for a description of the
7493 // AIX ABI stack frame layout.
7494
7495 assert((CFlags.CallConv == CallingConv::C ||
7496 CFlags.CallConv == CallingConv::Cold ||
7497 CFlags.CallConv == CallingConv::Fast) &&
7498 "Unexpected calling convention!");
7499
7500 if (CFlags.IsPatchPoint)
7501 report_fatal_error("This call type is unimplemented on AIX.");
7502
7503 const PPCSubtarget &Subtarget = DAG.getSubtarget<PPCSubtarget>();
7504
7507 AIXCCState CCInfo(CFlags.CallConv, CFlags.IsVarArg, MF, ArgLocs,
7508 *DAG.getContext());
7509
7510 // Reserve space for the linkage save area (LSA) on the stack.
7511 // In both PPC32 and PPC64 there are 6 reserved slots in the LSA:
7512 // [SP][CR][LR][2 x reserved][TOC].
7513 // The LSA is 24 bytes (6x4) in PPC32 and 48 bytes (6x8) in PPC64.
7514 const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
7515 const bool IsPPC64 = Subtarget.isPPC64();
7516 const EVT PtrVT = getPointerTy(DAG.getDataLayout());
7517 const unsigned PtrByteSize = IsPPC64 ? 8 : 4;
7518 CCInfo.AllocateStack(LinkageSize, Align(PtrByteSize));
7519 CCInfo.AnalyzeCallOperands(Outs, CC_AIX);
7520
7521 // The prolog code of the callee may store up to 8 GPR argument registers to
7522 // the stack, allowing va_start to index over them in memory if the callee
7523 // is variadic.
7524 // Because we cannot tell if this is needed on the caller side, we have to
7525 // conservatively assume that it is needed. As such, make sure we have at
7526 // least enough stack space for the caller to store the 8 GPRs.
7527 const unsigned MinParameterSaveAreaSize = 8 * PtrByteSize;
7528 const unsigned NumBytes = std::max<unsigned>(
7529 LinkageSize + MinParameterSaveAreaSize, CCInfo.getStackSize());
7530
7531 // Adjust the stack pointer for the new arguments...
7532 // These operations are automatically eliminated by the prolog/epilog pass.
7533 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
7534 SDValue CallSeqStart = Chain;
7535
7536 SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
7537 SmallVector<SDValue, 8> MemOpChains;
7538
7539 // Set up a copy of the stack pointer for loading and storing any
7540 // arguments that may not fit in the registers available for argument
7541 // passing.
7542 const SDValue StackPtr = IsPPC64 ? DAG.getRegister(PPC::X1, MVT::i64)
7543 : DAG.getRegister(PPC::R1, MVT::i32);
7544
7545 for (unsigned I = 0, E = ArgLocs.size(); I != E;) {
7546 const unsigned ValNo = ArgLocs[I].getValNo();
7547 SDValue Arg = OutVals[ValNo];
7548 ISD::ArgFlagsTy Flags = Outs[ValNo].Flags;
7549
7550 if (Flags.isByVal()) {
7551 const unsigned ByValSize = Flags.getByValSize();
7552
7553 // Nothing to do for zero-sized ByVals on the caller side.
7554 if (!ByValSize) {
7555 ++I;
7556 continue;
7557 }
7558
7559 auto GetLoad = [&](EVT VT, unsigned LoadOffset) {
7560 return DAG.getExtLoad(ISD::ZEXTLOAD, dl, PtrVT, Chain,
7561 (LoadOffset != 0)
7562 ? DAG.getObjectPtrOffset(
7563 dl, Arg, TypeSize::getFixed(LoadOffset))
7564 : Arg,
7565 MachinePointerInfo(), VT);
7566 };
7567
7568 unsigned LoadOffset = 0;
7569
7570 // Initialize registers, which are fully occupied by the by-val argument.
7571 while (LoadOffset + PtrByteSize <= ByValSize && ArgLocs[I].isRegLoc()) {
7572 SDValue Load = GetLoad(PtrVT, LoadOffset);
7573 MemOpChains.push_back(Load.getValue(1));
7574 LoadOffset += PtrByteSize;
7575 const CCValAssign &ByValVA = ArgLocs[I++];
7576 assert(ByValVA.getValNo() == ValNo &&
7577 "Unexpected location for pass-by-value argument.");
7578 RegsToPass.push_back(std::make_pair(ByValVA.getLocReg(), Load));
7579 }
7580
7581 if (LoadOffset == ByValSize)
7582 continue;
7583
7584 // There must be one more loc to handle the remainder.
7585 assert(ArgLocs[I].getValNo() == ValNo &&
7586 "Expected additional location for by-value argument.");
7587
7588 if (ArgLocs[I].isMemLoc()) {
7589 assert(LoadOffset < ByValSize && "Unexpected memloc for by-val arg.");
7590 const CCValAssign &ByValVA = ArgLocs[I++];
7591 ISD::ArgFlagsTy MemcpyFlags = Flags;
7592 // Only memcpy the bytes that don't pass in register.
7593 MemcpyFlags.setByValSize(ByValSize - LoadOffset);
7594 Chain = CallSeqStart = createMemcpyOutsideCallSeq(
7595 (LoadOffset != 0) ? DAG.getObjectPtrOffset(
7596 dl, Arg, TypeSize::getFixed(LoadOffset))
7597 : Arg,
7598 DAG.getObjectPtrOffset(
7599 dl, StackPtr, TypeSize::getFixed(ByValVA.getLocMemOffset())),
7600 CallSeqStart, MemcpyFlags, DAG, dl);
7601 continue;
7602 }
7603
7604 // Initialize the final register residue.
7605 // Any residue that occupies the final by-val arg register must be
7606 // left-justified on AIX. Loads must be a power-of-2 size and cannot be
7607 // larger than the ByValSize. For example: a 7 byte by-val arg requires 4,
7608 // 2 and 1 byte loads.
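// Editorial illustration of the arithmetic below: for that 7-byte residue
// on 64-bit, the loop emits an i32 load shifted left by 64 - 32 = 32 bits,
// an i16 load shifted left by 64 - 48 = 16 bits, and an i8 load shifted
// left by 64 - 56 = 8 bits, OR-ing the pieces so the residue ends up
// left-justified in the final register.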
7609 const unsigned ResidueBytes = ByValSize % PtrByteSize;
7610 assert(ResidueBytes != 0 && LoadOffset + PtrByteSize > ByValSize &&
7611 "Unexpected register residue for by-value argument.");
7612 SDValue ResidueVal;
7613 for (unsigned Bytes = 0; Bytes != ResidueBytes;) {
7614 const unsigned N = llvm::bit_floor(ResidueBytes - Bytes);
7615 const MVT VT =
7616 N == 1 ? MVT::i8
7617 : ((N == 2) ? MVT::i16 : (N == 4 ? MVT::i32 : MVT::i64));
7618 SDValue Load = GetLoad(VT, LoadOffset);
7619 MemOpChains.push_back(Load.getValue(1));
7620 LoadOffset += N;
7621 Bytes += N;
7622
7623 // By-val arguments are passed left-justified in registers.
7624 // Every load here needs to be shifted, otherwise a full register load
7625 // should have been used.
7626 assert(PtrVT.getSimpleVT().getSizeInBits() > (Bytes * 8) &&
7627 "Unexpected load emitted during handling of pass-by-value "
7628 "argument.");
7629 unsigned NumSHLBits = PtrVT.getSimpleVT().getSizeInBits() - (Bytes * 8);
7630 EVT ShiftAmountTy =
7631 getShiftAmountTy(Load->getValueType(0), DAG.getDataLayout());
7632 SDValue SHLAmt = DAG.getConstant(NumSHLBits, dl, ShiftAmountTy);
7633 SDValue ShiftedLoad =
7634 DAG.getNode(ISD::SHL, dl, Load.getValueType(), Load, SHLAmt);
7635 ResidueVal = ResidueVal ? DAG.getNode(ISD::OR, dl, PtrVT, ResidueVal,
7636 ShiftedLoad)
7637 : ShiftedLoad;
7638 }
7639
7640 const CCValAssign &ByValVA = ArgLocs[I++];
7641 RegsToPass.push_back(std::make_pair(ByValVA.getLocReg(), ResidueVal));
7642 continue;
7643 }
7644
7645 CCValAssign &VA = ArgLocs[I++];
7646 const MVT LocVT = VA.getLocVT();
7647 const MVT ValVT = VA.getValVT();
7648
7649 switch (VA.getLocInfo()) {
7650 default:
7651 report_fatal_error("Unexpected argument extension type.");
7652 case CCValAssign::Full:
7653 break;
7654 case CCValAssign::ZExt:
7655 Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
7656 break;
7657 case CCValAssign::SExt:
7658 Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
7659 break;
7660 }
7661
7662 if (VA.isRegLoc() && !VA.needsCustom()) {
7663 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
7664 continue;
7665 }
7666
7667 // Vector arguments passed to VarArg functions need custom handling when
7668 // they are passed (at least partially) in GPRs.
7669 if (VA.isMemLoc() && VA.needsCustom() && ValVT.isVector()) {
7670 assert(CFlags.IsVarArg && "Custom MemLocs only used for Vector args.");
7671 // Store value to its stack slot.
7672 SDValue PtrOff =
7673 DAG.getConstant(VA.getLocMemOffset(), dl, StackPtr.getValueType());
7674 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
7675 SDValue Store =
7676 DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo());
7677 MemOpChains.push_back(Store);
7678 const unsigned OriginalValNo = VA.getValNo();
7679 // Then load the GPRs from the stack
7680 unsigned LoadOffset = 0;
7681 auto HandleCustomVecRegLoc = [&]() {
7682 assert(I != E && "Unexpected end of CCvalAssigns.");
7683 assert(ArgLocs[I].isRegLoc() && ArgLocs[I].needsCustom() &&
7684 "Expected custom RegLoc.");
7685 CCValAssign RegVA = ArgLocs[I++];
7686 assert(RegVA.getValNo() == OriginalValNo &&
7687 "Custom MemLoc ValNo and custom RegLoc ValNo must match.");
7688 SDValue Add = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
7689 DAG.getConstant(LoadOffset, dl, PtrVT));
7690 SDValue Load = DAG.getLoad(PtrVT, dl, Store, Add, MachinePointerInfo());
7691 MemOpChains.push_back(Load.getValue(1));
7692 RegsToPass.push_back(std::make_pair(RegVA.getLocReg(), Load));
7693 LoadOffset += PtrByteSize;
7694 };
7695
7696 // In 64-bit there will be exactly 2 custom RegLocs that follow, and in
7697 // 32-bit there will be 2 custom RegLocs if we are passing in R9 and
7698 // R10.
7699 HandleCustomVecRegLoc();
7700 HandleCustomVecRegLoc();
7701
7702 if (I != E && ArgLocs[I].isRegLoc() && ArgLocs[I].needsCustom() &&
7703 ArgLocs[I].getValNo() == OriginalValNo) {
7704 assert(!IsPPC64 &&
7705 "Only 2 custom RegLocs expected for 64-bit codegen.");
7706 HandleCustomVecRegLoc();
7707 HandleCustomVecRegLoc();
7708 }
7709
7710 continue;
7711 }
7712
7713 if (VA.isMemLoc()) {
7714 SDValue PtrOff =
7715 DAG.getConstant(VA.getLocMemOffset(), dl, StackPtr.getValueType());
7716 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
7717 MemOpChains.push_back(
7718 DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));
7719
7720 continue;
7721 }
7722
7723 if (!ValVT.isFloatingPoint())
7725 "Unexpected register handling for calling convention.");
7726
7727 // Custom handling is used for GPR initializations for vararg float
7728 // arguments.
7729 assert(VA.isRegLoc() && VA.needsCustom() && CFlags.IsVarArg &&
7730 LocVT.isInteger() &&
7731 "Custom register handling only expected for VarArg.");
7732
7733 SDValue ArgAsInt =
7734 DAG.getBitcast(MVT::getIntegerVT(ValVT.getSizeInBits()), Arg);
7735
7736 if (Arg.getValueType().getStoreSize() == LocVT.getStoreSize())
7737 // f32 in 32-bit GPR
7738 // f64 in 64-bit GPR
7739 RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgAsInt));
7740 else if (Arg.getValueType().getFixedSizeInBits() <
7741 LocVT.getFixedSizeInBits())
7742 // f32 in 64-bit GPR.
7743 RegsToPass.push_back(std::make_pair(
7744 VA.getLocReg(), DAG.getZExtOrTrunc(ArgAsInt, dl, LocVT)));
7745 else {
7746 // f64 in two 32-bit GPRs
7747 // The 2 GPRs are marked custom and expected to be adjacent in ArgLocs.
7748 assert(Arg.getValueType() == MVT::f64 && CFlags.IsVarArg && !IsPPC64 &&
7749 "Unexpected custom register for argument!");
7750 CCValAssign &GPR1 = VA;
7751 SDValue MSWAsI64 = DAG.getNode(ISD::SRL, dl, MVT::i64, ArgAsInt,
7752 DAG.getConstant(32, dl, MVT::i8));
7753 RegsToPass.push_back(std::make_pair(
7754 GPR1.getLocReg(), DAG.getZExtOrTrunc(MSWAsI64, dl, MVT::i32)));
7755
7756 if (I != E) {
7757 // If only 1 GPR was available, there will only be one custom GPR and
7758 // the argument will also pass in memory.
7759 CCValAssign &PeekArg = ArgLocs[I];
7760 if (PeekArg.isRegLoc() && PeekArg.getValNo() == ValNo) {
7761 assert(PeekArg.needsCustom() && "A second custom GPR is expected.");
7762 CCValAssign &GPR2 = ArgLocs[I++];
7763 RegsToPass.push_back(std::make_pair(
7764 GPR2.getLocReg(), DAG.getZExtOrTrunc(ArgAsInt, dl, MVT::i32)));
7765 }
7766 }
7767 }
7768 }
7769
7770 if (!MemOpChains.empty())
7771 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
7772
7773 // For indirect calls, we need to save the TOC base to the stack for
7774 // restoration after the call.
7775 if (CFlags.IsIndirect) {
7776 assert(!CFlags.IsTailCall && "Indirect tail-calls not supported.");
7777 const MCRegister TOCBaseReg = Subtarget.getTOCPointerRegister();
7778 const MCRegister StackPtrReg = Subtarget.getStackPointerRegister();
7779 const MVT PtrVT = Subtarget.isPPC64() ? MVT::i64 : MVT::i32;
7780 const unsigned TOCSaveOffset =
7781 Subtarget.getFrameLowering()->getTOCSaveOffset();
7782
7783 setUsesTOCBasePtr(DAG);
7784 SDValue Val = DAG.getCopyFromReg(Chain, dl, TOCBaseReg, PtrVT);
7785 SDValue PtrOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
7786 SDValue StackPtr = DAG.getRegister(StackPtrReg, PtrVT);
7787 SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
7788 Chain = DAG.getStore(
7789 Val.getValue(1), dl, Val, AddPtr,
7790 MachinePointerInfo::getStack(DAG.getMachineFunction(), TOCSaveOffset));
7791 }
7792
7793 // Build a sequence of copy-to-reg nodes chained together with token chain
7794 // and flag operands which copy the outgoing args into the appropriate regs.
7795 SDValue InGlue;
7796 for (auto Reg : RegsToPass) {
7797 Chain = DAG.getCopyToReg(Chain, dl, Reg.first, Reg.second, InGlue);
7798 InGlue = Chain.getValue(1);
7799 }
7800
7801 const int SPDiff = 0;
7802 return FinishCall(CFlags, dl, DAG, RegsToPass, InGlue, Chain, CallSeqStart,
7803 Callee, SPDiff, NumBytes, Ins, InVals, CB);
7804}
7805
7806bool
7807PPCTargetLowering::CanLowerReturn(CallingConv::ID CallConv,
7808 MachineFunction &MF, bool isVarArg,
7809 const SmallVectorImpl<ISD::OutputArg> &Outs,
7810 LLVMContext &Context) const {
7811 SmallVector<CCValAssign, 16> RVLocs;
7812 CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
7813 return CCInfo.CheckReturn(
7814 Outs, (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold)
7815 ? RetCC_PPC_Cold
7816 : RetCC_PPC);
7817}
7818
7819SDValue
7820PPCTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
7821 bool isVarArg,
7822 const SmallVectorImpl<ISD::OutputArg> &Outs,
7823 const SmallVectorImpl<SDValue> &OutVals,
7824 const SDLoc &dl, SelectionDAG &DAG) const {
7825 SmallVector<CCValAssign, 16> RVLocs;
7826 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
7827 *DAG.getContext());
7828 CCInfo.AnalyzeReturn(Outs,
7829 (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold)
7830 ? RetCC_PPC_Cold
7831 : RetCC_PPC);
7832
7833 SDValue Glue;
7834 SmallVector<SDValue, 4> RetOps(1, Chain);
7835
7836 // Copy the result values into the output registers.
7837 for (unsigned i = 0, RealResIdx = 0; i != RVLocs.size(); ++i, ++RealResIdx) {
7838 CCValAssign &VA = RVLocs[i];
7839 assert(VA.isRegLoc() && "Can only return in registers!");
7840
7841 SDValue Arg = OutVals[RealResIdx];
7842
7843 switch (VA.getLocInfo()) {
7844 default: llvm_unreachable("Unknown loc info!");
7845 case CCValAssign::Full: break;
7846 case CCValAssign::AExt:
7847 Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
7848 break;
7849 case CCValAssign::ZExt:
7850 Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
7851 break;
7852 case CCValAssign::SExt:
7853 Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
7854 break;
7855 }
7856 if (Subtarget.hasSPE() && VA.getLocVT() == MVT::f64) {
7857 bool isLittleEndian = Subtarget.isLittleEndian();
7858 // Legalize ret f64 -> ret 2 x i32.
7859 SDValue SVal =
7860 DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
7861 DAG.getIntPtrConstant(isLittleEndian ? 0 : 1, dl));
7862 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), SVal, Glue);
7863 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
7864 SVal = DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
7865 DAG.getIntPtrConstant(isLittleEndian ? 1 : 0, dl));
7866 Glue = Chain.getValue(1);
7867 VA = RVLocs[++i]; // skip ahead to next loc
7868 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), SVal, Glue);
7869 } else
7870 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Glue);
7871 Glue = Chain.getValue(1);
7872 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
7873 }
7874
7875 RetOps[0] = Chain; // Update chain.
7876
7877 // Add the glue if we have it.
7878 if (Glue.getNode())
7879 RetOps.push_back(Glue);
7880
7881 return DAG.getNode(PPCISD::RET_GLUE, dl, MVT::Other, RetOps);
7882}
7883
7884SDValue
7885PPCTargetLowering::LowerGET_DYNAMIC_AREA_OFFSET(SDValue Op,
7886 SelectionDAG &DAG) const {
7887 SDLoc dl(Op);
7888
7889 // Get the correct type for integers.
7890 EVT IntVT = Op.getValueType();
7891
7892 // Get the inputs.
7893 SDValue Chain = Op.getOperand(0);
7894 SDValue FPSIdx = getFramePointerFrameIndex(DAG);
7895 // Build a DYNAREAOFFSET node.
7896 SDValue Ops[2] = {Chain, FPSIdx};
7897 SDVTList VTs = DAG.getVTList(IntVT);
7898 return DAG.getNode(PPCISD::DYNAREAOFFSET, dl, VTs, Ops);
7899}
7900
7901SDValue PPCTargetLowering::LowerSTACKRESTORE(SDValue Op,
7902 SelectionDAG &DAG) const {
7903 // When we pop the dynamic allocation we need to restore the SP link.
7904 SDLoc dl(Op);
7905
7906 // Get the correct type for pointers.
7907 EVT PtrVT = getPointerTy(DAG.getDataLayout());
7908
7909 // Construct the stack pointer operand.
7910 bool isPPC64 = Subtarget.isPPC64();
7911 unsigned SP = isPPC64 ? PPC::X1 : PPC::R1;
7912 SDValue StackPtr = DAG.getRegister(SP, PtrVT);
7913
7914 // Get the operands for the STACKRESTORE.
7915 SDValue Chain = Op.getOperand(0);
7916 SDValue SaveSP = Op.getOperand(1);
7917
7918 // Load the old link SP.
7919 SDValue LoadLinkSP =
7920 DAG.getLoad(PtrVT, dl, Chain, StackPtr, MachinePointerInfo());
7921
7922 // Restore the stack pointer.
7923 Chain = DAG.getCopyToReg(LoadLinkSP.getValue(1), dl, SP, SaveSP);
7924
7925 // Store the old link SP.
7926 return DAG.getStore(Chain, dl, LoadLinkSP, StackPtr, MachinePointerInfo());
7927}
7928
7929SDValue PPCTargetLowering::getReturnAddrFrameIndex(SelectionDAG &DAG) const {
7930 MachineFunction &MF = DAG.getMachineFunction();
7931 bool isPPC64 = Subtarget.isPPC64();
7932 EVT PtrVT = getPointerTy(MF.getDataLayout());
7933
7934 // Get the current return address save index.
7936 PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
7937 int RASI = FI->getReturnAddrSaveIndex();
7938
7939 // If the return address save index hasn't been defined yet.
7940 if (!RASI) {
7941 // Find out the fixed offset of the return address (LR) save area.
7942 int LROffset = Subtarget.getFrameLowering()->getReturnSaveOffset();
7943 // Allocate the frame index for the return address save area.
7944 RASI = MF.getFrameInfo().CreateFixedObject(isPPC64? 8 : 4, LROffset, false);
7945 // Save the result.
7946 FI->setReturnAddrSaveIndex(RASI);
7947 }
7948 return DAG.getFrameIndex(RASI, PtrVT);
7949}
7950
7951SDValue
7952PPCTargetLowering::getFramePointerFrameIndex(SelectionDAG & DAG) const {
7953 MachineFunction &MF = DAG.getMachineFunction();
7954 bool isPPC64 = Subtarget.isPPC64();
7955 EVT PtrVT = getPointerTy(MF.getDataLayout());
7956
7957 // Get current frame pointer save index. The users of this index will be
7958 // primarily DYNALLOC instructions.
7959 PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
7960 int FPSI = FI->getFramePointerSaveIndex();
7961
7962 // If the frame pointer save index hasn't been defined yet.
7963 if (!FPSI) {
7964 // Find out the fixed offset of the frame pointer save area.
7965 int FPOffset = Subtarget.getFrameLowering()->getFramePointerSaveOffset();
7966 // Allocate the frame index for frame pointer save area.
7967 FPSI = MF.getFrameInfo().CreateFixedObject(isPPC64? 8 : 4, FPOffset, true);
7968 // Save the result.
7969 FI->setFramePointerSaveIndex(FPSI);
7970 }
7971 return DAG.getFrameIndex(FPSI, PtrVT);
7972}
7973
7974SDValue PPCTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
7975 SelectionDAG &DAG) const {
7976 MachineFunction &MF = DAG.getMachineFunction();
7977 // Get the inputs.
7978 SDValue Chain = Op.getOperand(0);
7979 SDValue Size = Op.getOperand(1);
7980 SDLoc dl(Op);
7981
7982 // Get the correct type for pointers.
7983 EVT PtrVT = getPointerTy(DAG.getDataLayout());
7984 // Negate the size.
7985 SDValue NegSize = DAG.getNode(ISD::SUB, dl, PtrVT,
7986 DAG.getConstant(0, dl, PtrVT), Size);
7987 // Construct a node for the frame pointer save index.
7988 SDValue FPSIdx = getFramePointerFrameIndex(DAG);
7989 SDValue Ops[3] = { Chain, NegSize, FPSIdx };
7990 SDVTList VTs = DAG.getVTList(PtrVT, MVT::Other);
7991 if (hasInlineStackProbe(MF))
7992 return DAG.getNode(PPCISD::PROBED_ALLOCA, dl, VTs, Ops);
7993 return DAG.getNode(PPCISD::DYNALLOC, dl, VTs, Ops);
7994}
7995
7996SDValue PPCTargetLowering::LowerEH_DWARF_CFA(SDValue Op,
7997 SelectionDAG &DAG) const {
7998 MachineFunction &MF = DAG.getMachineFunction();
7999
8000 bool isPPC64 = Subtarget.isPPC64();
8001 EVT PtrVT = getPointerTy(DAG.getDataLayout());
8002
8003 int FI = MF.getFrameInfo().CreateFixedObject(isPPC64 ? 8 : 4, 0, false);
8004 return DAG.getFrameIndex(FI, PtrVT);
8005}
8006
8007SDValue PPCTargetLowering::lowerEH_SJLJ_SETJMP(SDValue Op,
8008 SelectionDAG &DAG) const {
8009 SDLoc DL(Op);
8010 return DAG.getNode(PPCISD::EH_SJLJ_SETJMP, DL,
8011 DAG.getVTList(MVT::i32, MVT::Other),
8012 Op.getOperand(0), Op.getOperand(1));
8013}
8014
8015SDValue PPCTargetLowering::lowerEH_SJLJ_LONGJMP(SDValue Op,
8016 SelectionDAG &DAG) const {
8017 SDLoc DL(Op);
8018 return DAG.getNode(PPCISD::EH_SJLJ_LONGJMP, DL, MVT::Other,
8019 Op.getOperand(0), Op.getOperand(1));
8020}
8021
8022SDValue PPCTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
8023 if (Op.getValueType().isVector())
8024 return LowerVectorLoad(Op, DAG);
8025
8026 assert(Op.getValueType() == MVT::i1 &&
8027 "Custom lowering only for i1 loads");
8028
8029 // First, load 8 bits into 32 bits, then truncate to 1 bit.
8030
8031 SDLoc dl(Op);
8032 LoadSDNode *LD = cast<LoadSDNode>(Op);
8033
8034 SDValue Chain = LD->getChain();
8035 SDValue BasePtr = LD->getBasePtr();
8036 MachineMemOperand *MMO = LD->getMemOperand();
8037
8038 SDValue NewLD =
8039 DAG.getExtLoad(ISD::EXTLOAD, dl, getPointerTy(DAG.getDataLayout()), Chain,
8040 BasePtr, MVT::i8, MMO);
8041 SDValue Result = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, NewLD);
8042
8043 SDValue Ops[] = { Result, SDValue(NewLD.getNode(), 1) };
8044 return DAG.getMergeValues(Ops, dl);
8045}
8046
8047SDValue PPCTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
8048 if (Op.getOperand(1).getValueType().isVector())
8049 return LowerVectorStore(Op, DAG);
8050
8051 assert(Op.getOperand(1).getValueType() == MVT::i1 &&
8052 "Custom lowering only for i1 stores");
8053
8054 // First, zero extend to 32 bits, then use a truncating store to 8 bits.
8055
8056 SDLoc dl(Op);
8057 StoreSDNode *ST = cast<StoreSDNode>(Op);
8058
8059 SDValue Chain = ST->getChain();
8060 SDValue BasePtr = ST->getBasePtr();
8061 SDValue Value = ST->getValue();
8062 MachineMemOperand *MMO = ST->getMemOperand();
8063
8064 Value = DAG.getNode(ISD::ZERO_EXTEND, dl, getPointerTy(DAG.getDataLayout()),
8065 Value);
8066 return DAG.getTruncStore(Chain, dl, Value, BasePtr, MVT::i8, MMO);
8067}
8068
8069// FIXME: Remove this once the ANDI glue bug is fixed:
8070SDValue PPCTargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
8071 assert(Op.getValueType() == MVT::i1 &&
8072 "Custom lowering only for i1 results");
8073
8074 SDLoc DL(Op);
8075 return DAG.getNode(PPCISD::ANDI_rec_1_GT_BIT, DL, MVT::i1, Op.getOperand(0));
8076}
8077
8078SDValue PPCTargetLowering::LowerTRUNCATEVector(SDValue Op,
8079 SelectionDAG &DAG) const {
8080
8081 // Implements a vector truncate that fits in a vector register as a shuffle.
8082 // We want to legalize vector truncates down to where the source fits in
8083 // a vector register (and target is therefore smaller than vector register
8084 // size). At that point legalization will try to custom lower the sub-legal
8085 // result and get here - where we can contain the truncate as a single target
8086 // operation.
8087
8088 // For example a trunc <2 x i16> to <2 x i8> could be visualized as follows:
8089 // <MSB1|LSB1, MSB2|LSB2> to <LSB1, LSB2>
8090 //
8091 // We will implement it for big-endian ordering as this (where x denotes
8092 // undefined):
8093 // < MSB1|LSB1, MSB2|LSB2, uu, uu, uu, uu, uu, uu> to
8094 // < LSB1, LSB2, u, u, u, u, u, u, u, u, u, u, u, u, u, u>
8095 //
8096 // The same operation in little-endian ordering will be:
8097 // <uu, uu, uu, uu, uu, uu, LSB2|MSB2, LSB1|MSB1> to
8098 // <u, u, u, u, u, u, u, u, u, u, u, u, u, u, LSB2, LSB1>
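// Editorial illustration of the mask built below for the <2 x i16> to
// <2 x i8> example: SizeMult is 32 / 16 = 2, so the kept lanes are byte
// elements {0, 2} on little-endian and {1, 3} on big-endian, i.e. exactly
// the LSB of each i16 lane; the remaining lanes of the 128-bit result are
// filled with don't-care elements.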
8099
8100 EVT TrgVT = Op.getValueType();
8101 assert(TrgVT.isVector() && "Vector type expected.");
8102 unsigned TrgNumElts = TrgVT.getVectorNumElements();
8103 EVT EltVT = TrgVT.getVectorElementType();
8104 if (!isOperationCustom(Op.getOpcode(), TrgVT) ||
8105 TrgVT.getSizeInBits() > 128 || !isPowerOf2_32(TrgNumElts) ||
8106 !llvm::has_single_bit<uint32_t>(EltVT.getSizeInBits()))
8107 return SDValue();
8108
8109 SDValue N1 = Op.getOperand(0);
8110 EVT SrcVT = N1.getValueType();
8111 unsigned SrcSize = SrcVT.getSizeInBits();
8112 if (SrcSize > 256 || !isPowerOf2_32(SrcVT.getVectorNumElements()) ||
8113 !llvm::has_single_bit<uint32_t>(
8114 SrcVT.getVectorElementType().getSizeInBits()))
8115 return SDValue();
8116 if (SrcSize == 256 && SrcVT.getVectorNumElements() < 2)
8117 return SDValue();
8118
8119 unsigned WideNumElts = 128 / EltVT.getSizeInBits();
8120 EVT WideVT = EVT::getVectorVT(*DAG.getContext(), EltVT, WideNumElts);
8121
8122 SDLoc DL(Op);
8123 SDValue Op1, Op2;
8124 if (SrcSize == 256) {
8125 EVT VecIdxTy = getVectorIdxTy(DAG.getDataLayout());
8126 EVT SplitVT =
8127 SrcVT.getHalfNumVectorElementsVT(*DAG.getContext());
8128 unsigned SplitNumElts = SplitVT.getVectorNumElements();
8129 Op1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, N1,
8130 DAG.getConstant(0, DL, VecIdxTy));
8131 Op2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, N1,
8132 DAG.getConstant(SplitNumElts, DL, VecIdxTy));
8133 }
8134 else {
8135 Op1 = SrcSize == 128 ? N1 : widenVec(DAG, N1, DL);
8136 Op2 = DAG.getUNDEF(WideVT);
8137 }
8138
8139 // First list the elements we want to keep.
8140 unsigned SizeMult = SrcSize / TrgVT.getSizeInBits();
8141 SmallVector<int, 16> ShuffV;
8142 if (Subtarget.isLittleEndian())
8143 for (unsigned i = 0; i < TrgNumElts; ++i)
8144 ShuffV.push_back(i * SizeMult);
8145 else
8146 for (unsigned i = 1; i <= TrgNumElts; ++i)
8147 ShuffV.push_back(i * SizeMult - 1);
8148
8149 // Populate the remaining elements with undefs.
8150 for (unsigned i = TrgNumElts; i < WideNumElts; ++i)
8151 // ShuffV.push_back(i + WideNumElts);
8152 ShuffV.push_back(WideNumElts + 1);
8153
8154 Op1 = DAG.getNode(ISD::BITCAST, DL, WideVT, Op1);
8155 Op2 = DAG.getNode(ISD::BITCAST, DL, WideVT, Op2);
8156 return DAG.getVectorShuffle(WideVT, DL, Op1, Op2, ShuffV);
8157}
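// Worked example of the shuffle built above (illustrative): truncating
// v8i16 to v8i8 has SizeMult = 128/64 = 2 and WideNumElts = 16, so the
// kept-element indices are {0,2,4,...,14} on little-endian (the low byte of
// each halfword) and {1,3,...,15} on big-endian; the remaining eight mask
// entries point past the first operand into Op2 (undef in the 128-bit
// source case). The whole truncate thus becomes one v16i8 shuffle of the
// bitcast source.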
8158
8159/// LowerSELECT_CC - Lower floating point select_cc's into fsel instruction when
8160/// possible.
8161SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
8162 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
8163 EVT ResVT = Op.getValueType();
8164 EVT CmpVT = Op.getOperand(0).getValueType();
8165 SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
8166 SDValue TV = Op.getOperand(2), FV = Op.getOperand(3);
8167 SDLoc dl(Op);
8168
8169 // Without power9-vector, we don't have a native instruction for f128 comparison.
8170 // The following transformation (the setcc is lowered to a libcall) is needed:
8171 // select_cc lhs, rhs, tv, fv, cc -> select_cc (setcc cc, x, y), 0, tv, fv, NE
8172 if (!Subtarget.hasP9Vector() && CmpVT == MVT::f128) {
8173 SDValue Z = DAG.getSetCC(
8174 dl, getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), CmpVT),
8175 LHS, RHS, CC);
8176 SDValue Zero = DAG.getConstant(0, dl, Z.getValueType());
8177 return DAG.getSelectCC(dl, Z, Zero, TV, FV, ISD::SETNE);
8178 }
8179
8180 // Not FP, or using SPE? Not a fsel.
8181 if (!CmpVT.isFloatingPoint() || !TV.getValueType().isFloatingPoint() ||
8182 Subtarget.hasSPE())
8183 return Op;
8184
8185 SDNodeFlags Flags = Op.getNode()->getFlags();
8186
8187 // We have xsmaxc[dq]p/xsminc[dq]p which are OK to emit even in the
8188 // presence of infinities.
8189 if (Subtarget.hasP9Vector() && LHS == TV && RHS == FV) {
8190 switch (CC) {
8191 default:
8192 break;
8193 case ISD::SETOGT:
8194 case ISD::SETGT:
8195 return DAG.getNode(PPCISD::XSMAXC, dl, Op.getValueType(), LHS, RHS);
8196 case ISD::SETOLT:
8197 case ISD::SETLT:
8198 return DAG.getNode(PPCISD::XSMINC, dl, Op.getValueType(), LHS, RHS);
8199 }
8200 }
8201
8202 // We might be able to do better than this under some circumstances, but in
8203 // general, fsel-based lowering of select is a finite-math-only optimization.
8204 // For more information, see section F.3 of the 2.06 ISA specification.
8205 // With ISA 3.0
8206 if ((!DAG.getTarget().Options.NoInfsFPMath && !Flags.hasNoInfs()) ||
8207 (!DAG.getTarget().Options.NoNaNsFPMath && !Flags.hasNoNaNs()) ||
8208 ResVT == MVT::f128)
8209 return Op;
8210
8211 // If the RHS of the comparison is a 0.0, we don't need to do the
8212 // subtraction at all.
8213 SDValue Sel1;
8214 if (isFloatingPointZero(RHS))
8215 switch (CC) {
8216 default: break; // SETUO etc aren't handled by fsel.
8217 case ISD::SETNE:
8218 std::swap(TV, FV);
8219 [[fallthrough]];
8220 case ISD::SETEQ:
8221 if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits
8222 LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
8223 Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV);
8224 if (Sel1.getValueType() == MVT::f32) // Comparison is always 64-bits
8225 Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1);
8226 return DAG.getNode(PPCISD::FSEL, dl, ResVT,
8227 DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), Sel1, FV);
8228 case ISD::SETULT:
8229 case ISD::SETLT:
8230 std::swap(TV, FV); // fsel is natively setge, swap operands for setlt
8231 [[fallthrough]];
8232 case ISD::SETOGE:
8233 case ISD::SETGE:
8234 if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits
8235 LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
8236 return DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV);
8237 case ISD::SETUGT:
8238 case ISD::SETGT:
8239 std::swap(TV, FV); // fsel is natively setge, swap operands for setlt
8240 [[fallthrough]];
8241 case ISD::SETOLE:
8242 case ISD::SETLE:
8243 if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits
8244 LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
8245 return DAG.getNode(PPCISD::FSEL, dl, ResVT,
8246 DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), TV, FV);
8247 }
8248
8249 SDValue Cmp;
8250 switch (CC) {
8251 default: break; // SETUO etc aren't handled by fsel.
8252 case ISD::SETNE:
8253 std::swap(TV, FV);
8254 [[fallthrough]];
8255 case ISD::SETEQ:
8256 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags);
8257 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
8258 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
8259 Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
8260 if (Sel1.getValueType() == MVT::f32) // Comparison is always 64-bits
8261 Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1);
8262 return DAG.getNode(PPCISD::FSEL, dl, ResVT,
8263 DAG.getNode(ISD::FNEG, dl, MVT::f64, Cmp), Sel1, FV);
8264 case ISD::SETULT:
8265 case ISD::SETLT:
8266 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags);
8267 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
8268 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
8269 return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
8270 case ISD::SETOGE:
8271 case ISD::SETGE:
8272 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags);
8273 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
8274 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
8275 return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
8276 case ISD::SETUGT:
8277 case ISD::SETGT:
8278 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS, Flags);
8279 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
8280 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
8281 return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
8282 case ISD::SETOLE:
8283 case ISD::SETLE:
8284 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS, Flags);
8285 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
8286 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
8287 return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
8288 }
8289 return Op;
8290}
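// Illustration of the fsel mapping used above (PPCISD::FSEL(C, A, B) yields
// A when C >= 0.0 and B otherwise, like the fsel instruction):
//   select_cc (setge LHS, RHS), TV, FV -> fsel (LHS - RHS), TV, FV
//   select_cc (setlt LHS, RHS), TV, FV -> fsel (LHS - RHS), FV, TV
//   select_cc (seteq LHS, RHS), TV, FV -> fsel -(LHS - RHS),
//                                              (fsel (LHS - RHS), TV, FV), FV
// Equality needs both LHS - RHS >= 0 and RHS - LHS >= 0, and the subtraction
// can overflow to infinity or produce a NaN for unordered inputs, which is
// why the NoInfs/NoNaNs checks above simply return Op.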
8291
8292static unsigned getPPCStrictOpcode(unsigned Opc) {
8293 switch (Opc) {
8294 default:
8295 llvm_unreachable("No strict version of this opcode!");
8296 case PPCISD::FCTIDZ:
8297 return PPCISD::STRICT_FCTIDZ;
8298 case PPCISD::FCTIWZ:
8299 return PPCISD::STRICT_FCTIWZ;
8300 case PPCISD::FCTIDUZ:
8301 return PPCISD::STRICT_FCTIDUZ;
8302 case PPCISD::FCTIWUZ:
8303 return PPCISD::STRICT_FCTIWUZ;
8304 case PPCISD::FCFID:
8305 return PPCISD::STRICT_FCFID;
8306 case PPCISD::FCFIDU:
8307 return PPCISD::STRICT_FCFIDU;
8308 case PPCISD::FCFIDS:
8309 return PPCISD::STRICT_FCFIDS;
8310 case PPCISD::FCFIDUS:
8311 return PPCISD::STRICT_FCFIDUS;
8312 }
8313}
8314
8315 static SDValue convertFPToInt(SDValue Op, SelectionDAG &DAG,
8316 const PPCSubtarget &Subtarget) {
8317 SDLoc dl(Op);
8318 bool IsStrict = Op->isStrictFPOpcode();
8319 bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT ||
8320 Op.getOpcode() == ISD::STRICT_FP_TO_SINT;
8321
8322 // TODO: Any other flags to propagate?
8323 SDNodeFlags Flags;
8324 Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
8325
8326 // For strict nodes, source is the second operand.
8327 SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
8328 SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
8329 MVT DestTy = Op.getSimpleValueType();
8330 assert(Src.getValueType().isFloatingPoint() &&
8331 (DestTy == MVT::i8 || DestTy == MVT::i16 || DestTy == MVT::i32 ||
8332 DestTy == MVT::i64) &&
8333 "Invalid FP_TO_INT types");
8334 if (Src.getValueType() == MVT::f32) {
8335 if (IsStrict) {
8336 Src =
8337 DAG.getNode(ISD::STRICT_FP_EXTEND, dl,
8338 DAG.getVTList(MVT::f64, MVT::Other), {Chain, Src}, Flags);
8339 Chain = Src.getValue(1);
8340 } else
8341 Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);
8342 }
8343 if ((DestTy == MVT::i8 || DestTy == MVT::i16) && Subtarget.hasP9Vector())
8344 DestTy = Subtarget.isPPC64() ? MVT::i64 : MVT::i32;
8345 unsigned Opc = ISD::DELETED_NODE;
8346 switch (DestTy.SimpleTy) {
8347 default: llvm_unreachable("Unhandled FP_TO_INT type in custom expander!");
8348 case MVT::i32:
8349 Opc = IsSigned ? PPCISD::FCTIWZ
8350 : (Subtarget.hasFPCVT() ? PPCISD::FCTIWUZ : PPCISD::FCTIDZ);
8351 break;
8352 case MVT::i64:
8353 assert((IsSigned || Subtarget.hasFPCVT()) &&
8354 "i64 FP_TO_UINT is supported only with FPCVT");
8355 Opc = IsSigned ? PPCISD::FCTIDZ : PPCISD::FCTIDUZ;
8356 }
8357 EVT ConvTy = Src.getValueType() == MVT::f128 ? MVT::f128 : MVT::f64;
8358 SDValue Conv;
8359 if (IsStrict) {
8360 Opc = getPPCStrictOpcode(Opc);
8361 Conv = DAG.getNode(Opc, dl, DAG.getVTList(ConvTy, MVT::Other), {Chain, Src},
8362 Flags);
8363 } else {
8364 Conv = DAG.getNode(Opc, dl, ConvTy, Src);
8365 }
8366 return Conv;
8367}
8368
8369void PPCTargetLowering::LowerFP_TO_INTForReuse(SDValue Op, ReuseLoadInfo &RLI,
8370 SelectionDAG &DAG,
8371 const SDLoc &dl) const {
8372 SDValue Tmp = convertFPToInt(Op, DAG, Subtarget);
8373 bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT ||
8374 Op.getOpcode() == ISD::STRICT_FP_TO_SINT;
8375 bool IsStrict = Op->isStrictFPOpcode();
8376
8377 // Convert the FP value to an int value through memory.
8378 bool i32Stack = Op.getValueType() == MVT::i32 && Subtarget.hasSTFIWX() &&
8379 (IsSigned || Subtarget.hasFPCVT());
8380 SDValue FIPtr = DAG.CreateStackTemporary(i32Stack ? MVT::i32 : MVT::f64);
8381 int FI = cast<FrameIndexSDNode>(FIPtr)->getIndex();
8382 MachinePointerInfo MPI =
8383 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI);
8384
8385 // Emit a store to the stack slot.
8386 SDValue Chain = IsStrict ? Tmp.getValue(1) : DAG.getEntryNode();
8387 Align Alignment(DAG.getEVTAlign(Tmp.getValueType()));
8388 if (i32Stack) {
8389 MachineFunction &MF = DAG.getMachineFunction();
8390 Alignment = Align(4);
8391 MachineMemOperand *MMO =
8392 MF.getMachineMemOperand(MPI, MachineMemOperand::MOStore, 4, Alignment);
8393 SDValue Ops[] = { Chain, Tmp, FIPtr };
8394 Chain = DAG.getMemIntrinsicNode(PPCISD::STFIWX, dl,
8395 DAG.getVTList(MVT::Other), Ops, MVT::i32, MMO);
8396 } else
8397 Chain = DAG.getStore(Chain, dl, Tmp, FIPtr, MPI, Alignment);
8398
8399 // Result is a load from the stack slot. If loading 4 bytes, make sure to
8400 // add in a bias on big endian.
8401 if (Op.getValueType() == MVT::i32 && !i32Stack) {
8402 FIPtr = DAG.getNode(ISD::ADD, dl, FIPtr.getValueType(), FIPtr,
8403 DAG.getConstant(4, dl, FIPtr.getValueType()));
8404 MPI = MPI.getWithOffset(Subtarget.isLittleEndian() ? 0 : 4);
8405 }
8406
8407 RLI.Chain = Chain;
8408 RLI.Ptr = FIPtr;
8409 RLI.MPI = MPI;
8410 RLI.Alignment = Alignment;
8411}
8412
8413/// Custom lowers floating point to integer conversions to use
8414/// the direct move instructions available in ISA 2.07 to avoid the
8415/// need for load/store combinations.
8416SDValue PPCTargetLowering::LowerFP_TO_INTDirectMove(SDValue Op,
8417 SelectionDAG &DAG,
8418 const SDLoc &dl) const {
8419 SDValue Conv = convertFPToInt(Op, DAG, Subtarget);
8420 SDValue Mov = DAG.getNode(PPCISD::MFVSR, dl, Op.getValueType(), Conv);
8421 if (Op->isStrictFPOpcode())
8422 return DAG.getMergeValues({Mov, Conv.getValue(1)}, dl);
8423 else
8424 return Mov;
8425}
8426
8427SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
8428 const SDLoc &dl) const {
8429 bool IsStrict = Op->isStrictFPOpcode();
8430 bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT ||
8431 Op.getOpcode() == ISD::STRICT_FP_TO_SINT;
8432 SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
8433 EVT SrcVT = Src.getValueType();
8434 EVT DstVT = Op.getValueType();
8435
8436 // FP to INT conversions are legal for f128.
8437 if (SrcVT == MVT::f128)
8438 return Subtarget.hasP9Vector() ? Op : SDValue();
8439
8440 // Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on
8441 // PPC (the libcall is not available).
8442 if (SrcVT == MVT::ppcf128) {
8443 if (DstVT == MVT::i32) {
8444 // TODO: Conservatively pass only nofpexcept flag here. Need to check and
8445 // set other fast-math flags to FP operations in both strict and
8446 // non-strict cases. (FP_TO_SINT, FSUB)
8447 SDNodeFlags Flags;
8448 Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
8449
8450 if (IsSigned) {
8451 SDValue Lo, Hi;
8452 std::tie(Lo, Hi) = DAG.SplitScalar(Src, dl, MVT::f64, MVT::f64);
8453
8454 // Add the two halves of the long double in round-to-zero mode, and use
8455 // a smaller FP_TO_SINT.
8456 if (IsStrict) {
8457 SDValue Res = DAG.getNode(PPCISD::STRICT_FADDRTZ, dl,
8458 DAG.getVTList(MVT::f64, MVT::Other),
8459 {Op.getOperand(0), Lo, Hi}, Flags);
8460 return DAG.getNode(ISD::STRICT_FP_TO_SINT, dl,
8461 DAG.getVTList(MVT::i32, MVT::Other),
8462 {Res.getValue(1), Res}, Flags);
8463 } else {
8464 SDValue Res = DAG.getNode(PPCISD::FADDRTZ, dl, MVT::f64, Lo, Hi);
8465 return DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Res);
8466 }
8467 } else {
8468 const uint64_t TwoE31[] = {0x41e0000000000000LL, 0};
8469 APFloat APF = APFloat(APFloat::PPCDoubleDouble(), APInt(128, TwoE31));
8470 SDValue Cst = DAG.getConstantFP(APF, dl, SrcVT);
8471 SDValue SignMask = DAG.getConstant(0x80000000, dl, DstVT);
8472 if (IsStrict) {
8473 // Sel = Src < 0x80000000
8474 // FltOfs = select Sel, 0.0, 0x80000000
8475 // IntOfs = select Sel, 0, 0x80000000
8476 // Result = fp_to_sint(Src - FltOfs) ^ IntOfs
8477 SDValue Chain = Op.getOperand(0);
8478 EVT SetCCVT =
8479 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
8480 EVT DstSetCCVT =
8481 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), DstVT);
8482 SDValue Sel = DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT,
8483 Chain, true);
8484 Chain = Sel.getValue(1);
8485
8486 SDValue FltOfs = DAG.getSelect(
8487 dl, SrcVT, Sel, DAG.getConstantFP(0.0, dl, SrcVT), Cst);
8488 Sel = DAG.getBoolExtOrTrunc(Sel, dl, DstSetCCVT, DstVT);
8489
8490 SDValue Val = DAG.getNode(ISD::STRICT_FSUB, dl,
8491 DAG.getVTList(SrcVT, MVT::Other),
8492 {Chain, Src, FltOfs}, Flags);
8493 Chain = Val.getValue(1);
8494 SDValue SInt = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl,
8495 DAG.getVTList(DstVT, MVT::Other),
8496 {Chain, Val}, Flags);
8497 Chain = SInt.getValue(1);
8498 SDValue IntOfs = DAG.getSelect(
8499 dl, DstVT, Sel, DAG.getConstant(0, dl, DstVT), SignMask);
8500 SDValue Result = DAG.getNode(ISD::XOR, dl, DstVT, SInt, IntOfs);
8501 return DAG.getMergeValues({Result, Chain}, dl);
8502 } else {
8503 // X>=2^31 ? (int)(X-2^31)+0x80000000 : (int)X
8504 // FIXME: generated code sucks.
8505 SDValue True = DAG.getNode(ISD::FSUB, dl, MVT::ppcf128, Src, Cst);
8506 True = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, True);
8507 True = DAG.getNode(ISD::ADD, dl, MVT::i32, True, SignMask);
8508 SDValue False = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Src);
8509 return DAG.getSelectCC(dl, Src, Cst, True, False, ISD::SETGE);
8510 }
8511 }
8512 }
8513
8514 return SDValue();
8515 }
8516
8517 if (Subtarget.hasDirectMove() && Subtarget.isPPC64())
8518 return LowerFP_TO_INTDirectMove(Op, DAG, dl);
8519
8520 ReuseLoadInfo RLI;
8521 LowerFP_TO_INTForReuse(Op, RLI, DAG, dl);
8522
8523 return DAG.getLoad(Op.getValueType(), dl, RLI.Chain, RLI.Ptr, RLI.MPI,
8524 RLI.Alignment, RLI.MMOFlags(), RLI.AAInfo, RLI.Ranges);
8525}
8526
8527// We're trying to insert a regular store, S, and then a load, L. If the
8528// incoming value, O, is a load, we might just be able to have our load use the
8529// address used by O. However, we don't know if anything else will store to
8530// that address before we can load from it. To prevent this situation, we need
8531// to insert our load, L, into the chain as a peer of O. To do this, we give L
8532// the same chain operand as O, we create a token factor from the chain results
8533// of O and L, and we replace all uses of O's chain result with that token
8534// factor (see spliceIntoChain below for this last part).
8535bool PPCTargetLowering::canReuseLoadAddress(SDValue Op, EVT MemVT,
8536 ReuseLoadInfo &RLI,
8537 SelectionDAG &DAG,
8538 ISD::LoadExtType ET) const {
8539 // Conservatively skip reusing for constrained FP nodes.
8540 if (Op->isStrictFPOpcode())
8541 return false;
8542
8543 SDLoc dl(Op);
8544 bool ValidFPToUint = Op.getOpcode() == ISD::FP_TO_UINT &&
8545 (Subtarget.hasFPCVT() || Op.getValueType() == MVT::i32);
8546 if (ET == ISD::NON_EXTLOAD &&
8547 (ValidFPToUint || Op.getOpcode() == ISD::FP_TO_SINT) &&
8548 isOperationLegalOrCustom(Op.getOpcode(),
8549 Op.getOperand(0).getValueType())) {
8550
8551 LowerFP_TO_INTForReuse(Op, RLI, DAG, dl);
8552 return true;
8553 }
8554
8555 LoadSDNode *LD = dyn_cast<LoadSDNode>(Op);
8556 if (!LD || LD->getExtensionType() != ET || LD->isVolatile() ||
8557 LD->isNonTemporal())
8558 return false;
8559 if (LD->getMemoryVT() != MemVT)
8560 return false;
8561
8562 // If the result of the load is an illegal type, then we can't build a
8563 // valid chain for reuse since the legalised loads and token factor node that
8564 // ties the legalised loads together uses a different output chain than the
8565 // illegal load.
8566 if (!isTypeLegal(LD->getValueType(0)))
8567 return false;
8568
8569 RLI.Ptr = LD->getBasePtr();
8570 if (LD->isIndexed() && !LD->getOffset().isUndef()) {
8571 assert(LD->getAddressingMode() == ISD::PRE_INC &&
8572 "Non-pre-inc AM on PPC?");
8573 RLI.Ptr = DAG.getNode(ISD::ADD, dl, RLI.Ptr.getValueType(), RLI.Ptr,
8574 LD->getOffset());
8575 }
8576
8577 RLI.Chain = LD->getChain();
8578 RLI.MPI = LD->getPointerInfo();
8579 RLI.IsDereferenceable = LD->isDereferenceable();
8580 RLI.IsInvariant = LD->isInvariant();
8581 RLI.Alignment = LD->getAlign();
8582 RLI.AAInfo = LD->getAAInfo();
8583 RLI.Ranges = LD->getRanges();
8584
8585 RLI.ResChain = SDValue(LD, LD->isIndexed() ? 2 : 1);
8586 return true;
8587}
8588
8589// Given the head of the old chain, ResChain, insert a token factor containing
8590// it and NewResChain, and make users of ResChain now be users of that token
8591// factor.
8592// TODO: Remove and use DAG::makeEquivalentMemoryOrdering() instead.
8593void PPCTargetLowering::spliceIntoChain(SDValue ResChain,
8594 SDValue NewResChain,
8595 SelectionDAG &DAG) const {
8596 if (!ResChain)
8597 return;
8598
8599 SDLoc dl(NewResChain);
8600
8601 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
8602 NewResChain, DAG.getUNDEF(MVT::Other));
8603 assert(TF.getNode() != NewResChain.getNode() &&
8604 "A new TF really is required here");
8605
8606 DAG.ReplaceAllUsesOfValueWith(ResChain, TF);
8607 DAG.UpdateNodeOperands(TF.getNode(), ResChain, NewResChain);
8608}
8609
8610 /// Analyze the profitability of a direct move:
8611 /// prefer a float load to an int load plus direct move
8612 /// when there is no integer use of the loaded value.
8613bool PPCTargetLowering::directMoveIsProfitable(const SDValue &Op) const {
8614 SDNode *Origin = Op.getOperand(Op->isStrictFPOpcode() ? 1 : 0).getNode();
8615 if (Origin->getOpcode() != ISD::LOAD)
8616 return true;
8617
8618 // If there is no LXSIBZX/LXSIHZX, like Power8,
8619 // prefer direct move if the memory size is 1 or 2 bytes.
8620 MachineMemOperand *MMO = cast<LoadSDNode>(Origin)->getMemOperand();
8621 if (!Subtarget.hasP9Vector() &&
8622 (!MMO->getSize().hasValue() || MMO->getSize().getValue() <= 2))
8623 return true;
8624
8625 for (SDNode::use_iterator UI = Origin->use_begin(),
8626 UE = Origin->use_end();
8627 UI != UE; ++UI) {
8628
8629 // Only look at the users of the loaded value.
8630 if (UI.getUse().get().getResNo() != 0)
8631 continue;
8632
8633 if (UI->getOpcode() != ISD::SINT_TO_FP &&
8634 UI->getOpcode() != ISD::UINT_TO_FP &&
8635 UI->getOpcode() != ISD::STRICT_SINT_TO_FP &&
8636 UI->getOpcode() != ISD::STRICT_UINT_TO_FP)
8637 return true;
8638 }
8639
8640 return false;
8641}
8642
8643 static SDValue convertIntToFP(SDValue Op, SDValue Src, SelectionDAG &DAG,
8644 const PPCSubtarget &Subtarget,
8645 SDValue Chain = SDValue()) {
8646 bool IsSigned = Op.getOpcode() == ISD::SINT_TO_FP ||
8647 Op.getOpcode() == ISD::STRICT_SINT_TO_FP;
8648 SDLoc dl(Op);
8649
8650 // TODO: Any other flags to propagate?
8651 SDNodeFlags Flags;
8652 Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
8653
8654 // If we have FCFIDS, then use it when converting to single-precision.
8655 // Otherwise, convert to double-precision and then round.
8656 bool IsSingle = Op.getValueType() == MVT::f32 && Subtarget.hasFPCVT();
8657 unsigned ConvOpc = IsSingle ? (IsSigned ? PPCISD::FCFIDS : PPCISD::FCFIDUS)
8658 : (IsSigned ? PPCISD::FCFID : PPCISD::FCFIDU);
8659 EVT ConvTy = IsSingle ? MVT::f32 : MVT::f64;
8660 if (Op->isStrictFPOpcode()) {
8661 if (!Chain)
8662 Chain = Op.getOperand(0);
8663 return DAG.getNode(getPPCStrictOpcode(ConvOpc), dl,
8664 DAG.getVTList(ConvTy, MVT::Other), {Chain, Src}, Flags);
8665 } else
8666 return DAG.getNode(ConvOpc, dl, ConvTy, Src);
8667}
8668
8669/// Custom lowers integer to floating point conversions to use
8670/// the direct move instructions available in ISA 2.07 to avoid the
8671/// need for load/store combinations.
8672SDValue PPCTargetLowering::LowerINT_TO_FPDirectMove(SDValue Op,
8673 SelectionDAG &DAG,
8674 const SDLoc &dl) const {
8675 assert((Op.getValueType() == MVT::f32 ||
8676 Op.getValueType() == MVT::f64) &&
8677 "Invalid floating point type as target of conversion");
8678 assert(Subtarget.hasFPCVT() &&
8679 "Int to FP conversions with direct moves require FPCVT");
8680 SDValue Src = Op.getOperand(Op->isStrictFPOpcode() ? 1 : 0);
8681 bool WordInt = Src.getSimpleValueType().SimpleTy == MVT::i32;
8682 bool Signed = Op.getOpcode() == ISD::SINT_TO_FP ||
8683 Op.getOpcode() == ISD::STRICT_SINT_TO_FP;
8684 unsigned MovOpc = (WordInt && !Signed) ? PPCISD::MTVSRZ : PPCISD::MTVSRA;
8685 SDValue Mov = DAG.getNode(MovOpc, dl, MVT::f64, Src);
8686 return convertIntToFP(Op, Mov, DAG, Subtarget);
8687}
8688
8689static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl) {
8690
8691 EVT VecVT = Vec.getValueType();
8692 assert(VecVT.isVector() && "Expected a vector type.");
8693 assert(VecVT.getSizeInBits() < 128 && "Vector is already full width.");
8694
8695 EVT EltVT = VecVT.getVectorElementType();
8696 unsigned WideNumElts = 128 / EltVT.getSizeInBits();
8697 EVT WideVT = EVT::getVectorVT(*DAG.getContext(), EltVT, WideNumElts);
8698
8699 unsigned NumConcat = WideNumElts / VecVT.getVectorNumElements();
8700 SmallVector<SDValue, 16> Ops(NumConcat);
8701 Ops[0] = Vec;
8702 SDValue UndefVec = DAG.getUNDEF(VecVT);
8703 for (unsigned i = 1; i < NumConcat; ++i)
8704 Ops[i] = UndefVec;
8705
8706 return DAG.getNode(ISD::CONCAT_VECTORS, dl, WideVT, Ops);
8707}
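// For example, widening a v4i16 value produces a v8i16 CONCAT_VECTORS of the
// original vector followed by one v4i16 undef operand (NumConcat = 2); only
// the low half of the 128-bit result carries defined elements.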
8708
8709SDValue PPCTargetLowering::LowerINT_TO_FPVector(SDValue Op, SelectionDAG &DAG,
8710 const SDLoc &dl) const {
8711 bool IsStrict = Op->isStrictFPOpcode();
8712 unsigned Opc = Op.getOpcode();
8713 SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
8714 assert((Opc == ISD::UINT_TO_FP || Opc == ISD::SINT_TO_FP ||
8716 "Unexpected conversion type");
8717 assert((Op.getValueType() == MVT::v2f64 || Op.getValueType() == MVT::v4f32) &&
8718 "Supports conversions to v2f64/v4f32 only.");
8719
8720 // TODO: Any other flags to propagate?
8721 SDNodeFlags Flags;
8722 Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
8723
8724 bool SignedConv = Opc == ISD::SINT_TO_FP || Opc == ISD::STRICT_SINT_TO_FP;
8725 bool FourEltRes = Op.getValueType() == MVT::v4f32;
8726
8727 SDValue Wide = widenVec(DAG, Src, dl);
8728 EVT WideVT = Wide.getValueType();
8729 unsigned WideNumElts = WideVT.getVectorNumElements();
8730 MVT IntermediateVT = FourEltRes ? MVT::v4i32 : MVT::v2i64;
8731
8732 SmallVector<int, 16> ShuffV;
8733 for (unsigned i = 0; i < WideNumElts; ++i)
8734 ShuffV.push_back(i + WideNumElts);
8735
8736 int Stride = FourEltRes ? WideNumElts / 4 : WideNumElts / 2;
8737 int SaveElts = FourEltRes ? 4 : 2;
8738 if (Subtarget.isLittleEndian())
8739 for (int i = 0; i < SaveElts; i++)
8740 ShuffV[i * Stride] = i;
8741 else
8742 for (int i = 1; i <= SaveElts; i++)
8743 ShuffV[i * Stride - 1] = i - 1;
8744
8745 SDValue ShuffleSrc2 =
8746 SignedConv ? DAG.getUNDEF(WideVT) : DAG.getConstant(0, dl, WideVT);
8747 SDValue Arrange = DAG.getVectorShuffle(WideVT, dl, Wide, ShuffleSrc2, ShuffV);
8748
8749 SDValue Extend;
8750 if (SignedConv) {
8751 Arrange = DAG.getBitcast(IntermediateVT, Arrange);
8752 EVT ExtVT = Src.getValueType();
8753 if (Subtarget.hasP9Altivec())
8754 ExtVT = EVT::getVectorVT(*DAG.getContext(), WideVT.getVectorElementType(),
8755 IntermediateVT.getVectorNumElements());
8756
8757 Extend = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, IntermediateVT, Arrange,
8758 DAG.getValueType(ExtVT));
8759 } else
8760 Extend = DAG.getNode(ISD::BITCAST, dl, IntermediateVT, Arrange);
8761
8762 if (IsStrict)
8763 return DAG.getNode(Opc, dl, DAG.getVTList(Op.getValueType(), MVT::Other),
8764 {Op.getOperand(0), Extend}, Flags);
8765
8766 return DAG.getNode(Opc, dl, Op.getValueType(), Extend);
8767}
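// Worked example (illustrative): a v4i16 -> v4f32 unsigned conversion on a
// little-endian target widens the source to v8i16, starts the mask as
// {8,9,...,15} (all elements of ShuffleSrc2, a zero vector here) and then
// overwrites the stride-2 positions, giving {0,9,1,11,2,13,3,15}. Each i32
// lane of the bitcast result is the zero-extended source element, so one
// v4i32 UINT_TO_FP finishes the job; the signed path pads with undef and
// uses SIGN_EXTEND_INREG instead.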
8768
8769SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
8770 SelectionDAG &DAG) const {
8771 SDLoc dl(Op);
8772 bool IsSigned = Op.getOpcode() == ISD::SINT_TO_FP ||
8773 Op.getOpcode() == ISD::STRICT_SINT_TO_FP;
8774 bool IsStrict = Op->isStrictFPOpcode();
8775 SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
8776 SDValue Chain = IsStrict ? Op.getOperand(0) : DAG.getEntryNode();
8777
8778 // TODO: Any other flags to propagate?
8779 SDNodeFlags Flags;
8780 Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
8781
8782 EVT InVT = Src.getValueType();
8783 EVT OutVT = Op.getValueType();
8784 if (OutVT.isVector() && OutVT.isFloatingPoint() &&
8785 isOperationCustom(Op.getOpcode(), InVT))
8786 return LowerINT_TO_FPVector(Op, DAG, dl);
8787
8788 // Conversions to f128 are legal.
8789 if (Op.getValueType() == MVT::f128)
8790 return Subtarget.hasP9Vector() ? Op : SDValue();
8791
8792 // Don't handle ppc_fp128 here; let it be lowered to a libcall.
8793 if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
8794 return SDValue();
8795
8796 if (Src.getValueType() == MVT::i1) {
8797 SDValue Sel = DAG.getNode(ISD::SELECT, dl, Op.getValueType(), Src,
8798 DAG.getConstantFP(1.0, dl, Op.getValueType()),
8799 DAG.getConstantFP(0.0, dl, Op.getValueType()));
8800 if (IsStrict)
8801 return DAG.getMergeValues({Sel, Chain}, dl);
8802 else
8803 return Sel;
8804 }
8805
8806 // If we have direct moves, we can do all the conversion, skip the store/load
8807 // however, without FPCVT we can't do most conversions.
8808 if (Subtarget.hasDirectMove() && directMoveIsProfitable(Op) &&
8809 Subtarget.isPPC64() && Subtarget.hasFPCVT())
8810 return LowerINT_TO_FPDirectMove(Op, DAG, dl);
8811
8812 assert((IsSigned || Subtarget.hasFPCVT()) &&
8813 "UINT_TO_FP is supported only with FPCVT");
8814
8815 if (Src.getValueType() == MVT::i64) {
8816 SDValue SINT = Src;
8817 // When converting to single-precision, we actually need to convert
8818 // to double-precision first and then round to single-precision.
8819 // To avoid double-rounding effects during that operation, we have
8820 // to prepare the input operand. Bits that might be truncated when
8821 // converting to double-precision are replaced by a bit that won't
8822 // be lost at this stage, but is below the single-precision rounding
8823 // position.
8824 //
8825 // However, if -enable-unsafe-fp-math is in effect, accept double
8826 // rounding to avoid the extra overhead.
8827 if (Op.getValueType() == MVT::f32 &&
8828 !Subtarget.hasFPCVT() &&
8829 !DAG.getTarget().Options.UnsafeFPMath) {
8830
8831 // Twiddle input to make sure the low 11 bits are zero. (If this
8832 // is the case, we are guaranteed the value will fit into the 53 bit
8833 // mantissa of an IEEE double-precision value without rounding.)
8834 // If any of those low 11 bits were not zero originally, make sure
8835 // bit 12 (value 2048) is set instead, so that the final rounding
8836 // to single-precision gets the correct result.
8837 SDValue Round = DAG.getNode(ISD::AND, dl, MVT::i64,
8838 SINT, DAG.getConstant(2047, dl, MVT::i64));
8839 Round = DAG.getNode(ISD::ADD, dl, MVT::i64,
8840 Round, DAG.getConstant(2047, dl, MVT::i64));
8841 Round = DAG.getNode(ISD::OR, dl, MVT::i64, Round, SINT);
8842 Round = DAG.getNode(ISD::AND, dl, MVT::i64,
8843 Round, DAG.getConstant(-2048, dl, MVT::i64));
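// Arithmetic of the twiddle: if the low 11 bits of SINT are all zero,
// (SINT & 2047) + 2047 == 2047, bit 11 stays clear, and the final mask
// leaves SINT unchanged. If any low bit is set, the sum lands in
// [2048, 4094], so bit 11 becomes set; OR-ing back into SINT and then
// clearing the low 11 bits yields SINT with bit 11 forced on and no bits
// below it, which converts to f64 exactly while bit 11 acts as a sticky
// bit for the final round to single precision.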
8844
8845 // However, we cannot use that value unconditionally: if the magnitude
8846 // of the input value is small, the bit-twiddling we did above might
8847 // end up visibly changing the output. Fortunately, in that case, we
8848 // don't need to twiddle bits since the original input will convert
8849 // exactly to double-precision floating-point already. Therefore,
8850 // construct a conditional to use the original value if the top 11
8851 // bits are all sign-bit copies, and use the rounded value computed
8852 // above otherwise.
8853 SDValue Cond = DAG.getNode(ISD::SRA, dl, MVT::i64,
8854 SINT, DAG.getConstant(53, dl, MVT::i32));
8855 Cond = DAG.getNode(ISD::ADD, dl, MVT::i64,
8856 Cond, DAG.getConstant(1, dl, MVT::i64));
8857 Cond = DAG.getSetCC(
8858 dl,
8859 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), MVT::i64),
8860 Cond, DAG.getConstant(1, dl, MVT::i64), ISD::SETUGT);
8861
8862 SINT = DAG.getNode(ISD::SELECT, dl, MVT::i64, Cond, Round, SINT);
8863 }
8864
8865 ReuseLoadInfo RLI;
8866 SDValue Bits;
8867
8868 MachineFunction &MF = DAG.getMachineFunction();
8869 if (canReuseLoadAddress(SINT, MVT::i64, RLI, DAG)) {
8870 Bits = DAG.getLoad(MVT::f64, dl, RLI.Chain, RLI.Ptr, RLI.MPI,
8871 RLI.Alignment, RLI.MMOFlags(), RLI.AAInfo, RLI.Ranges);
8872 spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
8873 } else if (Subtarget.hasLFIWAX() &&
8874 canReuseLoadAddress(SINT, MVT::i32, RLI, DAG, ISD::SEXTLOAD)) {
8875 MachineMemOperand *MMO =
8876 MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
8877 RLI.Alignment, RLI.AAInfo, RLI.Ranges);
8878 SDValue Ops[] = { RLI.Chain, RLI.Ptr };
8879 Bits = DAG.getMemIntrinsicNode(PPCISD::LFIWAX, dl,
8880 DAG.getVTList(MVT::f64, MVT::Other),
8881 Ops, MVT::i32, MMO);
8882 spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
8883 } else if (Subtarget.hasFPCVT() &&
8884 canReuseLoadAddress(SINT, MVT::i32, RLI, DAG, ISD::ZEXTLOAD)) {
8885 MachineMemOperand *MMO =
8886 MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
8887 RLI.Alignment, RLI.AAInfo, RLI.Ranges);
8888 SDValue Ops[] = { RLI.Chain, RLI.Ptr };
8889 Bits = DAG.getMemIntrinsicNode(PPCISD::LFIWZX, dl,
8890 DAG.getVTList(MVT::f64, MVT::Other),
8891 Ops, MVT::i32, MMO);
8892 spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
8893 } else if (((Subtarget.hasLFIWAX() &&
8894 SINT.getOpcode() == ISD::SIGN_EXTEND) ||
8895 (Subtarget.hasFPCVT() &&
8896 SINT.getOpcode() == ISD::ZERO_EXTEND)) &&
8897 SINT.getOperand(0).getValueType() == MVT::i32) {
8898 MachineFrameInfo &MFI = MF.getFrameInfo();
8899 EVT PtrVT = getPointerTy(DAG.getDataLayout());
8900
8901 int FrameIdx = MFI.CreateStackObject(4, Align(4), false);
8902 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
8903
8904 SDValue Store = DAG.getStore(Chain, dl, SINT.getOperand(0), FIdx,
8905 MachinePointerInfo::getFixedStack(
8906 DAG.getMachineFunction(), FrameIdx));
8907 Chain = Store;
8908
8909 assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 &&
8910 "Expected an i32 store");
8911
8912 RLI.Ptr = FIdx;
8913 RLI.Chain = Chain;
8914 RLI.MPI =
8915 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
8916 RLI.Alignment = Align(4);
8917
8918 MachineMemOperand *MMO =
8919 MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
8920 RLI.Alignment, RLI.AAInfo, RLI.Ranges);
8921 SDValue Ops[] = { RLI.Chain, RLI.Ptr };
8922 Bits = DAG.getMemIntrinsicNode(SINT.getOpcode() == ISD::ZERO_EXTEND ?
8923 PPCISD::LFIWZX : PPCISD::LFIWAX,
8924 dl, DAG.getVTList(MVT::f64, MVT::Other),
8925 Ops, MVT::i32, MMO);
8926 Chain = Bits.getValue(1);
8927 } else
8928 Bits = DAG.getNode(ISD::BITCAST, dl, MVT::f64, SINT);
8929
8930 SDValue FP = convertIntToFP(Op, Bits, DAG, Subtarget, Chain);
8931 if (IsStrict)
8932 Chain = FP.getValue(1);
8933
8934 if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) {
8935 if (IsStrict)
8936 FP = DAG.getNode(ISD::STRICT_FP_ROUND, dl,
8937 DAG.getVTList(MVT::f32, MVT::Other),
8938 {Chain, FP, DAG.getIntPtrConstant(0, dl)}, Flags);
8939 else
8940 FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP,
8941 DAG.getIntPtrConstant(0, dl, /*isTarget=*/true));
8942 }
8943 return FP;
8944 }
8945
8946 assert(Src.getValueType() == MVT::i32 &&
8947 "Unhandled INT_TO_FP type in custom expander!");
8948 // Since we only generate this in 64-bit mode, we can take advantage of
8949 // 64-bit registers. In particular, sign extend the input value into the
8950 // 64-bit register with extsw, store the WHOLE 64-bit value into the stack
8951 // then lfd it and fcfid it.
8952 MachineFunction &MF = DAG.getMachineFunction();
8953 MachineFrameInfo &MFI = MF.getFrameInfo();
8954 EVT PtrVT = getPointerTy(MF.getDataLayout());
8955
8956 SDValue Ld;
8957 if (Subtarget.hasLFIWAX() || Subtarget.hasFPCVT()) {
8958 ReuseLoadInfo RLI;
8959 bool ReusingLoad;
8960 if (!(ReusingLoad = canReuseLoadAddress(Src, MVT::i32, RLI, DAG))) {
8961 int FrameIdx = MFI.CreateStackObject(4, Align(4), false);
8962 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
8963
8964 SDValue Store = DAG.getStore(Chain, dl, Src, FIdx,
8965 MachinePointerInfo::getFixedStack(
8966 DAG.getMachineFunction(), FrameIdx));
8967 Chain = Store;
8968
8969 assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 &&
8970 "Expected an i32 store");
8971
8972 RLI.Ptr = FIdx;
8973 RLI.Chain = Chain;
8974 RLI.MPI =
8975 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
8976 RLI.Alignment = Align(4);
8977 }
8978
8979 MachineMemOperand *MMO =
8980 MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
8981 RLI.Alignment, RLI.AAInfo, RLI.Ranges);
8982 SDValue Ops[] = { RLI.Chain, RLI.Ptr };
8983 Ld = DAG.getMemIntrinsicNode(IsSigned ? PPCISD::LFIWAX : PPCISD::LFIWZX, dl,
8984 DAG.getVTList(MVT::f64, MVT::Other), Ops,
8985 MVT::i32, MMO);
8986 Chain = Ld.getValue(1);
8987 if (ReusingLoad)
8988 spliceIntoChain(RLI.ResChain, Ld.getValue(1), DAG);
8989 } else {
8990 assert(Subtarget.isPPC64() &&
8991 "i32->FP without LFIWAX supported only on PPC64");
8992
8993 int FrameIdx = MFI.CreateStackObject(8, Align(8), false);
8994 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
8995
8996 SDValue Ext64 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i64, Src);
8997
8998 // STD the extended value into the stack slot.
8999 SDValue Store = DAG.getStore(
9000 Chain, dl, Ext64, FIdx,
9001 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx));
9002 Chain = Store;
9003
9004 // Load the value as a double.
9005 Ld = DAG.getLoad(
9006 MVT::f64, dl, Chain, FIdx,
9007 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx));
9008 Chain = Ld.getValue(1);
9009 }
9010
9011 // FCFID it and return it.
9012 SDValue FP = convertIntToFP(Op, Ld, DAG, Subtarget, Chain);
9013 if (IsStrict)
9014 Chain = FP.getValue(1);
9015 if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) {
9016 if (IsStrict)
9017 FP = DAG.getNode(ISD::STRICT_FP_ROUND, dl,
9018 DAG.getVTList(MVT::f32, MVT::Other),
9019 {Chain, FP, DAG.getIntPtrConstant(0, dl)}, Flags);
9020 else
9021 FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP,
9022 DAG.getIntPtrConstant(0, dl, /*isTarget=*/true));
9023 }
9024 return FP;
9025}
9026
9027SDValue PPCTargetLowering::LowerGET_ROUNDING(SDValue Op,
9028 SelectionDAG &DAG) const {
9029 SDLoc dl(Op);
9030 /*
9031 The rounding mode is in bits 30:31 of FPSCR, and has the following
9032 settings:
9033 00 Round to nearest
9034 01 Round to 0
9035 10 Round to +inf
9036 11 Round to -inf
9037
9038 GET_ROUNDING, on the other hand, expects the following:
9039 -1 Undefined
9040 0 Round to 0
9041 1 Round to nearest
9042 2 Round to +inf
9043 3 Round to -inf
9044
9045 To perform the conversion, we do:
9046 ((FPSCR & 0x3) ^ ((~FPSCR & 0x3) >> 1))
9047 */
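// Checking the expression above on each RN setting:
//   RN = 00: (0 ^ ((~0 & 3) >> 1)) = 0 ^ 1 = 1  -> Round to nearest
//   RN = 01: (1 ^ ((~1 & 3) >> 1)) = 1 ^ 1 = 0  -> Round to 0
//   RN = 10: (2 ^ ((~2 & 3) >> 1)) = 2 ^ 0 = 2  -> Round to +inf
//   RN = 11: (3 ^ ((~3 & 3) >> 1)) = 3 ^ 0 = 3  -> Round to -inf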
9048
9049 MachineFunction &MF = DAG.getMachineFunction();
9050 EVT VT = Op.getValueType();
9051 EVT PtrVT = getPointerTy(MF.getDataLayout());
9052
9053 // Save FP Control Word to register
9054 SDValue Chain = Op.getOperand(0);
9055 SDValue MFFS = DAG.getNode(PPCISD::MFFS, dl, {MVT::f64, MVT::Other}, Chain);
9056 Chain = MFFS.getValue(1);
9057
9058 SDValue CWD;
9059 if (isTypeLegal(MVT::i64)) {
9060 CWD = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32,
9061 DAG.getNode(ISD::BITCAST, dl, MVT::i64, MFFS));
9062 } else {
9063 // Save FP register to stack slot
9064 int SSFI = MF.getFrameInfo().CreateStackObject(8, Align(8), false);
9065 SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
9066 Chain = DAG.getStore(Chain, dl, MFFS, StackSlot, MachinePointerInfo());
9067
9068 // Load FP Control Word from low 32 bits of stack slot.
9070 "Stack slot adjustment is valid only on big endian subtargets!");
9071 SDValue Four = DAG.getConstant(4, dl, PtrVT);
9072 SDValue Addr = DAG.getNode(ISD::ADD, dl, PtrVT, StackSlot, Four);
9073 CWD = DAG.getLoad(MVT::i32, dl, Chain, Addr, MachinePointerInfo());
9074 Chain = CWD.getValue(1);
9075 }
9076
9077 // Transform as necessary
9078 SDValue CWD1 =
9079 DAG.getNode(ISD::AND, dl, MVT::i32,
9080 CWD, DAG.getConstant(3, dl, MVT::i32));
9081 SDValue CWD2 =
9082 DAG.getNode(ISD::SRL, dl, MVT::i32,
9083 DAG.getNode(ISD::AND, dl, MVT::i32,
9084 DAG.getNode(ISD::XOR, dl, MVT::i32,
9085 CWD, DAG.getConstant(3, dl, MVT::i32)),
9086 DAG.getConstant(3, dl, MVT::i32)),
9087 DAG.getConstant(1, dl, MVT::i32));
9088
9089 SDValue RetVal =
9090 DAG.getNode(ISD::XOR, dl, MVT::i32, CWD1, CWD2);
9091
9092 RetVal =
9093 DAG.getNode((VT.getSizeInBits() < 16 ? ISD::TRUNCATE : ISD::ZERO_EXTEND),
9094 dl, VT, RetVal);
9095
9096 return DAG.getMergeValues({RetVal, Chain}, dl);
9097}
9098
9099SDValue PPCTargetLowering::LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) const {
9100 EVT VT = Op.getValueType();
9101 unsigned BitWidth = VT.getSizeInBits();
9102 SDLoc dl(Op);
9103 assert(Op.getNumOperands() == 3 &&
9104 VT == Op.getOperand(1).getValueType() &&
9105 "Unexpected SHL!");
9106
9107 // Expand into a bunch of logical ops. Note that these ops
9108 // depend on the PPC behavior for oversized shift amounts.
9109 SDValue Lo = Op.getOperand(0);
9110 SDValue Hi = Op.getOperand(1);
9111 SDValue Amt = Op.getOperand(2);
9112 EVT AmtVT = Amt.getValueType();
9113
9114 SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
9115 DAG.getConstant(BitWidth, dl, AmtVT), Amt);
9116 SDValue Tmp2 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Amt);
9117 SDValue Tmp3 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Tmp1);
9118 SDValue Tmp4 = DAG.getNode(ISD::OR , dl, VT, Tmp2, Tmp3);
9119 SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
9120 DAG.getConstant(-BitWidth, dl, AmtVT));
9121 SDValue Tmp6 = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Tmp5);
9122 SDValue OutHi = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6);
9123 SDValue OutLo = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Amt);
9124 SDValue OutOps[] = { OutLo, OutHi };
9125 return DAG.getMergeValues(OutOps, dl);
9126}
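// Worked example (illustrative) for BitWidth = 32 and Amt = 40, relying on
// slw/srw producing 0 for shift amounts in the 32..63 range:
//   Tmp2 = Hi << 40        = 0
//   Tmp3 = Lo >> (32 - 40) = Lo >> 56 (as a 6-bit amount) = 0
//   Tmp6 = Lo << (40 - 32) = Lo << 8
//   OutHi = Lo << 8,  OutLo = Lo << 40 = 0
// which matches shifting the 64-bit pair (Hi:Lo) left by 40.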
9127
9128SDValue PPCTargetLowering::LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) const {
9129 EVT VT = Op.getValueType();
9130 SDLoc dl(Op);
9131 unsigned BitWidth = VT.getSizeInBits();
9132 assert(Op.getNumOperands() == 3 &&
9133 VT == Op.getOperand(1).getValueType() &&
9134 "Unexpected SRL!");
9135
9136 // Expand into a bunch of logical ops. Note that these ops
9137 // depend on the PPC behavior for oversized shift amounts.
9138 SDValue Lo = Op.getOperand(0);
9139 SDValue Hi = Op.getOperand(1);
9140 SDValue Amt = Op.getOperand(2);
9141 EVT AmtVT = Amt.getValueType();
9142
9143 SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
9144 DAG.getConstant(BitWidth, dl, AmtVT), Amt);
9145 SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt);
9146 SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1);
9147 SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
9148 SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
9149 DAG.getConstant(-BitWidth, dl, AmtVT));
9150 SDValue Tmp6 = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Tmp5);
9151 SDValue OutLo = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6);
9152 SDValue OutHi = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Amt);
9153 SDValue OutOps[] = { OutLo, OutHi };
9154 return DAG.getMergeValues(OutOps, dl);
9155}
9156
9157SDValue PPCTargetLowering::LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) const {
9158 SDLoc dl(Op);
9159 EVT VT = Op.getValueType();
9160 unsigned BitWidth = VT.getSizeInBits();
9161 assert(Op.getNumOperands() == 3 &&
9162 VT == Op.getOperand(1).getValueType() &&
9163 "Unexpected SRA!");
9164
9165 // Expand into a bunch of logical ops, followed by a select_cc.
9166 SDValue Lo = Op.getOperand(0);
9167 SDValue Hi = Op.getOperand(1);
9168 SDValue Amt = Op.getOperand(2);
9169 EVT AmtVT = Amt.getValueType();
9170
9171 SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
9172 DAG.getConstant(BitWidth, dl, AmtVT), Amt);
9173 SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt);
9174 SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1);
9175 SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
9176 SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
9177 DAG.getConstant(-BitWidth, dl, AmtVT));
9178 SDValue Tmp6 = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Tmp5);
9179 SDValue OutHi = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Amt);
9180 SDValue OutLo = DAG.getSelectCC(dl, Tmp5, DAG.getConstant(0, dl, AmtVT),
9181 Tmp4, Tmp6, ISD::SETLE);
9182 SDValue OutOps[] = { OutLo, OutHi };
9183 return DAG.getMergeValues(OutOps, dl);
9184}
9185
9186SDValue PPCTargetLowering::LowerFunnelShift(SDValue Op,
9187 SelectionDAG &DAG) const {
9188 SDLoc dl(Op);
9189 EVT VT = Op.getValueType();
9190 unsigned BitWidth = VT.getSizeInBits();
9191
9192 bool IsFSHL = Op.getOpcode() == ISD::FSHL;
9193 SDValue X = Op.getOperand(0);
9194 SDValue Y = Op.getOperand(1);
9195 SDValue Z = Op.getOperand(2);
9196 EVT AmtVT = Z.getValueType();
9197
9198 // fshl: (X << (Z % BW)) | (Y >> (BW - (Z % BW)))
9199 // fshr: (X << (BW - (Z % BW))) | (Y >> (Z % BW))
9200 // This is simpler than TargetLowering::expandFunnelShift because we can rely
9201 // on PowerPC shift by BW being well defined.
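  // For example, fshl i32 X, Y, 40: Z % 32 = 8, so the result is
  // (X << 8) | (Y >> 24); fshr with the same operands gives (X << 24) | (Y >> 8).
  // When Z % BW is 0 the complementary amount is BW, which PowerPC shifts
  // define to produce 0, so no extra masking is needed.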
9202 Z = DAG.getNode(ISD::AND, dl, AmtVT, Z,
9203 DAG.getConstant(BitWidth - 1, dl, AmtVT));
9204 SDValue SubZ =
9205 DAG.getNode(ISD::SUB, dl, AmtVT, DAG.getConstant(BitWidth, dl, AmtVT), Z);
9206 X = DAG.getNode(PPCISD::SHL, dl, VT, X, IsFSHL ? Z : SubZ);
9207 Y = DAG.getNode(PPCISD::SRL, dl, VT, Y, IsFSHL ? SubZ : Z);
9208 return DAG.getNode(ISD::OR, dl, VT, X, Y);
9209}
9210
9211//===----------------------------------------------------------------------===//
9212// Vector related lowering.
9213//
9214
9215/// getCanonicalConstSplat - Build a canonical splat immediate of Val with an
9216/// element size of SplatSize. Cast the result to VT.
9217static SDValue getCanonicalConstSplat(uint64_t Val, unsigned SplatSize, EVT VT,
9218 SelectionDAG &DAG, const SDLoc &dl) {
9219 static const MVT VTys[] = { // canonical VT to use for each size.
9220 MVT::v16i8, MVT::v8i16, MVT::Other, MVT::v4i32
9221 };
9222
9223 EVT ReqVT = VT != MVT::Other ? VT : VTys[SplatSize-1];
9224
9225 // For a splat with all ones, turn it to vspltisb 0xFF to canonicalize.
9226 if (Val == ((1LLU << (SplatSize * 8)) - 1)) {
9227 SplatSize = 1;
9228 Val = 0xFF;
9229 }
9230
9231 EVT CanonicalVT = VTys[SplatSize-1];
9232
9233 // Build a canonical splat for this value.
9234 return DAG.getBitcast(ReqVT, DAG.getConstant(Val, dl, CanonicalVT));
9235}
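// For example, a 16-bit splat of 0xFFFF is canonicalized to a v16i8 splat of
// 0xFF (SplatSize becomes 1), and any requested VT other than the canonical
// one is recovered by the final bitcast.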
9236
9237/// BuildIntrinsicOp - Return a unary operator intrinsic node with the
9238/// specified intrinsic ID.
9239 static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op, SelectionDAG &DAG,
9240 const SDLoc &dl, EVT DestVT = MVT::Other) {
9241 if (DestVT == MVT::Other) DestVT = Op.getValueType();
9242 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
9243 DAG.getConstant(IID, dl, MVT::i32), Op);
9244}
9245
9246/// BuildIntrinsicOp - Return a binary operator intrinsic node with the
9247/// specified intrinsic ID.
9248static SDValue BuildIntrinsicOp(unsigned IID, SDValue LHS, SDValue RHS,
9249 SelectionDAG &DAG, const SDLoc &dl,
9250 EVT DestVT = MVT::Other) {
9251 if (DestVT == MVT::Other) DestVT = LHS.getValueType();
9252 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
9253 DAG.getConstant(IID, dl, MVT::i32), LHS, RHS);
9254}
9255
9256/// BuildIntrinsicOp - Return a ternary operator intrinsic node with the
9257/// specified intrinsic ID.
9258static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op0, SDValue Op1,
9259 SDValue Op2, SelectionDAG &DAG, const SDLoc &dl,
9260 EVT DestVT = MVT::Other) {
9261 if (DestVT == MVT::Other) DestVT = Op0.getValueType();
9262 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
9263 DAG.getConstant(IID, dl, MVT::i32), Op0, Op1, Op2);
9264}
9265
9266/// BuildVSLDOI - Return a VECTOR_SHUFFLE that is a vsldoi of the specified
9267/// amount. The result has the specified value type.
9268static SDValue BuildVSLDOI(SDValue LHS, SDValue RHS, unsigned Amt, EVT VT,
9269 SelectionDAG &DAG, const SDLoc &dl) {
9270 // Force LHS/RHS to be the right type.
9271 LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, LHS);
9272 RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, RHS);
9273
9274 int Ops[16];
9275 for (unsigned i = 0; i != 16; ++i)
9276 Ops[i] = i + Amt;
9277 SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, LHS, RHS, Ops);
9278 return DAG.getNode(ISD::BITCAST, dl, VT, T);
9279}
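// For example, BuildVSLDOI(LHS, RHS, 4, VT, DAG, dl) builds the v16i8 shuffle
// mask {4,5,...,19}, i.e. bytes 4-15 of LHS followed by bytes 0-3 of RHS,
// matching a vsldoi by 4 bytes, and bitcasts the result back to VT.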
9280
9281/// Do we have an efficient pattern in a .td file for this node?
9282///
9283/// \param V - pointer to the BuildVectorSDNode being matched
9284/// \param HasDirectMove - does this subtarget have VSR <-> GPR direct moves?
9285///
9286/// There are some patterns where it is beneficial to keep a BUILD_VECTOR
9287/// node as a BUILD_VECTOR node rather than expanding it. The patterns where
9288/// the opposite is true (expansion is beneficial) are:
9289/// - The node builds a vector out of integers that are not 32 or 64-bits
9290/// - The node builds a vector out of constants
9291/// - The node is a "load-and-splat"
9292/// In all other cases, we will choose to keep the BUILD_VECTOR.
9293 static bool haveEfficientBuildVectorPattern(BuildVectorSDNode *V,
9294 bool HasDirectMove,
9295 bool HasP8Vector) {
9296 EVT VecVT = V->getValueType(0);
9297 bool RightType = VecVT == MVT::v2f64 ||
9298 (HasP8Vector && VecVT == MVT::v4f32) ||
9299 (HasDirectMove && (VecVT == MVT::v2i64 || VecVT == MVT::v4i32));
9300 if (!RightType)
9301 return false;
9302
9303 bool IsSplat = true;
9304 bool IsLoad = false;
9305 SDValue Op0 = V->getOperand(0);
9306
9307 // This function is called in a block that confirms the node is not a constant
9308 // splat. So a constant BUILD_VECTOR here means the vector is built out of
9309 // different constants.
9310 if (V->isConstant())
9311 return false;
9312 for (int i = 0, e = V->getNumOperands(); i < e; ++i) {
9313 if (V->getOperand(i).isUndef())
9314 return false;
9315 // We want to expand nodes that represent load-and-splat even if the
9316 // loaded value is a floating point truncation or conversion to int.
9317 if (V->getOperand(i).getOpcode() == ISD::LOAD ||
9318 (V->getOperand(i).getOpcode() == ISD::FP_ROUND &&
9319 V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD) ||
9320 (V->getOperand(i).getOpcode() == ISD::FP_TO_SINT &&
9321 V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD) ||
9322 (V->getOperand(i).getOpcode() == ISD::FP_TO_UINT &&
9323 V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD))
9324 IsLoad = true;
9325 // If the operands are different or the input is not a load and has more
9326 // uses than just this BV node, then it isn't a splat.
9327 if (V->getOperand(i) != Op0 ||
9328 (!IsLoad && !V->isOnlyUserOf(V->getOperand(i).getNode())))
9329 IsSplat = false;
9330 }
9331 return !(IsSplat && IsLoad);
9332}
9333
9334// Lower BITCAST(f128, (build_pair i64, i64)) to BUILD_FP128.
9335SDValue PPCTargetLowering::LowerBITCAST(SDValue Op, SelectionDAG &DAG) const {
9336
9337 SDLoc dl(Op);
9338 SDValue Op0 = Op->getOperand(0);
9339
9340 if ((Op.getValueType() != MVT::f128) ||
9341 (Op0.getOpcode() != ISD::BUILD_PAIR) ||
9342 (Op0.getOperand(0).getValueType() != MVT::i64) ||
9343 (Op0.getOperand(1).getValueType() != MVT::i64) || !Subtarget.isPPC64())
9344 return SDValue();
9345
9346 return DAG.getNode(PPCISD::BUILD_FP128, dl, MVT::f128, Op0.getOperand(0),
9347 Op0.getOperand(1));
9348}
9349
9350static const SDValue *getNormalLoadInput(const SDValue &Op, bool &IsPermuted) {
9351 const SDValue *InputLoad = &Op;
9352 while (InputLoad->getOpcode() == ISD::BITCAST)
9353 InputLoad = &InputLoad->getOperand(0);
9354 if (InputLoad->getOpcode() == ISD::SCALAR_TO_VECTOR ||
9355 InputLoad->getOpcode() == PPCISD::SCALAR_TO_VECTOR_PERMUTED) {
9356 IsPermuted = InputLoad->getOpcode() == PPCISD::SCALAR_TO_VECTOR_PERMUTED;
9357 InputLoad = &InputLoad->getOperand(0);
9358 }
9359 if (InputLoad->getOpcode() != ISD::LOAD)
9360 return nullptr;
9361 LoadSDNode *LD = cast<LoadSDNode>(*InputLoad);
9362 return ISD::isNormalLoad(LD) ? InputLoad : nullptr;
9363}
9364
9365// Convert the argument APFloat to a single precision APFloat if there is no
9366// loss in information during the conversion to single precision APFloat and the
9367// resulting number is not a denormal number. Return true if successful.
9369 APFloat APFloatToConvert = ArgAPFloat;
9370 bool LosesInfo = true;
9371 APFloatToConvert.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven,
9372 &LosesInfo);
9373 bool Success = (!LosesInfo && !APFloatToConvert.isDenormal());
9374 if (Success)
9375 ArgAPFloat = APFloatToConvert;
9376 return Success;
9377}
9378
9379// Bitcast the argument APInt to a double and convert it to a single precision
9380// APFloat, bitcast the APFloat to an APInt and assign it to the original
9381// argument if there is no loss in information during the conversion from
9382// double to single precision APFloat and the resulting number is not a denormal
9383// number. Return true if successful.
9384 bool llvm::convertToNonDenormSingle(APInt &ArgAPInt) {
9385 double DpValue = ArgAPInt.bitsToDouble();
9386 APFloat APFloatDp(DpValue);
9387 bool Success = convertToNonDenormSingle(APFloatDp);
9388 if (Success)
9389 ArgAPInt = APFloatDp.bitcastToAPInt();
9390 return Success;
9391}
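// For example, the double 1.0 round-trips through single precision with no
// loss of information, so a v2f64 splat of 1.0 can use XXSPLTIDP; 0.1 loses
// information in the conversion and instead takes the XXSPLTI32DX path in
// LowerBUILD_VECTOR below.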
9392
9393 // Nondestructive check for convertToNonDenormSingle.
9394 bool llvm::checkConvertToNonDenormSingle(APFloat &ArgAPFloat) {
9395 // Only convert if it loses info, since XXSPLTIDP should
9396 // handle the other case.
9397 APFloat APFloatToConvert = ArgAPFloat;
9398 bool LosesInfo = true;
9399 APFloatToConvert.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven,
9400 &LosesInfo);
9401
9402 return (!LosesInfo && !APFloatToConvert.isDenormal());
9403}
9404
9405static bool isValidSplatLoad(const PPCSubtarget &Subtarget, const SDValue &Op,
9406 unsigned &Opcode) {
9407 LoadSDNode *InputNode = dyn_cast<LoadSDNode>(Op.getOperand(0));
9408 if (!InputNode || !Subtarget.hasVSX() || !ISD::isUNINDEXEDLoad(InputNode))
9409 return false;
9410
9411 EVT Ty = Op->getValueType(0);
9412 // For v2f64, v4f32 and v4i32 types, we require the load to be non-extending
9413 // as we cannot handle extending loads for these types.
9414 if ((Ty == MVT::v2f64 || Ty == MVT::v4f32 || Ty == MVT::v4i32) &&
9415 ISD::isNON_EXTLoad(InputNode))
9416 return true;
9417
9418 EVT MemVT = InputNode->getMemoryVT();
9419 // For v8i16 and v16i8 types, extending loads can be handled as long as the
9420 // memory VT is the same vector element VT type.
9421 // The loads feeding into the v8i16 and v16i8 types will be extending because
9422 // scalar i8/i16 are not legal types.
9423 if ((Ty == MVT::v8i16 || Ty == MVT::v16i8) && ISD::isEXTLoad(InputNode) &&
9424 (MemVT == Ty.getVectorElementType()))
9425 return true;
9426
9427 if (Ty == MVT::v2i64) {
9428 // Check the extend type, when the input type is i32, and the output vector
9429 // type is v2i64.
9430 if (MemVT == MVT::i32) {
9431 if (ISD::isZEXTLoad(InputNode))
9432 Opcode = PPCISD::ZEXT_LD_SPLAT;
9433 if (ISD::isSEXTLoad(InputNode))
9434 Opcode = PPCISD::SEXT_LD_SPLAT;
9435 }
9436 return true;
9437 }
9438 return false;
9439}
9440
9441// If this is a case we can't handle, return null and let the default
9442// expansion code take care of it. If we CAN select this case, and if it
9443// selects to a single instruction, return Op. Otherwise, if we can codegen
9444// this case more efficiently than a constant pool load, lower it to the
9445// sequence of ops that should be used.
9446SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
9447 SelectionDAG &DAG) const {
9448 SDLoc dl(Op);
9449 BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
9450 assert(BVN && "Expected a BuildVectorSDNode in LowerBUILD_VECTOR");
9451
9452 // Check if this is a splat of a constant value.
9453 APInt APSplatBits, APSplatUndef;
9454 unsigned SplatBitSize;
9455 bool HasAnyUndefs;
9456 bool BVNIsConstantSplat =
9457 BVN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
9458 HasAnyUndefs, 0, !Subtarget.isLittleEndian());
9459
9460 // If it is a splat of a double, check if we can shrink it to a 32 bit
9461 // non-denormal float which when converted back to double gives us the same
9462 // double. This is to exploit the XXSPLTIDP instruction.
9463 // If we lose precision, we use XXSPLTI32DX.
9464 if (BVNIsConstantSplat && (SplatBitSize == 64) &&
9465 Subtarget.hasPrefixInstrs() && Subtarget.hasP10Vector()) {
9466 // Check the type first to short-circuit so we don't modify APSplatBits if
9467 // this block isn't executed.
9468 if ((Op->getValueType(0) == MVT::v2f64) &&
9469 convertToNonDenormSingle(APSplatBits)) {
9470 SDValue SplatNode = DAG.getNode(
9471 PPCISD::XXSPLTI_SP_TO_DP, dl, MVT::v2f64,
9472 DAG.getTargetConstant(APSplatBits.getZExtValue(), dl, MVT::i32));
9473 return DAG.getBitcast(Op.getValueType(), SplatNode);
9474 } else {
9475 // We may lose precision, so we have to use XXSPLTI32DX.
9476
9477 uint32_t Hi =
9478 (uint32_t)((APSplatBits.getZExtValue() & 0xFFFFFFFF00000000LL) >> 32);
9479 uint32_t Lo =
9480 (uint32_t)(APSplatBits.getZExtValue() & 0xFFFFFFFF);
9481 SDValue SplatNode = DAG.getUNDEF(MVT::v2i64);
9482
9483 if (!Hi || !Lo)
9484 // If either load is 0, then we should generate XXLXOR to set to 0.
9485 SplatNode = DAG.getTargetConstant(0, dl, MVT::v2i64);
9486
9487 if (Hi)
9488 SplatNode = DAG.getNode(
9489 PPCISD::XXSPLTI32DX, dl, MVT::v2i64, SplatNode,
9490 DAG.getTargetConstant(0, dl, MVT::i32),
9491 DAG.getTargetConstant(Hi, dl, MVT::i32));
9492
9493 if (Lo)
9494 SplatNode =
9495 DAG.getNode(PPCISD::XXSPLTI32DX, dl, MVT::v2i64, SplatNode,
9496 DAG.getTargetConstant(1, dl, MVT::i32),
9497 DAG.getTargetConstant(Lo, dl, MVT::i32));
9498
9499 return DAG.getBitcast(Op.getValueType(), SplatNode);
9500 }
9501 }
9502
9503 if (!BVNIsConstantSplat || SplatBitSize > 32) {
9504 unsigned NewOpcode = PPCISD::LD_SPLAT;
9505
9506 // Handle load-and-splat patterns as we have instructions that will do this
9507 // in one go.
9508 if (DAG.isSplatValue(Op, true) &&
9509 isValidSplatLoad(Subtarget, Op, NewOpcode)) {
9510 const SDValue *InputLoad = &Op.getOperand(0);
9511 LoadSDNode *LD = cast<LoadSDNode>(*InputLoad);
9512
9513 // If the input load is an extending load, it will be an i32 -> i64
9514 // extending load and isValidSplatLoad() will update NewOpcode.
9515 unsigned MemorySize = LD->getMemoryVT().getScalarSizeInBits();
9516 unsigned ElementSize =
9517 MemorySize * ((NewOpcode == PPCISD::LD_SPLAT) ? 1 : 2);
9518
9519 assert(((ElementSize == 2 * MemorySize)
9520 ? (NewOpcode == PPCISD::ZEXT_LD_SPLAT ||
9521 NewOpcode == PPCISD::SEXT_LD_SPLAT)
9522 : (NewOpcode == PPCISD::LD_SPLAT)) &&
9523 "Unmatched element size and opcode!\n");
9524
9525 // Checking for a single use of this load, we have to check for vector
9526 // width (128 bits) / ElementSize uses (since each operand of the
9527 // BUILD_VECTOR is a separate use of the value).
9528 unsigned NumUsesOfInputLD = 128 / ElementSize;
9529 for (SDValue BVInOp : Op->ops())
9530 if (BVInOp.isUndef())
9531 NumUsesOfInputLD--;
9532
9533 // Exclude some cases where LD_SPLAT is worse than scalar_to_vector:
9534 // Below cases should also happen for "lfiwzx/lfiwax + LE target + index
9535 // 1" and "lxvrhx + BE target + index 7" and "lxvrbx + BE target + index
9536 // 15", but function IsValidSplatLoad() now will only return true when
9537 // the data at index 0 is not nullptr. So we will not get into trouble for
9538 // these cases.
9539 //
9540 // case 1 - lfiwzx/lfiwax
9541 // 1.1: load result is i32 and is sign/zero extend to i64;
9542 // 1.2: build a v2i64 vector type with above loaded value;
9543 // 1.3: the vector has only one value at index 0, others are all undef;
9544 // 1.4: on BE target, so that lfiwzx/lfiwax does not need any permute.
9545 if (NumUsesOfInputLD == 1 &&
9546 (Op->getValueType(0) == MVT::v2i64 && NewOpcode != PPCISD::LD_SPLAT &&
9547 !Subtarget.isLittleEndian() && Subtarget.hasVSX() &&
9548 Subtarget.hasLFIWAX()))
9549 return SDValue();
9550
9551 // case 2 - lxvr[hb]x
9552 // 2.1: load result is at most i16;
9553 // 2.2: build a vector with above loaded value;
9554 // 2.3: the vector has only one value at index 0, others are all undef;
9555 // 2.4: on LE target, so that lxvr[hb]x does not need any permute.
9556 if (NumUsesOfInputLD == 1 && Subtarget.isLittleEndian() &&
9557 Subtarget.isISA3_1() && ElementSize <= 16)
9558 return SDValue();
9559
9560 assert(NumUsesOfInputLD > 0 && "No uses of input LD of a build_vector?");
9561 if (InputLoad->getNode()->hasNUsesOfValue(NumUsesOfInputLD, 0) &&
9562 Subtarget.hasVSX()) {
9563 SDValue Ops[] = {
9564 LD->getChain(), // Chain
9565 LD->getBasePtr(), // Ptr
9566 DAG.getValueType(Op.getValueType()) // VT
9567 };
9568 SDValue LdSplt = DAG.getMemIntrinsicNode(
9569 NewOpcode, dl, DAG.getVTList(Op.getValueType(), MVT::Other), Ops,
9570 LD->getMemoryVT(), LD->getMemOperand());
9571 // Replace all uses of the output chain of the original load with the
9572 // output chain of the new load.
9573 DAG.ReplaceAllUsesOfValueWith(InputLoad->getValue(1),
9574 LdSplt.getValue(1));
9575 return LdSplt;
9576 }
9577 }
9578
9579 // In 64-bit mode, BUILD_VECTOR nodes that are not constant splats of up to
9580 // 32 bits can be lowered to VSX instructions under certain conditions.
9581 // Without VSX, there is no pattern more efficient than expanding the node.
9582 if (Subtarget.hasVSX() && Subtarget.isPPC64() &&
9583 haveEfficientBuildVectorPattern(BVN, Subtarget.hasDirectMove(),
9584 Subtarget.hasP8Vector()))
9585 return Op;
9586 return SDValue();
9587 }
9588
9589 uint64_t SplatBits = APSplatBits.getZExtValue();
9590 uint64_t SplatUndef = APSplatUndef.getZExtValue();
9591 unsigned SplatSize = SplatBitSize / 8;
9592
9593 // First, handle single instruction cases.
9594
9595 // All zeros?
9596 if (SplatBits == 0) {
9597 // Canonicalize all zero vectors to be v4i32.
9598 if (Op.getValueType() != MVT::v4i32 || HasAnyUndefs) {
9599 SDValue Z = DAG.getConstant(0, dl, MVT::v4i32);
9600 Op = DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Z);
9601 }
9602 return Op;
9603 }
9604
9605 // We have XXSPLTIW for constant splats four bytes wide.
9606 // Since the vector length is a multiple of 4, 2-byte splats can be replaced
9607 // with 4-byte splats. We replicate the SplatBits in case of 2-byte splat to
9608 // make a 4-byte splat element. For example: 2-byte splat of 0xABAB can be
9609 // turned into a 4-byte splat of 0xABABABAB.
9610 if (Subtarget.hasPrefixInstrs() && Subtarget.hasP10Vector() && SplatSize == 2)
9611 return getCanonicalConstSplat(SplatBits | (SplatBits << 16), SplatSize * 2,
9612 Op.getValueType(), DAG, dl);
9613
9614 if (Subtarget.hasPrefixInstrs() && Subtarget.hasP10Vector() && SplatSize == 4)
9615 return getCanonicalConstSplat(SplatBits, SplatSize, Op.getValueType(), DAG,
9616 dl);
9617
9618 // We have XXSPLTIB for constant splats one byte wide.
9619 if (Subtarget.hasP9Vector() && SplatSize == 1)
9620 return getCanonicalConstSplat(SplatBits, SplatSize, Op.getValueType(), DAG,
9621 dl);
9622
9623 // If the sign extended value is in the range [-16,15], use VSPLTI[bhw].
9624 int32_t SextVal= (int32_t(SplatBits << (32-SplatBitSize)) >>
9625 (32-SplatBitSize));
9626 if (SextVal >= -16 && SextVal <= 15)
9627 return getCanonicalConstSplat(SextVal, SplatSize, Op.getValueType(), DAG,
9628 dl);
9629
9630 // Two instruction sequences.
9631
9632 // If this value is in the range [-32,30] and is even, use:
9633 // VSPLTI[bhw](val/2) + VSPLTI[bhw](val/2)
9634 // If this value is in the range [17,31] and is odd, use:
9635 // VSPLTI[bhw](val-16) - VSPLTI[bhw](-16)
9636 // If this value is in the range [-31,-17] and is odd, use:
9637 // VSPLTI[bhw](val+16) + VSPLTI[bhw](-16)
9638 // Note the last two are three-instruction sequences.
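// As a rough worked example of the three forms above (assuming SplatSize == 4,
// so VSPLTI[bhw] is vspltisw):
//   SextVal == 30  -> vspltisw(15)  + vspltisw(15)      (15 + 15    == 30)
//   SextVal == 27  -> vspltisw(11)  - vspltisw(-16)     (11 - (-16) == 27)
//   SextVal == -27 -> vspltisw(-11) + vspltisw(-16)     (-11 + -16  == -27)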
9639 if (SextVal >= -32 && SextVal <= 31) {
9640 // To avoid having these optimizations undone by constant folding,
9641 // we convert to a pseudo that will be expanded later into one of
9642 // the above forms.
9643 SDValue Elt = DAG.getConstant(SextVal, dl, MVT::i32);
9644 EVT VT = (SplatSize == 1 ? MVT::v16i8 :
9645 (SplatSize == 2 ? MVT::v8i16 : MVT::v4i32));
9646 SDValue EltSize = DAG.getConstant(SplatSize, dl, MVT::i32);
9647 SDValue RetVal = DAG.getNode(PPCISD::VADD_SPLAT, dl, VT, Elt, EltSize);
9648 if (VT == Op.getValueType())
9649 return RetVal;
9650 else
9651 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), RetVal);
9652 }
9653
9654 // If this is 0x8000_0000 x 4, turn into vspltisw + vslw. If it is
9655 // 0x7FFF_FFFF x 4, turn it into not(0x8000_0000). This is important
9656 // for fneg/fabs.
9657 if (SplatSize == 4 && SplatBits == (0x7FFFFFFF&~SplatUndef)) {
9658 // Make -1 and vspltisw -1:
9659 SDValue OnesV = getCanonicalConstSplat(-1, 4, MVT::v4i32, DAG, dl);
9660
9661 // Make the VSLW intrinsic, computing 0x8000_0000.
9662 SDValue Res = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, OnesV,
9663 OnesV, DAG, dl);
9664
9665 // xor by OnesV to invert it.
9666 Res = DAG.getNode(ISD::XOR, dl, MVT::v4i32, Res, OnesV);
9667 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
9668 }
9669
9670 // Check to see if this is one of a wide variety of 'vsplti* + binop self' cases.
9671 static const signed char SplatCsts[] = {
9672 -1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7,
9673 -8, 8, -9, 9, -10, 10, -11, 11, -12, 12, -13, 13, 14, -14, 15, -15, -16
9674 };
9675
9676 for (unsigned idx = 0; idx < std::size(SplatCsts); ++idx) {
9677 // Indirect through the SplatCsts array so that we favor 'vsplti -1' for
9678 // cases which are ambiguous (e.g. formation of 0x8000_0000).
9679 int i = SplatCsts[idx];
9680
9681 // Figure out what shift amount will be used by altivec if shifted by i in
9682 // this splat size.
9683 unsigned TypeShiftAmt = i & (SplatBitSize-1);
9684
9685 // vsplti + shl self.
9686 if (SextVal == (int)((unsigned)i << TypeShiftAmt)) {
9687 SDValue Res = getCanonicalConstSplat(i, SplatSize, MVT::Other, DAG, dl);
9688 static const unsigned IIDs[] = { // Intrinsic to use for each size.
9689 Intrinsic::ppc_altivec_vslb, Intrinsic::ppc_altivec_vslh, 0,
9690 Intrinsic::ppc_altivec_vslw
9691 };
9692 Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
9693 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
9694 }
9695
9696 // vsplti + srl self.
9697 if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) {
9698 SDValue Res = getCanonicalConstSplat(i, SplatSize, MVT::Other, DAG, dl);
9699 static const unsigned IIDs[] = { // Intrinsic to use for each size.
9700 Intrinsic::ppc_altivec_vsrb, Intrinsic::ppc_altivec_vsrh, 0,
9701 Intrinsic::ppc_altivec_vsrw
9702 };
9703 Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
9704 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
9705 }
9706
9707 // vsplti + rol self.
9708 if (SextVal == (int)(((unsigned)i << TypeShiftAmt) |
9709 ((unsigned)i >> (SplatBitSize-TypeShiftAmt)))) {
9710 SDValue Res = getCanonicalConstSplat(i, SplatSize, MVT::Other, DAG, dl);
9711 static const unsigned IIDs[] = { // Intrinsic to use for each size.
9712 Intrinsic::ppc_altivec_vrlb, Intrinsic::ppc_altivec_vrlh, 0,
9713 Intrinsic::ppc_altivec_vrlw
9714 };
9715 Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
9716 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
9717 }
9718
9719 // t = vsplti c, result = vsldoi t, t, 1
9720 if (SextVal == (int)(((unsigned)i << 8) | (i < 0 ? 0xFF : 0))) {
9721 SDValue T = getCanonicalConstSplat(i, SplatSize, MVT::v16i8, DAG, dl);
9722 unsigned Amt = Subtarget.isLittleEndian() ? 15 : 1;
9723 return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
9724 }
9725 // t = vsplti c, result = vsldoi t, t, 2
9726 if (SextVal == (int)(((unsigned)i << 16) | (i < 0 ? 0xFFFF : 0))) {
9727 SDValue T = getCanonicalConstSplat(i, SplatSize, MVT::v16i8, DAG, dl);
9728 unsigned Amt = Subtarget.isLittleEndian() ? 14 : 2;
9729 return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
9730 }
9731 // t = vsplti c, result = vsldoi t, t, 3
9732 if (SextVal == (int)(((unsigned)i << 24) | (i < 0 ? 0xFFFFFF : 0))) {
9733 SDValue T = getCanonicalConstSplat(i, SplatSize, MVT::v16i8, DAG, dl);
9734 unsigned Amt = Subtarget.isLittleEndian() ? 13 : 3;
9735 return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
9736 }
9737 }
9738
9739 return SDValue();
9740}
9741
9742/// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
9743/// the specified operations to build the shuffle.
9744static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
9745 SDValue RHS, SelectionDAG &DAG,
9746 const SDLoc &dl) {
9747 unsigned OpNum = (PFEntry >> 26) & 0x0F;
9748 unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1);
9749 unsigned RHSID = (PFEntry >> 0) & ((1 << 13)-1);
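// A sketch of the PFEntry encoding assumed by the decoding above: bits 30-31
// carry the cost (used by the caller), bits 26-29 the opcode below, and bits
// 13-25 / 0-12 the LHS / RHS sub-shuffle IDs. Each ID packs four element
// indices as base-9 digits (8 standing for undef), so the identity LHS
// <0,1,2,3> is (1*9+2)*9+3 and the identity RHS <4,5,6,7> is
// ((4*9+5)*9+6)*9+7, matching the OP_COPY checks below.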
9750
9751 enum {
9752 OP_COPY = 0, // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>
9753 OP_VMRGHW,
9754 OP_VMRGLW,
9755 OP_VSPLTISW0,
9756 OP_VSPLTISW1,
9757 OP_VSPLTISW2,
9758 OP_VSPLTISW3,
9759 OP_VSLDOI4,
9760 OP_VSLDOI8,
9761 OP_VSLDOI12
9762 };
9763
9764 if (OpNum == OP_COPY) {
9765 if (LHSID == (1*9+2)*9+3) return LHS;
9766 assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!");
9767 return RHS;
9768 }
9769
9770 SDValue OpLHS, OpRHS;
9771 OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl);
9772 OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl);
9773
9774 int ShufIdxs[16];
9775 switch (OpNum) {
9776 default: llvm_unreachable("Unknown i32 permute!");
9777 case OP_VMRGHW:
9778 ShufIdxs[ 0] = 0; ShufIdxs[ 1] = 1; ShufIdxs[ 2] = 2; ShufIdxs[ 3] = 3;
9779 ShufIdxs[ 4] = 16; ShufIdxs[ 5] = 17; ShufIdxs[ 6] = 18; ShufIdxs[ 7] = 19;
9780 ShufIdxs[ 8] = 4; ShufIdxs[ 9] = 5; ShufIdxs[10] = 6; ShufIdxs[11] = 7;
9781 ShufIdxs[12] = 20; ShufIdxs[13] = 21; ShufIdxs[14] = 22; ShufIdxs[15] = 23;
9782 break;
9783 case OP_VMRGLW:
9784 ShufIdxs[ 0] = 8; ShufIdxs[ 1] = 9; ShufIdxs[ 2] = 10; ShufIdxs[ 3] = 11;
9785 ShufIdxs[ 4] = 24; ShufIdxs[ 5] = 25; ShufIdxs[ 6] = 26; ShufIdxs[ 7] = 27;
9786 ShufIdxs[ 8] = 12; ShufIdxs[ 9] = 13; ShufIdxs[10] = 14; ShufIdxs[11] = 15;
9787 ShufIdxs[12] = 28; ShufIdxs[13] = 29; ShufIdxs[14] = 30; ShufIdxs[15] = 31;
9788 break;
9789 case OP_VSPLTISW0:
9790 for (unsigned i = 0; i != 16; ++i)
9791 ShufIdxs[i] = (i&3)+0;
9792 break;
9793 case OP_VSPLTISW1:
9794 for (unsigned i = 0; i != 16; ++i)
9795 ShufIdxs[i] = (i&3)+4;
9796 break;
9797 case OP_VSPLTISW2:
9798 for (unsigned i = 0; i != 16; ++i)
9799 ShufIdxs[i] = (i&3)+8;
9800 break;
9801 case OP_VSPLTISW3:
9802 for (unsigned i = 0; i != 16; ++i)
9803 ShufIdxs[i] = (i&3)+12;
9804 break;
9805 case OP_VSLDOI4:
9806 return BuildVSLDOI(OpLHS, OpRHS, 4, OpLHS.getValueType(), DAG, dl);
9807 case OP_VSLDOI8:
9808 return BuildVSLDOI(OpLHS, OpRHS, 8, OpLHS.getValueType(), DAG, dl);
9809 case OP_VSLDOI12:
9810 return BuildVSLDOI(OpLHS, OpRHS, 12, OpLHS.getValueType(), DAG, dl);
9811 }
9812 EVT VT = OpLHS.getValueType();
9813 OpLHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpLHS);
9814 OpRHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpRHS);
9815 SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, OpLHS, OpRHS, ShufIdxs);
9816 return DAG.getNode(ISD::BITCAST, dl, VT, T);
9817}
9818
9819/// lowerToVINSERTB - Return the SDValue if this VECTOR_SHUFFLE can be handled
9820/// by the VINSERTB instruction introduced in ISA 3.0, else just return default
9821/// SDValue.
9822SDValue PPCTargetLowering::lowerToVINSERTB(ShuffleVectorSDNode *N,
9823 SelectionDAG &DAG) const {
9824 const unsigned BytesInVector = 16;
9825 bool IsLE = Subtarget.isLittleEndian();
9826 SDLoc dl(N);
9827 SDValue V1 = N->getOperand(0);
9828 SDValue V2 = N->getOperand(1);
9829 unsigned ShiftElts = 0, InsertAtByte = 0;
9830 bool Swap = false;
9831
9832 // Shifts required to get the byte we want at element 7.
9833 unsigned LittleEndianShifts[] = {8, 7, 6, 5, 4, 3, 2, 1,
9834 0, 15, 14, 13, 12, 11, 10, 9};
9835 unsigned BigEndianShifts[] = {9, 10, 11, 12, 13, 14, 15, 0,
9836 1, 2, 3, 4, 5, 6, 7, 8};
9837
9838 ArrayRef<int> Mask = N->getMask();
9839 int OriginalOrder[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
9840
9841 // For each mask element, find out if we're just inserting something
9842 // from V2 into V1 or vice versa.
9843 // Possible permutations inserting an element from V2 into V1:
9844 // X, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
9845 // 0, X, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
9846 // ...
9847 // 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, X
9848 // Inserting from V1 into V2 will be similar, except mask range will be
9849 // [16,31].
9850
9851 bool FoundCandidate = false;
9852 // If both vector operands for the shuffle are the same vector, the mask
9853 // will contain only elements from the first one and the second one will be
9854 // undef.
9855 unsigned VINSERTBSrcElem = IsLE ? 8 : 7;
9856 // Go through the mask of half-words to find an element that's being moved
9857 // from one vector to the other.
9858 for (unsigned i = 0; i < BytesInVector; ++i) {
9859 unsigned CurrentElement = Mask[i];
9860 // If 2nd operand is undefined, we should only look for element 7 in the
9861 // Mask.
9862 if (V2.isUndef() && CurrentElement != VINSERTBSrcElem)
9863 continue;
9864
9865 bool OtherElementsInOrder = true;
9866 // Examine the other elements in the Mask to see if they're in original
9867 // order.
9868 for (unsigned j = 0; j < BytesInVector; ++j) {
9869 if (j == i)
9870 continue;
9871 // If CurrentElement is from V1 [0,15], then we expect the rest of the Mask
9872 // to be from V2 [16,31] and vice versa, unless the 2nd operand is undefined,
9873 // in which case we assume we're always picking from the 1st operand.
9874 int MaskOffset =
9875 (!V2.isUndef() && CurrentElement < BytesInVector) ? BytesInVector : 0;
9876 if (Mask[j] != OriginalOrder[j] + MaskOffset) {
9877 OtherElementsInOrder = false;
9878 break;
9879 }
9880 }
9881 // If other elements are in original order, we record the number of shifts
9882 // we need to get the element we want into element 7. Also record which byte
9883 // in the vector we should insert into.
9884 if (OtherElementsInOrder) {
9885 // If 2nd operand is undefined, we assume no shifts and no swapping.
9886 if (V2.isUndef()) {
9887 ShiftElts = 0;
9888 Swap = false;
9889 } else {
9890 // Only need the last 4 bits for the shift because operands will be swapped if CurrentElement is >= 2^4.
9891 ShiftElts = IsLE ? LittleEndianShifts[CurrentElement & 0xF]
9892 : BigEndianShifts[CurrentElement & 0xF];
9893 Swap = CurrentElement < BytesInVector;
9894 }
9895 InsertAtByte = IsLE ? BytesInVector - (i + 1) : i;
9896 FoundCandidate = true;
9897 break;
9898 }
9899 }
9900
9901 if (!FoundCandidate)
9902 return SDValue();
9903
9904 // Candidate found, construct the proper SDAG sequence with VINSERTB,
9905 // optionally with VECSHL if shift is required.
9906 if (Swap)
9907 std::swap(V1, V2);
9908 if (V2.isUndef())
9909 V2 = V1;
9910 if (ShiftElts) {
9911 SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v16i8, V2, V2,
9912 DAG.getConstant(ShiftElts, dl, MVT::i32));
9913 return DAG.getNode(PPCISD::VECINSERT, dl, MVT::v16i8, V1, Shl,
9914 DAG.getConstant(InsertAtByte, dl, MVT::i32));
9915 }
9916 return DAG.getNode(PPCISD::VECINSERT, dl, MVT::v16i8, V1, V2,
9917 DAG.getConstant(InsertAtByte, dl, MVT::i32));
9918}
9919
9920/// lowerToVINSERTH - Return the SDValue if this VECTOR_SHUFFLE can be handled
9921/// by the VINSERTH instruction introduced in ISA 3.0, else just return default
9922/// SDValue.
9923SDValue PPCTargetLowering::lowerToVINSERTH(ShuffleVectorSDNode *N,
9924 SelectionDAG &DAG) const {
9925 const unsigned NumHalfWords = 8;
9926 const unsigned BytesInVector = NumHalfWords * 2;
9927 // Check that the shuffle is on half-words.
9928 if (!isNByteElemShuffleMask(N, 2, 1))
9929 return SDValue();
9930
9931 bool IsLE = Subtarget.isLittleEndian();
9932 SDLoc dl(N);
9933 SDValue V1 = N->getOperand(0);
9934 SDValue V2 = N->getOperand(1);
9935 unsigned ShiftElts = 0, InsertAtByte = 0;
9936 bool Swap = false;
9937
9938 // Shifts required to get the half-word we want at element 3.
9939 unsigned LittleEndianShifts[] = {4, 3, 2, 1, 0, 7, 6, 5};
9940 unsigned BigEndianShifts[] = {5, 6, 7, 0, 1, 2, 3, 4};
9941
9942 uint32_t Mask = 0;
9943 uint32_t OriginalOrderLow = 0x1234567;
9944 uint32_t OriginalOrderHigh = 0x89ABCDEF;
9945 // Now we look at mask elements 0,2,4,6,8,10,12,14. Pack the mask into a
9946 // 32-bit space, only need 4-bit nibbles per element.
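// For example, under this packing the identity mask that reads half-words
// 0..7 of the first operand becomes 0x01234567 (i.e. OriginalOrderLow above),
// and one that reads half-words 8..15 of the second becomes 0x89ABCDEF
// (OriginalOrderHigh).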
9947 for (unsigned i = 0; i < NumHalfWords; ++i) {
9948 unsigned MaskShift = (NumHalfWords - 1 - i) * 4;
9949 Mask |= ((uint32_t)(N->getMaskElt(i * 2) / 2) << MaskShift);
9950 }
9951
9952 // For each mask element, find out if we're just inserting something
9953 // from V2 into V1 or vice versa. Possible permutations inserting an element
9954 // from V2 into V1:
9955 // X, 1, 2, 3, 4, 5, 6, 7
9956 // 0, X, 2, 3, 4, 5, 6, 7
9957 // 0, 1, X, 3, 4, 5, 6, 7
9958 // 0, 1, 2, X, 4, 5, 6, 7
9959 // 0, 1, 2, 3, X, 5, 6, 7
9960 // 0, 1, 2, 3, 4, X, 6, 7
9961 // 0, 1, 2, 3, 4, 5, X, 7
9962 // 0, 1, 2, 3, 4, 5, 6, X
9963 // Inserting from V1 into V2 will be similar, except mask range will be [8,15].
9964
9965 bool FoundCandidate = false;
9966 // Go through the mask of half-words to find an element that's being moved
9967 // from one vector to the other.
9968 for (unsigned i = 0; i < NumHalfWords; ++i) {
9969 unsigned MaskShift = (NumHalfWords - 1 - i) * 4;
9970 uint32_t MaskOneElt = (Mask >> MaskShift) & 0xF;
9971 uint32_t MaskOtherElts = ~(0xF << MaskShift);
9972 uint32_t TargetOrder = 0x0;
9973
9974 // If both vector operands for the shuffle are the same vector, the mask
9975 // will contain only elements from the first one and the second one will be
9976 // undef.
9977 if (V2.isUndef()) {
9978 ShiftElts = 0;
9979 unsigned VINSERTHSrcElem = IsLE ? 4 : 3;
9980 TargetOrder = OriginalOrderLow;
9981 Swap = false;
9982 // Skip if this is not the correct element, or if the mask of the other
9983 // elements does not match our expected order.
9984 if (MaskOneElt == VINSERTHSrcElem &&
9985 (Mask & MaskOtherElts) == (TargetOrder & MaskOtherElts)) {
9986 InsertAtByte = IsLE ? BytesInVector - (i + 1) * 2 : i * 2;
9987 FoundCandidate = true;
9988 break;
9989 }
9990 } else { // If both operands are defined.
9991 // Target order is [8,15] if the current mask is between [0,7].
9992 TargetOrder =
9993 (MaskOneElt < NumHalfWords) ? OriginalOrderHigh : OriginalOrderLow;
9994 // Skip if the mask of the other elements does not match our expected order.
9995 if ((Mask & MaskOtherElts) == (TargetOrder & MaskOtherElts)) {
9996 // We only need the last 3 bits for the number of shifts.
9997 ShiftElts = IsLE ? LittleEndianShifts[MaskOneElt & 0x7]
9998 : BigEndianShifts[MaskOneElt & 0x7];
9999 InsertAtByte = IsLE ? BytesInVector - (i + 1) * 2 : i * 2;
10000 Swap = MaskOneElt < NumHalfWords;
10001 FoundCandidate = true;
10002 break;
10003 }
10004 }
10005 }
10006
10007 if (!FoundCandidate)
10008 return SDValue();
10009
10010 // Candidate found, construct the proper SDAG sequence with VINSERTH,
10011 // optionally with VECSHL if shift is required.
10012 if (Swap)
10013 std::swap(V1, V2);
10014 if (V2.isUndef())
10015 V2 = V1;
10016 SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V1);
10017 if (ShiftElts) {
10018 // Double ShiftElts because we're left shifting on v16i8 type.
10019 SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v16i8, V2, V2,
10020 DAG.getConstant(2 * ShiftElts, dl, MVT::i32));
10021 SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, Shl);
10022 SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v8i16, Conv1, Conv2,
10023 DAG.getConstant(InsertAtByte, dl, MVT::i32));
10024 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
10025 }
10026 SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V2);
10027 SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v8i16, Conv1, Conv2,
10028 DAG.getConstant(InsertAtByte, dl, MVT::i32));
10029 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
10030}
10031
10032/// lowerToXXSPLTI32DX - Return the SDValue if this VECTOR_SHUFFLE can be
10033/// handled by the XXSPLTI32DX instruction introduced in ISA 3.1, otherwise
10034/// return the default SDValue.
10035SDValue PPCTargetLowering::lowerToXXSPLTI32DX(ShuffleVectorSDNode *SVN,
10036 SelectionDAG &DAG) const {
10037 // The LHS and RHS may be bitcasts to v16i8 as we canonicalize shuffles
10038 // to v16i8. Peek through the bitcasts to get the actual operands.
10039 SDValue LHS = peekThroughBitcasts(SVN->getOperand(0));
10040 SDValue RHS = peekThroughBitcasts(SVN->getOperand(1));
10041
10042 auto ShuffleMask = SVN->getMask();
10043 SDValue VecShuffle(SVN, 0);
10044 SDLoc DL(SVN);
10045
10046 // Check that we have a four byte shuffle.
10047 if (!isNByteElemShuffleMask(SVN, 4, 1))
10048 return SDValue();
10049
10050 // Canonicalize so that the RHS is a BUILD_VECTOR when lowering to xxsplti32dx.
10051 if (RHS->getOpcode() != ISD::BUILD_VECTOR) {
10052 std::swap(LHS, RHS);
10053 VecShuffle = DAG.getCommutedVectorShuffle(*SVN);
10054 ShuffleVectorSDNode *CommutedSV = dyn_cast<ShuffleVectorSDNode>(VecShuffle);
10055 if (!CommutedSV)
10056 return SDValue();
10057 ShuffleMask = CommutedSV->getMask();
10058 }
10059
10060 // Ensure that the RHS is a vector of constants.
10061 BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(RHS.getNode());
10062 if (!BVN)
10063 return SDValue();
10064
10065 // Check if RHS is a splat of 4-bytes (or smaller).
10066 APInt APSplatValue, APSplatUndef;
10067 unsigned SplatBitSize;
10068 bool HasAnyUndefs;
10069 if (!BVN->isConstantSplat(APSplatValue, APSplatUndef, SplatBitSize,
10070 HasAnyUndefs, 0, !Subtarget.isLittleEndian()) ||
10071 SplatBitSize > 32)
10072 return SDValue();
10073
10074 // Check that the shuffle mask matches the semantics of XXSPLTI32DX.
10075 // The instruction splats a constant C into two words of the source vector
10076 // producing { C, Unchanged, C, Unchanged } or { Unchanged, C, Unchanged, C }.
10077 // Thus we check that the shuffle mask is the equivalent of
10078 // <0, [4-7], 2, [4-7]> or <[4-7], 1, [4-7], 3> respectively.
10079 // Note: the check above of isNByteElemShuffleMask() ensures that the bytes
10080 // within each word are consecutive, so we only need to check the first byte.
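// As a rough example of the first pattern below: the byte mask
// <0,1,2,3, 20,21,22,23, 8,9,10,11, 28,29,30,31> keeps words 0 and 2 of the
// LHS and takes words 1 and 3 from the constant RHS, so it passes the first
// check and selects the corresponding Index value.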
10081 SDValue Index;
10082 bool IsLE = Subtarget.isLittleEndian();
10083 if ((ShuffleMask[0] == 0 && ShuffleMask[8] == 8) &&
10084 (ShuffleMask[4] % 4 == 0 && ShuffleMask[12] % 4 == 0 &&
10085 ShuffleMask[4] > 15 && ShuffleMask[12] > 15))
10086 Index = DAG.getTargetConstant(IsLE ? 0 : 1, DL, MVT::i32);
10087 else if ((ShuffleMask[4] == 4 && ShuffleMask[12] == 12) &&
10088 (ShuffleMask[0] % 4 == 0 && ShuffleMask[8] % 4 == 0 &&
10089 ShuffleMask[0] > 15 && ShuffleMask[8] > 15))
10090 Index = DAG.getTargetConstant(IsLE ? 1 : 0, DL, MVT::i32);
10091 else
10092 return SDValue();
10093
10094 // If the splat is narrower than 32-bits, we need to get the 32-bit value
10095 // for XXSPLTI32DX.
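// For example, an 8-bit splat value 0xAB is widened to 0xABAB and then to
// 0xABABABAB by the loop below.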
10096 unsigned SplatVal = APSplatValue.getZExtValue();
10097 for (; SplatBitSize < 32; SplatBitSize <<= 1)
10098 SplatVal |= (SplatVal << SplatBitSize);
10099
10100 SDValue SplatNode = DAG.getNode(
10101 PPCISD::XXSPLTI32DX, DL, MVT::v2i64, DAG.getBitcast(MVT::v2i64, LHS),
10102 Index, DAG.getTargetConstant(SplatVal, DL, MVT::i32));
10103 return DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, SplatNode);
10104}
10105
10106/// LowerROTL - Custom lowering for ROTL(v1i128) to vector_shuffle(v16i8).
10107/// We lower ROTL(v1i128) to vector_shuffle(v16i8) only if shift amount is
10108/// a multiple of 8. Otherwise convert it to a scalar rotation(i128)
10109 /// i.e. (or (shl x, C1), (srl x, 128-C1)).
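/// For example, a rotate amount of 24 bits (a multiple of 8) becomes the byte
/// shuffle mask <3,4,...,15,0,1,2> on the v16i8 view of the operand, while a
/// non-multiple-of-8 amount falls through to the i128 shl/srl/or expansion.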
10110SDValue PPCTargetLowering::LowerROTL(SDValue Op, SelectionDAG &DAG) const {
10111 assert(Op.getOpcode() == ISD::ROTL && "Should only be called for ISD::ROTL");
10112 assert(Op.getValueType() == MVT::v1i128 &&
10113 "Only set v1i128 as custom, other type shouldn't reach here!");
10114 SDLoc dl(Op);
10115 SDValue N0 = peekThroughBitcasts(Op.getOperand(0));
10116 SDValue N1 = peekThroughBitcasts(Op.getOperand(1));
10117 unsigned SHLAmt = N1.getConstantOperandVal(0);
10118 if (SHLAmt % 8 == 0) {
10119 std::array<int, 16> Mask;
10120 std::iota(Mask.begin(), Mask.end(), 0);
10121 std::rotate(Mask.begin(), Mask.begin() + SHLAmt / 8, Mask.end());
10122 if (SDValue Shuffle =
10123 DAG.getVectorShuffle(MVT::v16i8, dl, DAG.getBitcast(MVT::v16i8, N0),
10124 DAG.getUNDEF(MVT::v16i8), Mask))
10125 return DAG.getNode(ISD::BITCAST, dl, MVT::v1i128, Shuffle);
10126 }
10127 SDValue ArgVal = DAG.getBitcast(MVT::i128, N0);
10128 SDValue SHLOp = DAG.getNode(ISD::SHL, dl, MVT::i128, ArgVal,
10129 DAG.getConstant(SHLAmt, dl, MVT::i32));
10130 SDValue SRLOp = DAG.getNode(ISD::SRL, dl, MVT::i128, ArgVal,
10131 DAG.getConstant(128 - SHLAmt, dl, MVT::i32));
10132 SDValue OROp = DAG.getNode(ISD::OR, dl, MVT::i128, SHLOp, SRLOp);
10133 return DAG.getNode(ISD::BITCAST, dl, MVT::v1i128, OROp);
10134}
10135
10136/// LowerVECTOR_SHUFFLE - Return the code we lower for VECTOR_SHUFFLE. If this
10137/// is a shuffle we can handle in a single instruction, return it. Otherwise,
10138/// return the code it can be lowered into. Worst case, it can always be
10139/// lowered into a vperm.
10140SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
10141 SelectionDAG &DAG) const {
10142 SDLoc dl(Op);
10143 SDValue V1 = Op.getOperand(0);
10144 SDValue V2 = Op.getOperand(1);
10145 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
10146
10147 // Any nodes that were combined in the target-independent combiner prior
10148 // to vector legalization will not be sent to the target combine. Try to
10149 // combine them here.
10150 if (SDValue NewShuffle = combineVectorShuffle(SVOp, DAG)) {
10151 if (!isa<ShuffleVectorSDNode>(NewShuffle))
10152 return NewShuffle;
10153 Op = NewShuffle;
10154 SVOp = cast<ShuffleVectorSDNode>(Op);
10155 V1 = Op.getOperand(0);
10156 V2 = Op.getOperand(1);
10157 }
10158 EVT VT = Op.getValueType();
10159 bool isLittleEndian = Subtarget.isLittleEndian();
10160
10161 unsigned ShiftElts, InsertAtByte;
10162 bool Swap = false;
10163
10164 // If this is a load-and-splat, we can do that with a single instruction
10165 // in some cases. However if the load has multiple uses, we don't want to
10166 // combine it because that will just produce multiple loads.
10167 bool IsPermutedLoad = false;
10168 const SDValue *InputLoad = getNormalLoadInput(V1, IsPermutedLoad);
10169 if (InputLoad && Subtarget.hasVSX() && V2.isUndef() &&
10170 (PPC::isSplatShuffleMask(SVOp, 4) || PPC::isSplatShuffleMask(SVOp, 8)) &&
10171 InputLoad->hasOneUse()) {
10172 bool IsFourByte = PPC::isSplatShuffleMask(SVOp, 4);
10173 int SplatIdx =
10174 PPC::getSplatIdxForPPCMnemonics(SVOp, IsFourByte ? 4 : 8, DAG);
10175
10176 // The splat index for permuted loads will be in the left half of the vector
10177 // which is strictly wider than the loaded value by 8 bytes. So we need to
10178 // adjust the splat index to point to the correct address in memory.
10179 if (IsPermutedLoad) {
10180 assert((isLittleEndian || IsFourByte) &&
10181 "Unexpected size for permuted load on big endian target");
10182 SplatIdx += IsFourByte ? 2 : 1;
10183 assert((SplatIdx < (IsFourByte ? 4 : 2)) &&
10184 "Splat of a value outside of the loaded memory");
10185 }
10186
10187 LoadSDNode *LD = cast<LoadSDNode>(*InputLoad);
10188 // For 4-byte load-and-splat, we need Power9.
10189 if ((IsFourByte && Subtarget.hasP9Vector()) || !IsFourByte) {
10190 uint64_t Offset = 0;
10191 if (IsFourByte)
10192 Offset = isLittleEndian ? (3 - SplatIdx) * 4 : SplatIdx * 4;
10193 else
10194 Offset = isLittleEndian ? (1 - SplatIdx) * 8 : SplatIdx * 8;
10195
10196 // If the width of the load is the same as the width of the splat,
10197 // loading with an offset would load the wrong memory.
10198 if (LD->getValueType(0).getSizeInBits() == (IsFourByte ? 32 : 64))
10199 Offset = 0;
10200
10201 SDValue BasePtr = LD->getBasePtr();
10202 if (Offset != 0)
10203 BasePtr = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
10204 BasePtr, DAG.getIntPtrConstant(Offset, dl));
10205 SDValue Ops[] = {
10206 LD->getChain(), // Chain
10207 BasePtr, // BasePtr
10208 DAG.getValueType(Op.getValueType()) // VT
10209 };
10210 SDVTList VTL =
10211 DAG.getVTList(IsFourByte ? MVT::v4i32 : MVT::v2i64, MVT::Other);
10212 SDValue LdSplt =
10213 DAG.getMemIntrinsicNode(PPCISD::LD_SPLAT, dl, VTL,
10214 Ops, LD->getMemoryVT(), LD->getMemOperand());
10215 DAG.ReplaceAllUsesOfValueWith(InputLoad->getValue(1), LdSplt.getValue(1));
10216 if (LdSplt.getValueType() != SVOp->getValueType(0))
10217 LdSplt = DAG.getBitcast(SVOp->getValueType(0), LdSplt);
10218 return LdSplt;
10219 }
10220 }
10221
10222 // All v2i64 and v2f64 shuffles are legal
10223 if (VT == MVT::v2i64 || VT == MVT::v2f64)
10224 return Op;
10225
10226 if (Subtarget.hasP9Vector() &&
10227 PPC::isXXINSERTWMask(SVOp, ShiftElts, InsertAtByte, Swap,
10228 isLittleEndian)) {
10229 if (V2.isUndef())
10230 V2 = V1;
10231 else if (Swap)
10232 std::swap(V1, V2);
10233 SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
10234 SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V2);
10235 if (ShiftElts) {
10236 SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v4i32, Conv2, Conv2,
10237 DAG.getConstant(ShiftElts, dl, MVT::i32));
10238 SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v4i32, Conv1, Shl,
10239 DAG.getConstant(InsertAtByte, dl, MVT::i32));
10240 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
10241 }
10242 SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v4i32, Conv1, Conv2,
10243 DAG.getConstant(InsertAtByte, dl, MVT::i32));
10244 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
10245 }
10246
10247 if (Subtarget.hasPrefixInstrs() && Subtarget.hasP10Vector()) {
10248 SDValue SplatInsertNode;
10249 if ((SplatInsertNode = lowerToXXSPLTI32DX(SVOp, DAG)))
10250 return SplatInsertNode;
10251 }
10252
10253 if (Subtarget.hasP9Altivec()) {
10254 SDValue NewISDNode;
10255 if ((NewISDNode = lowerToVINSERTH(SVOp, DAG)))
10256 return NewISDNode;
10257
10258 if ((NewISDNode = lowerToVINSERTB(SVOp, DAG)))
10259 return NewISDNode;
10260 }
10261
10262 if (Subtarget.hasVSX() &&
10263 PPC::isXXSLDWIShuffleMask(SVOp, ShiftElts, Swap, isLittleEndian)) {
10264 if (Swap)
10265 std::swap(V1, V2);
10266 SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
10267 SDValue Conv2 =
10268 DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V2.isUndef() ? V1 : V2);
10269
10270 SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v4i32, Conv1, Conv2,
10271 DAG.getConstant(ShiftElts, dl, MVT::i32));
10272 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Shl);
10273 }
10274
10275 if (Subtarget.hasVSX() &&
10276 PPC::isXXPERMDIShuffleMask(SVOp, ShiftElts, Swap, isLittleEndian)) {
10277 if (Swap)
10278 std::swap(V1, V2);
10279 SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V1);
10280 SDValue Conv2 =
10281 DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V2.isUndef() ? V1 : V2);
10282
10283 SDValue PermDI = DAG.getNode(PPCISD::XXPERMDI, dl, MVT::v2i64, Conv1, Conv2,
10284 DAG.getConstant(ShiftElts, dl, MVT::i32));
10285 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, PermDI);
10286 }
10287
10288 if (Subtarget.hasP9Vector()) {
10289 if (PPC::isXXBRHShuffleMask(SVOp)) {
10290 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V1);
10291 SDValue ReveHWord = DAG.getNode(ISD::BSWAP, dl, MVT::v8i16, Conv);
10292 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveHWord);
10293 } else if (PPC::isXXBRWShuffleMask(SVOp)) {
10294 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
10295 SDValue ReveWord = DAG.getNode(ISD::BSWAP, dl, MVT::v4i32, Conv);
10296 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveWord);
10297 } else if (PPC::isXXBRDShuffleMask(SVOp)) {
10298 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V1);
10299 SDValue ReveDWord = DAG.getNode(ISD::BSWAP, dl, MVT::v2i64, Conv);
10300 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveDWord);
10301 } else if (PPC::isXXBRQShuffleMask(SVOp)) {
10302 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v1i128, V1);
10303 SDValue ReveQWord = DAG.getNode(ISD::BSWAP, dl, MVT::v1i128, Conv);
10304 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveQWord);
10305 }
10306 }
10307
10308 if (Subtarget.hasVSX()) {
10309 if (V2.isUndef() && PPC::isSplatShuffleMask(SVOp, 4)) {
10310 int SplatIdx = PPC::getSplatIdxForPPCMnemonics(SVOp, 4, DAG);
10311
10312 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
10313 SDValue Splat = DAG.getNode(PPCISD::XXSPLT, dl, MVT::v4i32, Conv,
10314 DAG.getConstant(SplatIdx, dl, MVT::i32));
10315 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Splat);
10316 }
10317
10318 // Left shifts of 8 bytes are actually swaps. Convert accordingly.
10319 if (V2.isUndef() && PPC::isVSLDOIShuffleMask(SVOp, 1, DAG) == 8) {
10320 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, V1);
10321 SDValue Swap = DAG.getNode(PPCISD::SWAP_NO_CHAIN, dl, MVT::v2f64, Conv);
10322 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Swap);
10323 }
10324 }
10325
10326 // Cases that are handled by instructions that take permute immediates
10327 // (such as vsplt*) should be left as VECTOR_SHUFFLE nodes so they can be
10328 // selected by the instruction selector.
10329 if (V2.isUndef()) {
10330 if (PPC::isSplatShuffleMask(SVOp, 1) ||
10331 PPC::isSplatShuffleMask(SVOp, 2) ||
10332 PPC::isSplatShuffleMask(SVOp, 4) ||
10333 PPC::isVPKUWUMShuffleMask(SVOp, 1, DAG) ||
10334 PPC::isVPKUHUMShuffleMask(SVOp, 1, DAG) ||
10335 PPC::isVSLDOIShuffleMask(SVOp, 1, DAG) != -1 ||
10336 PPC::isVMRGLShuffleMask(SVOp, 1, 1, DAG) ||
10337 PPC::isVMRGLShuffleMask(SVOp, 2, 1, DAG) ||
10338 PPC::isVMRGLShuffleMask(SVOp, 4, 1, DAG) ||
10339 PPC::isVMRGHShuffleMask(SVOp, 1, 1, DAG) ||
10340 PPC::isVMRGHShuffleMask(SVOp, 2, 1, DAG) ||
10341 PPC::isVMRGHShuffleMask(SVOp, 4, 1, DAG) ||
10342 (Subtarget.hasP8Altivec() && (
10343 PPC::isVPKUDUMShuffleMask(SVOp, 1, DAG) ||
10344 PPC::isVMRGEOShuffleMask(SVOp, true, 1, DAG) ||
10345 PPC::isVMRGEOShuffleMask(SVOp, false, 1, DAG)))) {
10346 return Op;
10347 }
10348 }
10349
10350 // Altivec has a variety of "shuffle immediates" that take two vector inputs
10351 // and produce a fixed permutation. If any of these match, do not lower to
10352 // VPERM.
10353 unsigned int ShuffleKind = isLittleEndian ? 2 : 0;
10354 if (PPC::isVPKUWUMShuffleMask(SVOp, ShuffleKind, DAG) ||
10355 PPC::isVPKUHUMShuffleMask(SVOp, ShuffleKind, DAG) ||
10356 PPC::isVSLDOIShuffleMask(SVOp, ShuffleKind, DAG) != -1 ||
10357 PPC::isVMRGLShuffleMask(SVOp, 1, ShuffleKind, DAG) ||
10358 PPC::isVMRGLShuffleMask(SVOp, 2, ShuffleKind, DAG) ||
10359 PPC::isVMRGLShuffleMask(SVOp, 4, ShuffleKind, DAG) ||
10360 PPC::isVMRGHShuffleMask(SVOp, 1, ShuffleKind, DAG) ||
10361 PPC::isVMRGHShuffleMask(SVOp, 2, ShuffleKind, DAG) ||
10362 PPC::isVMRGHShuffleMask(SVOp, 4, ShuffleKind, DAG) ||
10363 (Subtarget.hasP8Altivec() && (
10364 PPC::isVPKUDUMShuffleMask(SVOp, ShuffleKind, DAG) ||
10365 PPC::isVMRGEOShuffleMask(SVOp, true, ShuffleKind, DAG) ||
10366 PPC::isVMRGEOShuffleMask(SVOp, false, ShuffleKind, DAG))))
10367 return Op;
10368
10369 // Check to see if this is a shuffle of 4-byte values. If so, we can use our
10370 // perfect shuffle table to emit an optimal matching sequence.
10371 ArrayRef<int> PermMask = SVOp->getMask();
10372
10373 if (!DisablePerfectShuffle && !isLittleEndian) {
10374 unsigned PFIndexes[4];
10375 bool isFourElementShuffle = true;
10376 for (unsigned i = 0; i != 4 && isFourElementShuffle;
10377 ++i) { // Element number
10378 unsigned EltNo = 8; // Start out undef.
10379 for (unsigned j = 0; j != 4; ++j) { // Intra-element byte.
10380 if (PermMask[i * 4 + j] < 0)
10381 continue; // Undef, ignore it.
10382
10383 unsigned ByteSource = PermMask[i * 4 + j];
10384 if ((ByteSource & 3) != j) {
10385 isFourElementShuffle = false;
10386 break;
10387 }
10388
10389 if (EltNo == 8) {
10390 EltNo = ByteSource / 4;
10391 } else if (EltNo != ByteSource / 4) {
10392 isFourElementShuffle = false;
10393 break;
10394 }
10395 }
10396 PFIndexes[i] = EltNo;
10397 }
10398
10399 // If this shuffle can be expressed as a shuffle of 4-byte elements, use the
10400 // perfect shuffle vector to determine if it is cost effective to do this as
10401 // discrete instructions, or whether we should use a vperm.
10402 // For now, we skip this for little endian until such time as we have a
10403 // little-endian perfect shuffle table.
10404 if (isFourElementShuffle) {
10405 // Compute the index in the perfect shuffle table.
10406 unsigned PFTableIndex = PFIndexes[0] * 9 * 9 * 9 + PFIndexes[1] * 9 * 9 +
10407 PFIndexes[2] * 9 + PFIndexes[3];
10408
10409 unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
10410 unsigned Cost = (PFEntry >> 30);
10411
10412 // Determining when to avoid vperm is tricky. Many things affect the cost
10413 // of vperm, particularly how many times the perm mask needs to be
10414 // computed. For example, if the perm mask can be hoisted out of a loop or
10415 // is already used (perhaps because there are multiple permutes with the
10416 // same shuffle mask?) the vperm has a cost of 1. OTOH, hoisting the
10417 // permute mask out of the loop requires an extra register.
10418 //
10419 // As a compromise, we only emit discrete instructions if the shuffle can
10420 // be generated in 3 or fewer operations. When we have loop information
10421 // available, if this block is within a loop, we should avoid using vperm
10422 // for 3-operation perms and use a constant pool load instead.
10423 if (Cost < 3)
10424 return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl);
10425 }
10426 }
10427
10428 // Lower this to a VPERM(V1, V2, V3) expression, where V3 is a constant
10429 // vector that will get spilled to the constant pool.
10430 if (V2.isUndef()) V2 = V1;
10431
10432 return LowerVPERM(Op, DAG, PermMask, VT, V1, V2);
10433}
10434
10435SDValue PPCTargetLowering::LowerVPERM(SDValue Op, SelectionDAG &DAG,
10436 ArrayRef<int> PermMask, EVT VT,
10437 SDValue V1, SDValue V2) const {
10438 unsigned Opcode = PPCISD::VPERM;
10439 EVT ValType = V1.getValueType();
10440 SDLoc dl(Op);
10441 bool NeedSwap = false;
10442 bool isLittleEndian = Subtarget.isLittleEndian();
10443 bool isPPC64 = Subtarget.isPPC64();
10444
10445 if (Subtarget.hasVSX() && Subtarget.hasP9Vector() &&
10446 (V1->hasOneUse() || V2->hasOneUse())) {
10447 LLVM_DEBUG(dbgs() << "At least one of two input vectors are dead - using "
10448 "XXPERM instead\n");
10449 Opcode = PPCISD::XXPERM;
10450
10451 // The second input to XXPERM is also an output so if the second input has
10452 // multiple uses then copying is necessary, as a result we want the
10453 // single-use operand to be used as the second input to prevent copying.
10454 if ((!isLittleEndian && !V2->hasOneUse() && V1->hasOneUse()) ||
10455 (isLittleEndian && !V1->hasOneUse() && V2->hasOneUse())) {
10456 std::swap(V1, V2);
10457 NeedSwap = !NeedSwap;
10458 }
10459 }
10460
10461 // The SHUFFLE_VECTOR mask is almost exactly what we want for vperm, except
10462 // that it is in input element units, not in bytes. Convert now.
10463
10464 // For little endian, the order of the input vectors is reversed, and
10465 // the permutation mask is complemented with respect to 31. This is
10466 // necessary to produce proper semantics with the big-endian-based vperm
10467 // instruction.
10468 EVT EltVT = V1.getValueType().getVectorElementType();
10469 unsigned BytesPerElement = EltVT.getSizeInBits() / 8;
10470
10471 bool V1HasXXSWAPD = V1->getOperand(0)->getOpcode() == PPCISD::XXSWAPD;
10472 bool V2HasXXSWAPD = V2->getOperand(0)->getOpcode() == PPCISD::XXSWAPD;
10473
10474 /*
10475 Vectors will be appended like so: [ V1 | V2 ]
10476 XXSWAPD on V1:
10477 [ A | B | C | D ] -> [ C | D | A | B ]
10478 0-3 4-7 8-11 12-15 0-3 4-7 8-11 12-15
10479 i.e. index of A, B += 8, and index of C, D -= 8.
10480 XXSWAPD on V2:
10481 [ E | F | G | H ] -> [ G | H | E | F ]
10482 16-19 20-23 24-27 28-31 16-19 20-23 24-27 28-31
10483 i.e. index of E, F += 8, index of G, H -= 8
10484 Swap V1 and V2:
10485 [ V1 | V2 ] -> [ V2 | V1 ]
10486 0-15 16-31 0-15 16-31
10487 i.e. index of V1 += 16, index of V2 -= 16
10488 */
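// As a rough worked example of the fixups below (byte elements, so
// BytesPerElement == 1, and assuming NeedSwap is false): if V1 was produced
// by an XXSWAPD, a mask entry of 3 names byte 3 of the swapped value, which
// is byte 11 of the original input, so SrcElt becomes 11; on little endian
// the emitted vperm control byte is then 31 - 11 == 20.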
10489
10490 SmallVector<SDValue, 16> ResultMask;
10491 for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i) {
10492 unsigned SrcElt = PermMask[i] < 0 ? 0 : PermMask[i];
10493
10494 if (V1HasXXSWAPD) {
10495 if (SrcElt < 8)
10496 SrcElt += 8;
10497 else if (SrcElt < 16)
10498 SrcElt -= 8;
10499 }
10500 if (V2HasXXSWAPD) {
10501 if (SrcElt > 23)
10502 SrcElt -= 8;
10503 else if (SrcElt > 15)
10504 SrcElt += 8;
10505 }
10506 if (NeedSwap) {
10507 if (SrcElt < 16)
10508 SrcElt += 16;
10509 else
10510 SrcElt -= 16;
10511 }
10512 for (unsigned j = 0; j != BytesPerElement; ++j)
10513 if (isLittleEndian)
10514 ResultMask.push_back(
10515 DAG.getConstant(31 - (SrcElt * BytesPerElement + j), dl, MVT::i32));
10516 else
10517 ResultMask.push_back(
10518 DAG.getConstant(SrcElt * BytesPerElement + j, dl, MVT::i32));
10519 }
10520
10521 if (V1HasXXSWAPD) {
10522 dl = SDLoc(V1->getOperand(0));
10523 V1 = V1->getOperand(0)->getOperand(1);
10524 }
10525 if (V2HasXXSWAPD) {
10526 dl = SDLoc(V2->getOperand(0));
10527 V2 = V2->getOperand(0)->getOperand(1);
10528 }
10529
10530 if (isPPC64 && (V1HasXXSWAPD || V2HasXXSWAPD)) {
10531 if (ValType != MVT::v2f64)
10532 V1 = DAG.getBitcast(MVT::v2f64, V1);
10533 if (V2.getValueType() != MVT::v2f64)
10534 V2 = DAG.getBitcast(MVT::v2f64, V2);
10535 }
10536
10537 ShufflesHandledWithVPERM++;
10538 SDValue VPermMask = DAG.getBuildVector(MVT::v16i8, dl, ResultMask);
10539 LLVM_DEBUG({
10540 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
10541 if (Opcode == PPCISD::XXPERM) {
10542 dbgs() << "Emitting a XXPERM for the following shuffle:\n";
10543 } else {
10544 dbgs() << "Emitting a VPERM for the following shuffle:\n";
10545 }
10546 SVOp->dump();
10547 dbgs() << "With the following permute control vector:\n";
10548 VPermMask.dump();
10549 });
10550
10551 if (Opcode == PPCISD::XXPERM)
10552 VPermMask = DAG.getBitcast(MVT::v4i32, VPermMask);
10553
10554 // On little endian we only need to swap the inputs here;
10555 // the permute mask above was already calculated accordingly.
10556 if (isLittleEndian)
10557 std::swap(V1, V2);
10558
10559 SDValue VPERMNode =
10560 DAG.getNode(Opcode, dl, V1.getValueType(), V1, V2, VPermMask);
10561
10562 VPERMNode = DAG.getBitcast(ValType, VPERMNode);
10563 return VPERMNode;
10564}
10565
10566/// getVectorCompareInfo - Given an intrinsic, return false if it is not a
10567 /// vector comparison. If it is, return true and fill in CompareOpc/isDot with
10568/// information about the intrinsic.
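/// The CompareOpc values below appear to correspond to the extended-opcode
/// (XO) fields of the matching vcmp*[.] / xvcmp*[.] instructions (e.g. 6 for
/// vcmpequb), which is how the selection patterns identify the comparison.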
10569static bool getVectorCompareInfo(SDValue Intrin, int &CompareOpc,
10570 bool &isDot, const PPCSubtarget &Subtarget) {
10571 unsigned IntrinsicID = Intrin.getConstantOperandVal(0);
10572 CompareOpc = -1;
10573 isDot = false;
10574 switch (IntrinsicID) {
10575 default:
10576 return false;
10577 // Comparison predicates.
10578 case Intrinsic::ppc_altivec_vcmpbfp_p:
10579 CompareOpc = 966;
10580 isDot = true;
10581 break;
10582 case Intrinsic::ppc_altivec_vcmpeqfp_p:
10583 CompareOpc = 198;
10584 isDot = true;
10585 break;
10586 case Intrinsic::ppc_altivec_vcmpequb_p:
10587 CompareOpc = 6;
10588 isDot = true;
10589 break;
10590 case Intrinsic::ppc_altivec_vcmpequh_p:
10591 CompareOpc = 70;
10592 isDot = true;
10593 break;
10594 case Intrinsic::ppc_altivec_vcmpequw_p:
10595 CompareOpc = 134;
10596 isDot = true;
10597 break;
10598 case Intrinsic::ppc_altivec_vcmpequd_p:
10599 if (Subtarget.hasVSX() || Subtarget.hasP8Altivec()) {
10600 CompareOpc = 199;
10601 isDot = true;
10602 } else
10603 return false;
10604 break;
10605 case Intrinsic::ppc_altivec_vcmpneb_p:
10606 case Intrinsic::ppc_altivec_vcmpneh_p:
10607 case Intrinsic::ppc_altivec_vcmpnew_p:
10608 case Intrinsic::ppc_altivec_vcmpnezb_p:
10609 case Intrinsic::ppc_altivec_vcmpnezh_p:
10610 case Intrinsic::ppc_altivec_vcmpnezw_p:
10611 if (Subtarget.hasP9Altivec()) {
10612 switch (IntrinsicID) {
10613 default:
10614 llvm_unreachable("Unknown comparison intrinsic.");
10615 case Intrinsic::ppc_altivec_vcmpneb_p:
10616 CompareOpc = 7;
10617 break;
10618 case Intrinsic::ppc_altivec_vcmpneh_p:
10619 CompareOpc = 71;
10620 break;
10621 case Intrinsic::ppc_altivec_vcmpnew_p:
10622 CompareOpc = 135;
10623 break;
10624 case Intrinsic::ppc_altivec_vcmpnezb_p:
10625 CompareOpc = 263;
10626 break;
10627 case Intrinsic::ppc_altivec_vcmpnezh_p:
10628 CompareOpc = 327;
10629 break;
10630 case Intrinsic::ppc_altivec_vcmpnezw_p:
10631 CompareOpc = 391;
10632 break;
10633 }
10634 isDot = true;
10635 } else
10636 return false;
10637 break;
10638 case Intrinsic::ppc_altivec_vcmpgefp_p:
10639 CompareOpc = 454;
10640 isDot = true;
10641 break;
10642 case Intrinsic::ppc_altivec_vcmpgtfp_p:
10643 CompareOpc = 710;
10644 isDot = true;
10645 break;
10646 case Intrinsic::ppc_altivec_vcmpgtsb_p:
10647 CompareOpc = 774;
10648 isDot = true;
10649 break;
10650 case Intrinsic::ppc_altivec_vcmpgtsh_p:
10651 CompareOpc = 838;
10652 isDot = true;
10653 break;
10654 case Intrinsic::ppc_altivec_vcmpgtsw_p:
10655 CompareOpc = 902;
10656 isDot = true;
10657 break;
10658 case Intrinsic::ppc_altivec_vcmpgtsd_p:
10659 if (Subtarget.hasVSX() || Subtarget.hasP8Altivec()) {
10660 CompareOpc = 967;
10661 isDot = true;
10662 } else
10663 return false;
10664 break;
10665 case Intrinsic::ppc_altivec_vcmpgtub_p:
10666 CompareOpc = 518;
10667 isDot = true;
10668 break;
10669 case Intrinsic::ppc_altivec_vcmpgtuh_p:
10670 CompareOpc = 582;
10671 isDot = true;
10672 break;
10673 case Intrinsic::ppc_altivec_vcmpgtuw_p:
10674 CompareOpc = 646;
10675 isDot = true;
10676 break;
10677 case Intrinsic::ppc_altivec_vcmpgtud_p:
10678 if (Subtarget.hasVSX() || Subtarget.hasP8Altivec()) {
10679 CompareOpc = 711;
10680 isDot = true;
10681 } else
10682 return false;
10683 break;
10684
10685 case Intrinsic::ppc_altivec_vcmpequq:
10686 case Intrinsic::ppc_altivec_vcmpgtsq:
10687 case Intrinsic::ppc_altivec_vcmpgtuq:
10688 if (!Subtarget.isISA3_1())
10689 return false;
10690 switch (IntrinsicID) {
10691 default:
10692 llvm_unreachable("Unknown comparison intrinsic.");
10693 case Intrinsic::ppc_altivec_vcmpequq:
10694 CompareOpc = 455;
10695 break;
10696 case Intrinsic::ppc_altivec_vcmpgtsq:
10697 CompareOpc = 903;
10698 break;
10699 case Intrinsic::ppc_altivec_vcmpgtuq:
10700 CompareOpc = 647;
10701 break;
10702 }
10703 break;
10704
10705 // VSX predicate comparisons use the same infrastructure
10706 case Intrinsic::ppc_vsx_xvcmpeqdp_p:
10707 case Intrinsic::ppc_vsx_xvcmpgedp_p:
10708 case Intrinsic::ppc_vsx_xvcmpgtdp_p:
10709 case Intrinsic::ppc_vsx_xvcmpeqsp_p:
10710 case Intrinsic::ppc_vsx_xvcmpgesp_p:
10711 case Intrinsic::ppc_vsx_xvcmpgtsp_p:
10712 if (Subtarget.hasVSX()) {
10713 switch (IntrinsicID) {
10714 case Intrinsic::ppc_vsx_xvcmpeqdp_p:
10715 CompareOpc = 99;
10716 break;
10717 case Intrinsic::ppc_vsx_xvcmpgedp_p:
10718 CompareOpc = 115;
10719 break;
10720 case Intrinsic::ppc_vsx_xvcmpgtdp_p:
10721 CompareOpc = 107;
10722 break;
10723 case Intrinsic::ppc_vsx_xvcmpeqsp_p:
10724 CompareOpc = 67;
10725 break;
10726 case Intrinsic::ppc_vsx_xvcmpgesp_p:
10727 CompareOpc = 83;
10728 break;
10729 case Intrinsic::ppc_vsx_xvcmpgtsp_p:
10730 CompareOpc = 75;
10731 break;
10732 }
10733 isDot = true;
10734 } else
10735 return false;
10736 break;
10737
10738 // Normal Comparisons.
10739 case Intrinsic::ppc_altivec_vcmpbfp:
10740 CompareOpc = 966;
10741 break;
10742 case Intrinsic::ppc_altivec_vcmpeqfp:
10743 CompareOpc = 198;
10744 break;
10745 case Intrinsic::ppc_altivec_vcmpequb:
10746 CompareOpc = 6;
10747 break;
10748 case Intrinsic::ppc_altivec_vcmpequh:
10749 CompareOpc = 70;
10750 break;
10751 case Intrinsic::ppc_altivec_vcmpequw:
10752 CompareOpc = 134;
10753 break;
10754 case Intrinsic::ppc_altivec_vcmpequd:
10755 if (Subtarget.hasP8Altivec())
10756 CompareOpc = 199;
10757 else
10758 return false;
10759 break;
10760 case Intrinsic::ppc_altivec_vcmpneb:
10761 case Intrinsic::ppc_altivec_vcmpneh:
10762 case Intrinsic::ppc_altivec_vcmpnew:
10763 case Intrinsic::ppc_altivec_vcmpnezb:
10764 case Intrinsic::ppc_altivec_vcmpnezh:
10765 case Intrinsic::ppc_altivec_vcmpnezw:
10766 if (Subtarget.hasP9Altivec())
10767 switch (IntrinsicID) {
10768 default:
10769 llvm_unreachable("Unknown comparison intrinsic.");
10770 case Intrinsic::ppc_altivec_vcmpneb:
10771 CompareOpc = 7;
10772 break;
10773 case Intrinsic::ppc_altivec_vcmpneh:
10774 CompareOpc = 71;
10775 break;
10776 case Intrinsic::ppc_altivec_vcmpnew:
10777 CompareOpc = 135;
10778 break;
10779 case Intrinsic::ppc_altivec_vcmpnezb:
10780 CompareOpc = 263;
10781 break;
10782 case Intrinsic::ppc_altivec_vcmpnezh:
10783 CompareOpc = 327;
10784 break;
10785 case Intrinsic::ppc_altivec_vcmpnezw:
10786 CompareOpc = 391;
10787 break;
10788 }
10789 else
10790 return false;
10791 break;
10792 case Intrinsic::ppc_altivec_vcmpgefp:
10793 CompareOpc = 454;
10794 break;
10795 case Intrinsic::ppc_altivec_vcmpgtfp:
10796 CompareOpc = 710;
10797 break;
10798 case Intrinsic::ppc_altivec_vcmpgtsb:
10799 CompareOpc = 774;
10800 break;
10801 case Intrinsic::ppc_altivec_vcmpgtsh:
10802 CompareOpc = 838;
10803 break;
10804 case Intrinsic::ppc_altivec_vcmpgtsw:
10805 CompareOpc = 902;
10806 break;
10807 case Intrinsic::ppc_altivec_vcmpgtsd:
10808 if (Subtarget.hasP8Altivec())
10809 CompareOpc = 967;
10810 else
10811 return false;
10812 break;
10813 case Intrinsic::ppc_altivec_vcmpgtub:
10814 CompareOpc = 518;
10815 break;
10816 case Intrinsic::ppc_altivec_vcmpgtuh:
10817 CompareOpc = 582;
10818 break;
10819 case Intrinsic::ppc_altivec_vcmpgtuw:
10820 CompareOpc = 646;
10821 break;
10822 case Intrinsic::ppc_altivec_vcmpgtud:
10823 if (Subtarget.hasP8Altivec())
10824 CompareOpc = 711;
10825 else
10826 return false;
10827 break;
10828 case Intrinsic::ppc_altivec_vcmpequq_p:
10829 case Intrinsic::ppc_altivec_vcmpgtsq_p:
10830 case Intrinsic::ppc_altivec_vcmpgtuq_p:
10831 if (!Subtarget.isISA3_1())
10832 return false;
10833 switch (IntrinsicID) {
10834 default:
10835 llvm_unreachable("Unknown comparison intrinsic.");
10836 case Intrinsic::ppc_altivec_vcmpequq_p:
10837 CompareOpc = 455;
10838 break;
10839 case Intrinsic::ppc_altivec_vcmpgtsq_p:
10840 CompareOpc = 903;
10841 break;
10842 case Intrinsic::ppc_altivec_vcmpgtuq_p:
10843 CompareOpc = 647;
10844 break;
10845 }
10846 isDot = true;
10847 break;
10848 }
10849 return true;
10850}
10851
10852/// LowerINTRINSIC_WO_CHAIN - If this is an intrinsic that we want to custom
10853/// lower, do it, otherwise return null.
10854SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
10855 SelectionDAG &DAG) const {
10856 unsigned IntrinsicID = Op.getConstantOperandVal(0);
10857
10858 SDLoc dl(Op);
10859
10860 switch (IntrinsicID) {
10861 case Intrinsic::thread_pointer:
10862 // Reads the thread pointer register, used for __builtin_thread_pointer.
10863 if (Subtarget.isPPC64())
10864 return DAG.getRegister(PPC::X13, MVT::i64);
10865 return DAG.getRegister(PPC::R2, MVT::i32);
10866
10867 case Intrinsic::ppc_rldimi: {
10868 assert(Subtarget.isPPC64() && "rldimi is only available in 64-bit!");
10869 SDValue Src = Op.getOperand(1);
10870 APInt Mask = Op.getConstantOperandAPInt(4);
10871 if (Mask.isZero())
10872 return Op.getOperand(2);
10873 if (Mask.isAllOnes())
10874 return DAG.getNode(ISD::ROTL, dl, MVT::i64, Src, Op.getOperand(3));
10875 uint64_t SH = Op.getConstantOperandVal(3);
10876 unsigned MB = 0, ME = 0;
10877 if (!isRunOfOnes64(Mask.getZExtValue(), MB, ME))
10878 report_fatal_error("invalid rldimi mask!");
10879 // rldimi requires ME=63-SH, otherwise rotation is needed before rldimi.
10880 if (ME < 63 - SH) {
10881 Src = DAG.getNode(ISD::ROTL, dl, MVT::i64, Src,
10882 DAG.getConstant(ME + SH + 1, dl, MVT::i32));
10883 } else if (ME > 63 - SH) {
10884 Src = DAG.getNode(ISD::ROTL, dl, MVT::i64, Src,
10885 DAG.getConstant(ME + SH - 63, dl, MVT::i32));
10886 }
10887 return SDValue(
10888 DAG.getMachineNode(PPC::RLDIMI, dl, MVT::i64,
10889 {Op.getOperand(2), Src,
10890 DAG.getTargetConstant(63 - ME, dl, MVT::i32),
10891 DAG.getTargetConstant(MB, dl, MVT::i32)}),
10892 0);
10893 }
10894
10895 case Intrinsic::ppc_rlwimi: {
10896 APInt Mask = Op.getConstantOperandAPInt(4);
10897 if (Mask.isZero())
10898 return Op.getOperand(2);
10899 if (Mask.isAllOnes())
10900 return DAG.getNode(ISD::ROTL, dl, MVT::i32, Op.getOperand(1),
10901 Op.getOperand(3));
10902 unsigned MB = 0, ME = 0;
10903 if (!isRunOfOnes(Mask.getZExtValue(), MB, ME))
10904 report_fatal_error("invalid rlwimi mask!");
10905 return SDValue(DAG.getMachineNode(
10906 PPC::RLWIMI, dl, MVT::i32,
10907 {Op.getOperand(2), Op.getOperand(1), Op.getOperand(3),
10908 DAG.getTargetConstant(MB, dl, MVT::i32),
10909 DAG.getTargetConstant(ME, dl, MVT::i32)}),
10910 0);
10911 }
10912
10913 case Intrinsic::ppc_rlwnm: {
10914 if (Op.getConstantOperandVal(3) == 0)
10915 return DAG.getConstant(0, dl, MVT::i32);
10916 unsigned MB = 0, ME = 0;
10917 if (!isRunOfOnes(Op.getConstantOperandVal(3), MB, ME))
10918 report_fatal_error("invalid rlwnm mask!");
10919 return SDValue(
10920 DAG.getMachineNode(PPC::RLWNM, dl, MVT::i32,
10921 {Op.getOperand(1), Op.getOperand(2),
10922 DAG.getTargetConstant(MB, dl, MVT::i32),
10923 DAG.getTargetConstant(ME, dl, MVT::i32)}),
10924 0);
10925 }
10926
10927 case Intrinsic::ppc_mma_disassemble_acc: {
10928 if (Subtarget.isISAFuture()) {
10929 EVT ReturnTypes[] = {MVT::v256i1, MVT::v256i1};
10930 SDValue WideVec =
10931 SDValue(DAG.getMachineNode(PPC::DMXXEXTFDMR512, dl, ReturnTypes,
10932 Op.getOperand(1)),
10933 0);
10934 SmallVector<SDValue, 4> RetOps;
10935 SDValue Value = SDValue(WideVec.getNode(), 0);
10936 SDValue Value2 = SDValue(WideVec.getNode(), 1);
10937
10938 SDValue Extract;
10939 Extract = DAG.getNode(
10940 PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8,
10941 Subtarget.isLittleEndian() ? Value2 : Value,
10942 DAG.getConstant(Subtarget.isLittleEndian() ? 1 : 0,
10943 dl, getPointerTy(DAG.getDataLayout())));
10944 RetOps.push_back(Extract);
10945 Extract = DAG.getNode(
10946 PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8,
10947 Subtarget.isLittleEndian() ? Value2 : Value,
10948 DAG.getConstant(Subtarget.isLittleEndian() ? 0 : 1,
10949 dl, getPointerTy(DAG.getDataLayout())));
10950 RetOps.push_back(Extract);
10951 Extract = DAG.getNode(
10952 PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8,
10953 Subtarget.isLittleEndian() ? Value : Value2,
10954 DAG.getConstant(Subtarget.isLittleEndian() ? 1 : 0,
10955 dl, getPointerTy(DAG.getDataLayout())));
10956 RetOps.push_back(Extract);
10957 Extract = DAG.getNode(
10958 PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8,
10959 Subtarget.isLittleEndian() ? Value : Value2,
10960 DAG.getConstant(Subtarget.isLittleEndian() ? 0 : 1,
10961 dl, getPointerTy(DAG.getDataLayout())));
10962 RetOps.push_back(Extract);
10963 return DAG.getMergeValues(RetOps, dl);
10964 }
10965 [[fallthrough]];
10966 }
10967 case Intrinsic::ppc_vsx_disassemble_pair: {
10968 int NumVecs = 2;
10969 SDValue WideVec = Op.getOperand(1);
10970 if (IntrinsicID == Intrinsic::ppc_mma_disassemble_acc) {
10971 NumVecs = 4;
10972 WideVec = DAG.getNode(PPCISD::XXMFACC, dl, MVT::v512i1, WideVec);
10973 }
10974 SmallVector<SDValue, 4> RetOps;
10975 for (int VecNo = 0; VecNo < NumVecs; VecNo++) {
10976 SDValue Extract = DAG.getNode(
10977 PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8, WideVec,
10978 DAG.getConstant(Subtarget.isLittleEndian() ? NumVecs - 1 - VecNo
10979 : VecNo,
10980 dl, getPointerTy(DAG.getDataLayout())));
10981 RetOps.push_back(Extract);
10982 }
10983 return DAG.getMergeValues(RetOps, dl);
10984 }
10985
10986 case Intrinsic::ppc_mma_xxmfacc:
10987 case Intrinsic::ppc_mma_xxmtacc: {
10988 // Allow pre-isa-future subtargets to lower as normal.
10989 if (!Subtarget.isISAFuture())
10990 return SDValue();
10991 // The intrinsics for xxmtacc and xxmfacc take one argument of
10992 // type v512i1. For future CPUs the corresponding wacc instruction
10993 // dmxx[inst|extf]dmr512 is always generated for type v512i1, negating
10994 // the need to produce the xxm[t|f]acc.
10995 SDValue WideVec = Op.getOperand(1);
10996 DAG.ReplaceAllUsesWith(Op, WideVec);
10997 return SDValue();
10998 }
10999
11000 case Intrinsic::ppc_unpack_longdouble: {
11001 auto *Idx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
11002 assert(Idx && (Idx->getSExtValue() == 0 || Idx->getSExtValue() == 1) &&
11003 "Argument of long double unpack must be 0 or 1!");
11004 return DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::f64, Op.getOperand(1),
11005 DAG.getConstant(!!(Idx->getSExtValue()), dl,
11006 Idx->getValueType(0)));
11007 }
11008
11009 case Intrinsic::ppc_compare_exp_lt:
11010 case Intrinsic::ppc_compare_exp_gt:
11011 case Intrinsic::ppc_compare_exp_eq:
11012 case Intrinsic::ppc_compare_exp_uo: {
11013 unsigned Pred;
11014 switch (IntrinsicID) {
11015 case Intrinsic::ppc_compare_exp_lt:
11016 Pred = PPC::PRED_LT;
11017 break;
11018 case Intrinsic::ppc_compare_exp_gt:
11019 Pred = PPC::PRED_GT;
11020 break;
11021 case Intrinsic::ppc_compare_exp_eq:
11022 Pred = PPC::PRED_EQ;
11023 break;
11024 case Intrinsic::ppc_compare_exp_uo:
11025 Pred = PPC::PRED_UN;
11026 break;
11027 }
11028 return SDValue(
11029 DAG.getMachineNode(
11030 PPC::SELECT_CC_I4, dl, MVT::i32,
11031 {SDValue(DAG.getMachineNode(PPC::XSCMPEXPDP, dl, MVT::i32,
11032 Op.getOperand(1), Op.getOperand(2)),
11033 0),
11034 DAG.getConstant(1, dl, MVT::i32), DAG.getConstant(0, dl, MVT::i32),
11035 DAG.getTargetConstant(Pred, dl, MVT::i32)}),
11036 0);
11037 }
11038 case Intrinsic::ppc_test_data_class: {
11039 EVT OpVT = Op.getOperand(1).getValueType();
11040 unsigned CmprOpc = OpVT == MVT::f128 ? PPC::XSTSTDCQP
11041 : (OpVT == MVT::f64 ? PPC::XSTSTDCDP
11042 : PPC::XSTSTDCSP);
11043 return SDValue(
11044 DAG.getMachineNode(
11045 PPC::SELECT_CC_I4, dl, MVT::i32,
11046 {SDValue(DAG.getMachineNode(CmprOpc, dl, MVT::i32, Op.getOperand(2),
11047 Op.getOperand(1)),
11048 0),
11049 DAG.getConstant(1, dl, MVT::i32), DAG.getConstant(0, dl, MVT::i32),
11050 DAG.getTargetConstant(PPC::PRED_EQ, dl, MVT::i32)}),
11051 0);
11052 }
11053 case Intrinsic::ppc_fnmsub: {
11054 EVT VT = Op.getOperand(1).getValueType();
11055 if (!Subtarget.hasVSX() || (!Subtarget.hasFloat128() && VT == MVT::f128))
11056 return DAG.getNode(
11057 ISD::FNEG, dl, VT,
11058 DAG.getNode(ISD::FMA, dl, VT, Op.getOperand(1), Op.getOperand(2),
11059 DAG.getNode(ISD::FNEG, dl, VT, Op.getOperand(3))));
11060 return DAG.getNode(PPCISD::FNMSUB, dl, VT, Op.getOperand(1),
11061 Op.getOperand(2), Op.getOperand(3));
11062 }
11063 case Intrinsic::ppc_convert_f128_to_ppcf128:
11064 case Intrinsic::ppc_convert_ppcf128_to_f128: {
11065 RTLIB::Libcall LC = IntrinsicID == Intrinsic::ppc_convert_ppcf128_to_f128
11066 ? RTLIB::CONVERT_PPCF128_F128
11067 : RTLIB::CONVERT_F128_PPCF128;
11068 MakeLibCallOptions CallOptions;
11069 std::pair<SDValue, SDValue> Result =
11070 makeLibCall(DAG, LC, Op.getValueType(), Op.getOperand(1), CallOptions,
11071 dl, SDValue());
11072 return Result.first;
11073 }
11074 case Intrinsic::ppc_maxfe:
11075 case Intrinsic::ppc_maxfl:
11076 case Intrinsic::ppc_maxfs:
11077 case Intrinsic::ppc_minfe:
11078 case Intrinsic::ppc_minfl:
11079 case Intrinsic::ppc_minfs: {
11080 EVT VT = Op.getValueType();
11081 assert(
11082 all_of(Op->ops().drop_front(4),
11083 [VT](const SDUse &Use) { return Use.getValueType() == VT; }) &&
11084 "ppc_[max|min]f[e|l|s] must have uniform type arguments");
11085 (void)VT;
11086 ISD::CondCode CC = ISD::SETGT;
11087 if (IntrinsicID == Intrinsic::ppc_minfe ||
11088 IntrinsicID == Intrinsic::ppc_minfl ||
11089 IntrinsicID == Intrinsic::ppc_minfs)
11090 CC = ISD::SETLT;
11091 unsigned I = Op.getNumOperands() - 2, Cnt = I;
11092 SDValue Res = Op.getOperand(I);
11093 for (--I; Cnt != 0; --Cnt, I = (--I == 0 ? (Op.getNumOperands() - 1) : I)) {
11094 Res =
11095 DAG.getSelectCC(dl, Res, Op.getOperand(I), Res, Op.getOperand(I), CC);
11096 }
11097 return Res;
11098 }
11099 }
11100
11101 // If this is a lowered altivec predicate compare, CompareOpc is set to the
11102 // opcode number of the comparison.
11103 int CompareOpc;
11104 bool isDot;
11105 if (!getVectorCompareInfo(Op, CompareOpc, isDot, Subtarget))
11106 return SDValue(); // Don't custom lower most intrinsics.
11107
11108 // If this is a non-dot comparison, make the VCMP node and we are done.
11109 if (!isDot) {
11110 SDValue Tmp = DAG.getNode(PPCISD::VCMP, dl, Op.getOperand(2).getValueType(),
11111 Op.getOperand(1), Op.getOperand(2),
11112 DAG.getConstant(CompareOpc, dl, MVT::i32));
11113 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Tmp);
11114 }
11115
11116 // Create the PPCISD altivec 'dot' comparison node.
11117 SDValue Ops[] = {
11118 Op.getOperand(2), // LHS
11119 Op.getOperand(3), // RHS
11120 DAG.getConstant(CompareOpc, dl, MVT::i32)
11121 };
11122 EVT VTs[] = { Op.getOperand(2).getValueType(), MVT::Glue };
11123 SDValue CompNode = DAG.getNode(PPCISD::VCMP_rec, dl, VTs, Ops);
11124
11125 // Now that we have the comparison, emit a copy from the CR to a GPR.
11126 // This is flagged to the above dot comparison.
11127 SDValue Flags = DAG.getNode(PPCISD::MFOCRF, dl, MVT::i32,
11128 DAG.getRegister(PPC::CR6, MVT::i32),
11129 CompNode.getValue(1));
11130
11131 // Unpack the result based on how the target uses it.
11132 unsigned BitNo; // Bit # of CR6.
11133 bool InvertBit; // Invert result?
11134 switch (Op.getConstantOperandVal(1)) {
11135 default: // Can't happen, don't crash on invalid number though.
11136 case 0: // Return the value of the EQ bit of CR6.
11137 BitNo = 0; InvertBit = false;
11138 break;
11139 case 1: // Return the inverted value of the EQ bit of CR6.
11140 BitNo = 0; InvertBit = true;
11141 break;
11142 case 2: // Return the value of the LT bit of CR6.
11143 BitNo = 2; InvertBit = false;
11144 break;
11145 case 3: // Return the inverted value of the LT bit of CR6.
11146 BitNo = 2; InvertBit = true;
11147 break;
11148 }
11149
11150 // Shift the bit into the low position.
11151 Flags = DAG.getNode(ISD::SRL, dl, MVT::i32, Flags,
11152 DAG.getConstant(8 - (3 - BitNo), dl, MVT::i32));
11153 // Isolate the bit.
11154 Flags = DAG.getNode(ISD::AND, dl, MVT::i32, Flags,
11155 DAG.getConstant(1, dl, MVT::i32));
11156
11157 // If we are supposed to, toggle the bit.
11158 if (InvertBit)
11159 Flags = DAG.getNode(ISD::XOR, dl, MVT::i32, Flags,
11160 DAG.getConstant(1, dl, MVT::i32));
11161 return Flags;
11162}
11163
11164SDValue PPCTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
11165 SelectionDAG &DAG) const {
11166 // SelectionDAGBuilder::visitTargetIntrinsic may insert one extra chain to
11167 // the beginning of the argument list.
11168 int ArgStart = isa<ConstantSDNode>(Op.getOperand(0)) ? 0 : 1;
11169 SDLoc DL(Op);
11170 switch (Op.getConstantOperandVal(ArgStart)) {
11171 case Intrinsic::ppc_cfence: {
11172 assert(ArgStart == 1 && "llvm.ppc.cfence must carry a chain argument.");
11173 SDValue Val = Op.getOperand(ArgStart + 1);
11174 EVT Ty = Val.getValueType();
11175 if (Ty == MVT::i128) {
11176 // FIXME: Testing one of two paired registers is sufficient to guarantee
11177 // ordering?
11178 Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::i64, Val);
11179 }
11180 unsigned Opcode = Subtarget.isPPC64() ? PPC::CFENCE8 : PPC::CFENCE;
11181 EVT FTy = Subtarget.isPPC64() ? MVT::i64 : MVT::i32;
11182 return SDValue(
11183 DAG.getMachineNode(Opcode, DL, MVT::Other,
11184 DAG.getNode(ISD::ANY_EXTEND, DL, FTy, Val),
11185 Op.getOperand(0)),
11186 0);
11187 }
11188 default:
11189 break;
11190 }
11191 return SDValue();
11192}
11193
11194// Lower scalar BSWAP64 to xxbrd.
11195SDValue PPCTargetLowering::LowerBSWAP(SDValue Op, SelectionDAG &DAG) const {
11196 SDLoc dl(Op);
11197 if (!Subtarget.isPPC64())
11198 return Op;
11199 // MTVSRDD
11200 Op = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, Op.getOperand(0),
11201 Op.getOperand(0));
11202 // XXBRD
11203 Op = DAG.getNode(ISD::BSWAP, dl, MVT::v2i64, Op);
11204 // MFVSRD
11205 int VectorIndex = 0;
11206 if (Subtarget.isLittleEndian())
11207 VectorIndex = 1;
11208 Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, Op,
11209 DAG.getTargetConstant(VectorIndex, dl, MVT::i32));
11210 return Op;
11211}
11212
11213// ATOMIC_CMP_SWAP for i8/i16 needs to zero-extend its input since it will be
11214// compared to a value that is atomically loaded (atomic loads zero-extend).
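// For example, an i8 cmpxchg whose expected value is the constant -1 may reach
// here as 0xFFFFFFFF; the lbarx result is zero-extended to 0x000000FF, so the
// compare operand must be masked down to 0xFF for the comparison to succeed.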
11215SDValue PPCTargetLowering::LowerATOMIC_CMP_SWAP(SDValue Op,
11216 SelectionDAG &DAG) const {
11217 assert(Op.getOpcode() == ISD::ATOMIC_CMP_SWAP &&
11218 "Expecting an atomic compare-and-swap here.");
11219 SDLoc dl(Op);
11220 auto *AtomicNode = cast<AtomicSDNode>(Op.getNode());
11221 EVT MemVT = AtomicNode->getMemoryVT();
11222 if (MemVT.getSizeInBits() >= 32)
11223 return Op;
11224
11225 SDValue CmpOp = Op.getOperand(2);
11226 // If this is already correctly zero-extended, leave it alone.
11227 auto HighBits = APInt::getHighBitsSet(32, 32 - MemVT.getSizeInBits());
11228 if (DAG.MaskedValueIsZero(CmpOp, HighBits))
11229 return Op;
11230
11231 // Clear the high bits of the compare operand.
11232 unsigned MaskVal = (1 << MemVT.getSizeInBits()) - 1;
11233 SDValue NewCmpOp =
11234 DAG.getNode(ISD::AND, dl, MVT::i32, CmpOp,
11235 DAG.getConstant(MaskVal, dl, MVT::i32));
11236
11237 // Replace the existing compare operand with the properly zero-extended one.
11238 SmallVector<SDValue, 4> Ops;
11239 for (int i = 0, e = AtomicNode->getNumOperands(); i < e; i++)
11240 Ops.push_back(AtomicNode->getOperand(i));
11241 Ops[2] = NewCmpOp;
11242 MachineMemOperand *MMO = AtomicNode->getMemOperand();
11243 SDVTList Tys = DAG.getVTList(MVT::i32, MVT::Other);
11244 auto NodeTy =
11245 (MemVT == MVT::i8) ? PPCISD::ATOMIC_CMP_SWAP_8 : PPCISD::ATOMIC_CMP_SWAP_16;
11246 return DAG.getMemIntrinsicNode(NodeTy, dl, Tys, Ops, MemVT, MMO);
11247}
11248
11249SDValue PPCTargetLowering::LowerATOMIC_LOAD_STORE(SDValue Op,
11250 SelectionDAG &DAG) const {
11251 AtomicSDNode *N = cast<AtomicSDNode>(Op.getNode());
11252 EVT MemVT = N->getMemoryVT();
11253 assert(MemVT.getSimpleVT() == MVT::i128 &&
11254 "Expect quadword atomic operations");
11255 SDLoc dl(N);
11256 unsigned Opc = N->getOpcode();
11257 switch (Opc) {
11258 case ISD::ATOMIC_LOAD: {
11259 // Lower a quadword atomic load to int_ppc_atomic_load_i128, which is then
11260 // lowered to PPC instructions by the pattern-matching instruction selector.
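// The intrinsic yields the value as two i64 halves; below they are
// zero-extended to i128 and recombined as Lo | (Hi << 64).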
11261 SDVTList Tys = DAG.getVTList(MVT::i64, MVT::i64, MVT::Other);
11262 SmallVector<SDValue, 4> Ops{
11263 N->getOperand(0),
11264 DAG.getConstant(Intrinsic::ppc_atomic_load_i128, dl, MVT::i32)};
11265 for (int I = 1, E = N->getNumOperands(); I < E; ++I)
11266 Ops.push_back(N->getOperand(I));
11267 SDValue LoadedVal = DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl, Tys,
11268 Ops, MemVT, N->getMemOperand());
11269 SDValue ValLo = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i128, LoadedVal);
11270 SDValue ValHi =
11271 DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i128, LoadedVal.getValue(1));
11272 ValHi = DAG.getNode(ISD::SHL, dl, MVT::i128, ValHi,
11273 DAG.getConstant(64, dl, MVT::i32));
11274 SDValue Val =
11275 DAG.getNode(ISD::OR, dl, {MVT::i128, MVT::Other}, {ValLo, ValHi});
11276 return DAG.getNode(ISD::MERGE_VALUES, dl, {MVT::i128, MVT::Other},
11277 {Val, LoadedVal.getValue(2)});
11278 }
11279 case ISD::ATOMIC_STORE: {
11280 // Lower a quadword atomic store to int_ppc_atomic_store_i128, which is then
11281 // lowered to PPC instructions by the pattern-matching instruction selector.
11282 SDVTList Tys = DAG.getVTList(MVT::Other);
11283 SmallVector<SDValue, 4> Ops{
11284 N->getOperand(0),
11285 DAG.getConstant(Intrinsic::ppc_atomic_store_i128, dl, MVT::i32)};
11286 SDValue Val = N->getOperand(1);
11287 SDValue ValLo = DAG.getNode(ISD::TRUNCATE, dl, MVT::i64, Val);
11288 SDValue ValHi = DAG.getNode(ISD::SRL, dl, MVT::i128, Val,
11289 DAG.getConstant(64, dl, MVT::i32));
11290 ValHi = DAG.getNode(ISD::TRUNCATE, dl, MVT::i64, ValHi);
11291 Ops.push_back(ValLo);
11292 Ops.push_back(ValHi);
11293 Ops.push_back(N->getOperand(2));
11294 return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, dl, Tys, Ops, MemVT,
11295 N->getMemOperand());
11296 }
11297 default:
11298 llvm_unreachable("Unexpected atomic opcode");
11299 }
11300}
11301
11302static SDValue getDataClassTest(SDValue Op, FPClassTest Mask, const SDLoc &Dl,
11303 SelectionDAG &DAG,
11304 const PPCSubtarget &Subtarget) {
11305 assert(Mask <= fcAllFlags && "Invalid fp_class flags!");
11306
11307 enum DataClassMask {
11308 DC_NAN = 1 << 6,
11309 DC_NEG_INF = 1 << 4,
11310 DC_POS_INF = 1 << 5,
11311 DC_NEG_ZERO = 1 << 2,
11312 DC_POS_ZERO = 1 << 3,
11313 DC_NEG_SUBNORM = 1,
11314 DC_POS_SUBNORM = 1 << 1,
11315 };
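// These values mirror the 7-bit DCMX immediate of the xststdc[sp|dp|qp]
// instructions, which set the EQ bit of the target CR field when the operand
// belongs to any of the selected data classes.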
11316
11317 EVT VT = Op.getValueType();
11318
11319 unsigned TestOp = VT == MVT::f128 ? PPC::XSTSTDCQP
11320 : VT == MVT::f64 ? PPC::XSTSTDCDP
11321 : PPC::XSTSTDCSP;
11322
11323 if (Mask == fcAllFlags)
11324 return DAG.getBoolConstant(true, Dl, MVT::i1, VT);
11325 if (Mask == 0)
11326 return DAG.getBoolConstant(false, Dl, MVT::i1, VT);
11327
11328 // Test the inverted flags when that is cheaper or necessary.
11329 if ((Mask & fcNormal) == fcNormal || Mask == ~fcQNan || Mask == ~fcSNan) {
11330 SDValue Rev = getDataClassTest(Op, ~Mask, Dl, DAG, Subtarget);
11331 return DAG.getNOT(Dl, Rev, MVT::i1);
11332 }
11333
11334 // Power doesn't support testing whether a value is 'normal'. Test the rest
11335 // first, and test if it's 'not not-normal' with expected sign.
11336 if (Mask & fcNormal) {
11337 SDValue Rev(DAG.getMachineNode(
11338 TestOp, Dl, MVT::i32,
11339 DAG.getTargetConstant(DC_NAN | DC_NEG_INF | DC_POS_INF |
11340 DC_NEG_ZERO | DC_POS_ZERO |
11341 DC_NEG_SUBNORM | DC_POS_SUBNORM,
11342 Dl, MVT::i32),
11343 Op),
11344 0);
11345 // The sign is stored in CR bit 0 and the result in CR bit 2.
11346 SDValue Sign(
11347 DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, Dl, MVT::i1, Rev,
11348 DAG.getTargetConstant(PPC::sub_lt, Dl, MVT::i32)),
11349 0);
11350 SDValue Normal(DAG.getNOT(
11351 Dl,
11352 SDValue(DAG.getMachineNode(
11353 TargetOpcode::EXTRACT_SUBREG, Dl, MVT::i1, Rev,
11354 DAG.getTargetConstant(PPC::sub_eq, Dl, MVT::i32)),
11355 0),
11356 MVT::i1));
11357 if (Mask & fcPosNormal)
11358 Sign = DAG.getNOT(Dl, Sign, MVT::i1);
11359 SDValue Result = DAG.getNode(ISD::AND, Dl, MVT::i1, Sign, Normal);
11360 if (Mask == fcPosNormal || Mask == fcNegNormal)
11361 return Result;
11362
11363 return DAG.getNode(
11364 ISD::OR, Dl, MVT::i1,
11365 getDataClassTest(Op, Mask & ~fcNormal, Dl, DAG, Subtarget), Result);
11366 }
11367
11368 // The instruction doesn't differentiate between signaling or quiet NaN. Test
11369 // the rest first, and test if it 'is NaN and is signaling/quiet'.
11370 if ((Mask & fcNan) == fcQNan || (Mask & fcNan) == fcSNan) {
11371 bool IsQuiet = Mask & fcQNan;
11372 SDValue NanCheck = getDataClassTest(Op, fcNan, Dl, DAG, Subtarget);
11373
11374 // Quietness is determined by the first bit in the fraction field.
11375 uint64_t QuietMask = 0;
11376 SDValue HighWord;
11377 if (VT == MVT::f128) {
11378 HighWord = DAG.getNode(
11379 ISD::EXTRACT_VECTOR_ELT, Dl, MVT::i32, DAG.getBitcast(MVT::v4i32, Op),
11380 DAG.getVectorIdxConstant(Subtarget.isLittleEndian() ? 3 : 0, Dl));
11381 QuietMask = 0x8000;
11382 } else if (VT == MVT::f64) {
11383 if (Subtarget.isPPC64()) {
11384 HighWord = DAG.getNode(ISD::EXTRACT_ELEMENT, Dl, MVT::i32,
11385 DAG.getBitcast(MVT::i64, Op),
11386 DAG.getConstant(1, Dl, MVT::i32));
11387 } else {
11388 SDValue Vec = DAG.getBitcast(
11389 MVT::v4i32, DAG.getNode(ISD::SCALAR_TO_VECTOR, Dl, MVT::v2f64, Op));
11390 HighWord = DAG.getNode(
11391 ISD::EXTRACT_VECTOR_ELT, Dl, MVT::i32, Vec,
11392 DAG.getVectorIdxConstant(Subtarget.isLittleEndian() ? 1 : 0, Dl));
11393 }
11394 QuietMask = 0x80000;
11395 } else if (VT == MVT::f32) {
11396 HighWord = DAG.getBitcast(MVT::i32, Op);
11397 QuietMask = 0x400000;
11398 }
11399 SDValue NanRes = DAG.getSetCC(
11400 Dl, MVT::i1,
11401 DAG.getNode(ISD::AND, Dl, MVT::i32, HighWord,
11402 DAG.getConstant(QuietMask, Dl, MVT::i32)),
11403 DAG.getConstant(0, Dl, MVT::i32), IsQuiet ? ISD::SETNE : ISD::SETEQ);
11404 NanRes = DAG.getNode(ISD::AND, Dl, MVT::i1, NanCheck, NanRes);
11405 if (Mask == fcQNan || Mask == fcSNan)
11406 return NanRes;
11407
11408 return DAG.getNode(ISD::OR, Dl, MVT::i1,
11409 getDataClassTest(Op, Mask & ~fcNan, Dl, DAG, Subtarget),
11410 NanRes);
11411 }
11412
11413 unsigned NativeMask = 0;
11414 if ((Mask & fcNan) == fcNan)
11415 NativeMask |= DC_NAN;
11416 if (Mask & fcNegInf)
11417 NativeMask |= DC_NEG_INF;
11418 if (Mask & fcPosInf)
11419 NativeMask |= DC_POS_INF;
11420 if (Mask & fcNegZero)
11421 NativeMask |= DC_NEG_ZERO;
11422 if (Mask & fcPosZero)
11423 NativeMask |= DC_POS_ZERO;
11424 if (Mask & fcNegSubnormal)
11425 NativeMask |= DC_NEG_SUBNORM;
11426 if (Mask & fcPosSubnormal)
11427 NativeMask |= DC_POS_SUBNORM;
11428 return SDValue(
11429 DAG.getMachineNode(
11430 TargetOpcode::EXTRACT_SUBREG, Dl, MVT::i1,
11431 SDValue(DAG.getMachineNode(
11432 TestOp, Dl, MVT::i32,
11433 DAG.getTargetConstant(NativeMask, Dl, MVT::i32), Op),
11434 0),
11435 DAG.getTargetConstant(PPC::sub_eq, Dl, MVT::i32)),
11436 0);
11437}
11438
11439SDValue PPCTargetLowering::LowerIS_FPCLASS(SDValue Op,
11440 SelectionDAG &DAG) const {
11441 assert(Subtarget.hasP9Vector() && "Test data class requires Power9");
11442 SDValue LHS = Op.getOperand(0);
11443 uint64_t RHSC = Op.getConstantOperandVal(1);
11444 SDLoc Dl(Op);
11445 FPClassTest Category = static_cast<FPClassTest>(RHSC);
11446 return getDataClassTest(LHS, Category, Dl, DAG, Subtarget);
11447}
11448
11449SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op,
11450 SelectionDAG &DAG) const {
11451 SDLoc dl(Op);
11452 // Create a stack slot that is 16-byte aligned.
11453 MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
11454 int FrameIdx = MFI.CreateStackObject(16, Align(16), false);
11455 EVT PtrVT = getPointerTy(DAG.getDataLayout());
11456 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
11457
11458 // Store the input value into Value#0 of the stack slot.
11459 SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0), FIdx,
11460 MachinePointerInfo());
11461 // Load it out.
11462 return DAG.getLoad(Op.getValueType(), dl, Store, FIdx, MachinePointerInfo());
11463}
11464
11465SDValue PPCTargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
11466 SelectionDAG &DAG) const {
11467 assert(Op.getOpcode() == ISD::INSERT_VECTOR_ELT &&
11468 "Should only be called for ISD::INSERT_VECTOR_ELT");
11469
11470 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(2));
11471
11472 EVT VT = Op.getValueType();
11473 SDLoc dl(Op);
11474 SDValue V1 = Op.getOperand(0);
11475 SDValue V2 = Op.getOperand(1);
11476
11477 if (VT == MVT::v2f64 && C)
11478 return Op;
11479
11480 if (Subtarget.hasP9Vector()) {
11481 // A f32 load feeding into a v4f32 insert_vector_elt is handled in this way
11482 // because on P10, it allows this specific insert_vector_elt load pattern to
11483 // utilize the refactored load and store infrastructure in order to exploit
11484 // prefixed loads.
11485 // On targets with inexpensive direct moves (Power9 and up), a
11486 // (insert_vector_elt v4f32:$vec, (f32 load)) is always better as an integer
11487 // load since a single precision load will involve conversion to double
11488 // precision on the load followed by another conversion to single precision.
11489 if ((VT == MVT::v4f32) && (V2.getValueType() == MVT::f32) &&
11490 (isa<LoadSDNode>(V2))) {
11491 SDValue BitcastVector = DAG.getBitcast(MVT::v4i32, V1);
11492 SDValue BitcastLoad = DAG.getBitcast(MVT::i32, V2);
11493 SDValue InsVecElt =
11494 DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v4i32, BitcastVector,
11495 BitcastLoad, Op.getOperand(2));
11496 return DAG.getBitcast(MVT::v4f32, InsVecElt);
11497 }
11498 }
11499
11500 if (Subtarget.isISA3_1()) {
11501 if ((VT == MVT::v2i64 || VT == MVT::v2f64) && !Subtarget.isPPC64())
11502 return SDValue();
11503 // On P10, we have legal lowering for constant and variable indices for
11504 // all vectors.
11505 if (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32 ||
11506 VT == MVT::v2i64 || VT == MVT::v4f32 || VT == MVT::v2f64)
11507 return Op;
11508 }
11509
11510 // Before P10, we have legal lowering for constant indices but not for
11511 // variable ones.
11512 if (!C)
11513 return SDValue();
11514
11515 // We can use MTVSRZ + VECINSERT for v8i16 and v16i8 types.
11516 if (VT == MVT::v8i16 || VT == MVT::v16i8) {
11517 SDValue Mtvsrz = DAG.getNode(PPCISD::MTVSRZ, dl, VT, V2);
11518 unsigned BytesInEachElement = VT.getVectorElementType().getSizeInBits() / 8;
11519 unsigned InsertAtElement = C->getZExtValue();
11520 unsigned InsertAtByte = InsertAtElement * BytesInEachElement;
11521 if (Subtarget.isLittleEndian()) {
11522 InsertAtByte = (16 - BytesInEachElement) - InsertAtByte;
11523 }
11524 return DAG.getNode(PPCISD::VECINSERT, dl, VT, V1, Mtvsrz,
11525 DAG.getConstant(InsertAtByte, dl, MVT::i32));
11526 }
11527 return Op;
11528}
11529
11530SDValue PPCTargetLowering::LowerVectorLoad(SDValue Op,
11531 SelectionDAG &DAG) const {
11532 SDLoc dl(Op);
11533 LoadSDNode *LN = cast<LoadSDNode>(Op.getNode());
11534 SDValue LoadChain = LN->getChain();
11535 SDValue BasePtr = LN->getBasePtr();
11536 EVT VT = Op.getValueType();
11537
11538 if (VT != MVT::v256i1 && VT != MVT::v512i1)
11539 return Op;
11540
11541 // Type v256i1 is used for pairs and v512i1 is used for accumulators.
11542 // Here we create 2 or 4 v16i8 loads to load the pair or accumulator value in
11543 // 2 or 4 vsx registers.
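// On little-endian targets the loads (and their chains) are reversed below so
// that the first quadword in memory ends up as the last operand of the
// ACC_BUILD / PAIR_BUILD node.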
11544 assert((VT != MVT::v512i1 || Subtarget.hasMMA()) &&
11545 "Type unsupported without MMA");
11546 assert((VT != MVT::v256i1 || Subtarget.pairedVectorMemops()) &&
11547 "Type unsupported without paired vector support");
11548 Align Alignment = LN->getAlign();
11549 SmallVector<SDValue, 4> Loads;
11550 SmallVector<SDValue, 4> LoadChains;
11551 unsigned NumVecs = VT.getSizeInBits() / 128;
11552 for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
11553 SDValue Load =
11554 DAG.getLoad(MVT::v16i8, dl, LoadChain, BasePtr,
11555 LN->getPointerInfo().getWithOffset(Idx * 16),
11556 commonAlignment(Alignment, Idx * 16),
11557 LN->getMemOperand()->getFlags(), LN->getAAInfo());
11558 BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
11559 DAG.getConstant(16, dl, BasePtr.getValueType()));
11560 Loads.push_back(Load);
11561 LoadChains.push_back(Load.getValue(1));
11562 }
11563 if (Subtarget.isLittleEndian()) {
11564 std::reverse(Loads.begin(), Loads.end());
11565 std::reverse(LoadChains.begin(), LoadChains.end());
11566 }
11567 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains);
11568 SDValue Value =
11569 DAG.getNode(VT == MVT::v512i1 ? PPCISD::ACC_BUILD : PPCISD::PAIR_BUILD,
11570 dl, VT, Loads);
11571 SDValue RetOps[] = {Value, TF};
11572 return DAG.getMergeValues(RetOps, dl);
11573}
11574
11575SDValue PPCTargetLowering::LowerVectorStore(SDValue Op,
11576 SelectionDAG &DAG) const {
11577 SDLoc dl(Op);
11578 StoreSDNode *SN = cast<StoreSDNode>(Op.getNode());
11579 SDValue StoreChain = SN->getChain();
11580 SDValue BasePtr = SN->getBasePtr();
11581 SDValue Value = SN->getValue();
11582 SDValue Value2 = SN->getValue();
11583 EVT StoreVT = Value.getValueType();
11584
11585 if (StoreVT != MVT::v256i1 && StoreVT != MVT::v512i1)
11586 return Op;
11587
11588 // Type v256i1 is used for pairs and v512i1 is used for accumulators.
11589 // Here we create 2 or 4 v16i8 stores to store the pair or accumulator
11590 // underlying registers individually.
11591 assert((StoreVT != MVT::v512i1 || Subtarget.hasMMA()) &&
11592 "Type unsupported without MMA");
11593 assert((StoreVT != MVT::v256i1 || Subtarget.pairedVectorMemops()) &&
11594 "Type unsupported without paired vector support");
11595 Align Alignment = SN->getAlign();
11596 SmallVector<SDValue, 4> Stores;
11597 unsigned NumVecs = 2;
11598 if (StoreVT == MVT::v512i1) {
11599 if (Subtarget.isISAFuture()) {
11600 EVT ReturnTypes[] = {MVT::v256i1, MVT::v256i1};
11601 MachineSDNode *ExtNode = DAG.getMachineNode(
11602 PPC::DMXXEXTFDMR512, dl, ReturnTypes, Op.getOperand(1));
11603
11604 Value = SDValue(ExtNode, 0);
11605 Value2 = SDValue(ExtNode, 1);
11606 } else
11607 Value = DAG.getNode(PPCISD::XXMFACC, dl, MVT::v512i1, Value);
11608 NumVecs = 4;
11609 }
11610 for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
11611 unsigned VecNum = Subtarget.isLittleEndian() ? NumVecs - 1 - Idx : Idx;
11612 SDValue Elt;
11613 if (Subtarget.isISAFuture()) {
11614 VecNum = Subtarget.isLittleEndian() ? 1 - (Idx % 2) : (Idx % 2);
11615 Elt = DAG.getNode(PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8,
11616 Idx > 1 ? Value2 : Value,
11617 DAG.getConstant(VecNum, dl, getPointerTy(DAG.getDataLayout())));
11618 } else
11619 Elt = DAG.getNode(PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8, Value,
11620 DAG.getConstant(VecNum, dl, getPointerTy(DAG.getDataLayout())));
11621
11622 SDValue Store =
11623 DAG.getStore(StoreChain, dl, Elt, BasePtr,
11624 SN->getPointerInfo().getWithOffset(Idx * 16),
11625 commonAlignment(Alignment, Idx * 16),
11626 SN->getMemOperand()->getFlags(), SN->getAAInfo());
11627 BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
11628 DAG.getConstant(16, dl, BasePtr.getValueType()));
11629 Stores.push_back(Store);
11630 }
11631 SDValue TF = DAG.getTokenFactor(dl, Stores);
11632 return TF;
11633}
11634
11635SDValue PPCTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const {
11636 SDLoc dl(Op);
11637 if (Op.getValueType() == MVT::v4i32) {
11638 SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
11639
11640 SDValue Zero = getCanonicalConstSplat(0, 1, MVT::v4i32, DAG, dl);
11641 // +16 as shift amt.
11642 SDValue Neg16 = getCanonicalConstSplat(-16, 4, MVT::v4i32, DAG, dl);
11643 SDValue RHSSwap = // = vrlw RHS, 16
11644 BuildIntrinsicOp(Intrinsic::ppc_altivec_vrlw, RHS, Neg16, DAG, dl);
11645
11646 // Shrinkify inputs to v8i16.
11647 LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, LHS);
11648 RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHS);
11649 RHSSwap = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHSSwap);
11650
11651 // Low parts multiplied together, generating 32-bit results (we ignore the
11652 // top parts).
11653 SDValue LoProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmulouh,
11654 LHS, RHS, DAG, dl, MVT::v4i32);
11655
11656 SDValue HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmsumuhm,
11657 LHS, RHSSwap, Zero, DAG, dl, MVT::v4i32);
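// Per 32-bit lane this computes a*b modulo 2^32 as
//   (a_lo * b_lo) + ((a_lo * b_hi + a_hi * b_lo) << 16):
// LoProd supplies the first term, and vmsumuhm over the halfword-swapped RHS
// supplies the sum of cross products that is shifted up and added below.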
11658 // Shift the high parts up 16 bits.
11659 HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, HiProd,
11660 Neg16, DAG, dl);
11661 return DAG.getNode(ISD::ADD, dl, MVT::v4i32, LoProd, HiProd);
11662 } else if (Op.getValueType() == MVT::v16i8) {
11663 SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
11664 bool isLittleEndian = Subtarget.isLittleEndian();
11665
11666 // Multiply the even 8-bit parts, producing 16-bit sums.
11667 SDValue EvenParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuleub,
11668 LHS, RHS, DAG, dl, MVT::v8i16);
11669 EvenParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, EvenParts);
11670
11671 // Multiply the odd 8-bit parts, producing 16-bit sums.
11672 SDValue OddParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuloub,
11673 LHS, RHS, DAG, dl, MVT::v8i16);
11674 OddParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OddParts);
11675
11676 // Merge the results together. Because vmuleub and vmuloub are
11677 // instructions with a big-endian bias, we must reverse the
11678 // element numbering and reverse the meaning of "odd" and "even"
11679 // when generating little endian code.
11680 int Ops[16];
11681 for (unsigned i = 0; i != 8; ++i) {
11682 if (isLittleEndian) {
11683 Ops[i*2 ] = 2*i;
11684 Ops[i*2+1] = 2*i+16;
11685 } else {
11686 Ops[i*2 ] = 2*i+1;
11687 Ops[i*2+1] = 2*i+1+16;
11688 }
11689 }
11690 if (isLittleEndian)
11691 return DAG.getVectorShuffle(MVT::v16i8, dl, OddParts, EvenParts, Ops);
11692 else
11693 return DAG.getVectorShuffle(MVT::v16i8, dl, EvenParts, OddParts, Ops);
11694 } else {
11695 llvm_unreachable("Unknown mul to lower!");
11696 }
11697}
11698
11699SDValue PPCTargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const {
11700 bool IsStrict = Op->isStrictFPOpcode();
11701 if (Op.getOperand(IsStrict ? 1 : 0).getValueType() == MVT::f128 &&
11702 !Subtarget.hasP9Vector())
11703 return SDValue();
11704
11705 return Op;
11706}
11707
11708// Custom lowering for fpext v2f32 to v2f64
11709SDValue PPCTargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const {
11710
11711 assert(Op.getOpcode() == ISD::FP_EXTEND &&
11712 "Should only be called for ISD::FP_EXTEND");
11713
11714 // FIXME: handle extends from half precision float vectors on P9.
11715 // We only want to custom lower an extend from v2f32 to v2f64.
11716 if (Op.getValueType() != MVT::v2f64 ||
11717 Op.getOperand(0).getValueType() != MVT::v2f32)
11718 return SDValue();
11719
11720 SDLoc dl(Op);
11721 SDValue Op0 = Op.getOperand(0);
11722
11723 switch (Op0.getOpcode()) {
11724 default:
11725 return SDValue();
11726 case ISD::EXTRACT_SUBVECTOR: {
11727 assert(Op0.getNumOperands() == 2 &&
11728 isa<ConstantSDNode>(Op0->getOperand(1)) &&
11729 "Node should have 2 operands with second one being a constant!");
11730
11731 if (Op0.getOperand(0).getValueType() != MVT::v4f32)
11732 return SDValue();
11733
11734 // Custom lower is only done for high or low doubleword.
11735 int Idx = Op0.getConstantOperandVal(1);
11736 if (Idx % 2 != 0)
11737 return SDValue();
11738
11739 // Since input is v4f32, at this point Idx is either 0 or 2.
11740 // Shift to get the doubleword position we want.
11741 int DWord = Idx >> 1;
11742
11743 // High and low word positions are different on little endian.
11744 if (Subtarget.isLittleEndian())
11745 DWord ^= 0x1;
11746
11747 return DAG.getNode(PPCISD::FP_EXTEND_HALF, dl, MVT::v2f64,
11748 Op0.getOperand(0), DAG.getConstant(DWord, dl, MVT::i32));
11749 }
11750 case ISD::FADD:
11751 case ISD::FMUL:
11752 case ISD::FSUB: {
11753 SDValue NewLoad[2];
11754 for (unsigned i = 0, ie = Op0.getNumOperands(); i != ie; ++i) {
11755 // Ensure both inputs are loads.
11756 SDValue LdOp = Op0.getOperand(i);
11757 if (LdOp.getOpcode() != ISD::LOAD)
11758 return SDValue();
11759 // Generate new load node.
11760 LoadSDNode *LD = cast<LoadSDNode>(LdOp);
11761 SDValue LoadOps[] = {LD->getChain(), LD->getBasePtr()};
11762 NewLoad[i] = DAG.getMemIntrinsicNode(
11763 PPCISD::LD_VSX_LH, dl, DAG.getVTList(MVT::v4f32, MVT::Other), LoadOps,
11764 LD->getMemoryVT(), LD->getMemOperand());
11765 }
11766 SDValue NewOp =
11767 DAG.getNode(Op0.getOpcode(), SDLoc(Op0), MVT::v4f32, NewLoad[0],
11768 NewLoad[1], Op0.getNode()->getFlags());
11769 return DAG.getNode(PPCISD::FP_EXTEND_HALF, dl, MVT::v2f64, NewOp,
11770 DAG.getConstant(0, dl, MVT::i32));
11771 }
11772 case ISD::LOAD: {
11773 LoadSDNode *LD = cast<LoadSDNode>(Op0);
11774 SDValue LoadOps[] = {LD->getChain(), LD->getBasePtr()};
11775 SDValue NewLd = DAG.getMemIntrinsicNode(
11776 PPCISD::LD_VSX_LH, dl, DAG.getVTList(MVT::v4f32, MVT::Other), LoadOps,
11777 LD->getMemoryVT(), LD->getMemOperand());
11778 return DAG.getNode(PPCISD::FP_EXTEND_HALF, dl, MVT::v2f64, NewLd,
11779 DAG.getConstant(0, dl, MVT::i32));
11780 }
11781 }
11782 llvm_unreachable("ERROR: Should return for all cases within switch.");
11783}
11784
11785/// LowerOperation - Provide custom lowering hooks for some operations.
11786///
11787SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
11788 switch (Op.getOpcode()) {
11789 default: llvm_unreachable("Wasn't expecting to be able to lower this!");
11790 case ISD::FPOW: return lowerPow(Op, DAG);
11791 case ISD::FSIN: return lowerSin(Op, DAG);
11792 case ISD::FCOS: return lowerCos(Op, DAG);
11793 case ISD::FLOG: return lowerLog(Op, DAG);
11794 case ISD::FLOG10: return lowerLog10(Op, DAG);
11795 case ISD::FEXP: return lowerExp(Op, DAG);
11796 case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
11797 case ISD::BlockAddress: return LowerBlockAddress(Op, DAG);
11798 case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG);
11799 case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
11800 case ISD::JumpTable: return LowerJumpTable(Op, DAG);
11801 case ISD::STRICT_FSETCC:
11802 case ISD::STRICT_FSETCCS:
11803 case ISD::SETCC: return LowerSETCC(Op, DAG);
11804 case ISD::INIT_TRAMPOLINE: return LowerINIT_TRAMPOLINE(Op, DAG);
11805 case ISD::ADJUST_TRAMPOLINE: return LowerADJUST_TRAMPOLINE(Op, DAG);
11806
11807 case ISD::INLINEASM:
11808 case ISD::INLINEASM_BR: return LowerINLINEASM(Op, DAG);
11809 // Variable argument lowering.
11810 case ISD::VASTART: return LowerVASTART(Op, DAG);
11811 case ISD::VAARG: return LowerVAARG(Op, DAG);
11812 case ISD::VACOPY: return LowerVACOPY(Op, DAG);
11813
11814 case ISD::STACKRESTORE: return LowerSTACKRESTORE(Op, DAG);
11815 case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG);
11816 case ISD::GET_DYNAMIC_AREA_OFFSET:
11817 return LowerGET_DYNAMIC_AREA_OFFSET(Op, DAG);
11818
11819 // Exception handling lowering.
11820 case ISD::EH_DWARF_CFA: return LowerEH_DWARF_CFA(Op, DAG);
11821 case ISD::EH_SJLJ_SETJMP: return lowerEH_SJLJ_SETJMP(Op, DAG);
11822 case ISD::EH_SJLJ_LONGJMP: return lowerEH_SJLJ_LONGJMP(Op, DAG);
11823
11824 case ISD::LOAD: return LowerLOAD(Op, DAG);
11825 case ISD::STORE: return LowerSTORE(Op, DAG);
11826 case ISD::TRUNCATE: return LowerTRUNCATE(Op, DAG);
11827 case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
11828 case ISD::STRICT_FP_TO_UINT:
11829 case ISD::STRICT_FP_TO_SINT:
11830 case ISD::FP_TO_UINT:
11831 case ISD::FP_TO_SINT: return LowerFP_TO_INT(Op, DAG, SDLoc(Op));
11832 case ISD::STRICT_UINT_TO_FP:
11833 case ISD::STRICT_SINT_TO_FP:
11834 case ISD::UINT_TO_FP:
11835 case ISD::SINT_TO_FP: return LowerINT_TO_FP(Op, DAG);
11836 case ISD::GET_ROUNDING: return LowerGET_ROUNDING(Op, DAG);
11837
11838 // Lower 64-bit shifts.
11839 case ISD::SHL_PARTS: return LowerSHL_PARTS(Op, DAG);
11840 case ISD::SRL_PARTS: return LowerSRL_PARTS(Op, DAG);
11841 case ISD::SRA_PARTS: return LowerSRA_PARTS(Op, DAG);
11842
11843 case ISD::FSHL: return LowerFunnelShift(Op, DAG);
11844 case ISD::FSHR: return LowerFunnelShift(Op, DAG);
11845
11846 // Vector-related lowering.
11847 case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG);
11848 case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG);
11849 case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
11850 case ISD::SCALAR_TO_VECTOR: return LowerSCALAR_TO_VECTOR(Op, DAG);
11851 case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
11852 case ISD::MUL: return LowerMUL(Op, DAG);
11853 case ISD::FP_EXTEND: return LowerFP_EXTEND(Op, DAG);
11854 case ISD::STRICT_FP_ROUND:
11855 case ISD::FP_ROUND:
11856 return LowerFP_ROUND(Op, DAG);
11857 case ISD::ROTL: return LowerROTL(Op, DAG);
11858
11859 // For counter-based loop handling.
11860 case ISD::INTRINSIC_W_CHAIN: return SDValue();
11861
11862 case ISD::BITCAST: return LowerBITCAST(Op, DAG);
11863
11864 // Frame & Return address.
11865 case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG);
11866 case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
11867
11869 return LowerINTRINSIC_VOID(Op, DAG);
11870 case ISD::BSWAP:
11871 return LowerBSWAP(Op, DAG);
11872 case ISD::ATOMIC_CMP_SWAP:
11873 return LowerATOMIC_CMP_SWAP(Op, DAG);
11874 case ISD::ATOMIC_STORE:
11875 return LowerATOMIC_LOAD_STORE(Op, DAG);
11876 case ISD::IS_FPCLASS:
11877 return LowerIS_FPCLASS(Op, DAG);
11878 }
11879}
11880
11881void PPCTargetLowering::ReplaceNodeResults(SDNode *N,
11882 SmallVectorImpl<SDValue> &Results,
11883 SelectionDAG &DAG) const {
11884 SDLoc dl(N);
11885 switch (N->getOpcode()) {
11886 default:
11887 llvm_unreachable("Do not know how to custom type legalize this operation!");
11888 case ISD::ATOMIC_LOAD: {
11889 SDValue Res = LowerATOMIC_LOAD_STORE(SDValue(N, 0), DAG);
11890 Results.push_back(Res);
11891 Results.push_back(Res.getValue(1));
11892 break;
11893 }
11894 case ISD::READCYCLECOUNTER: {
11895 SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
11896 SDValue RTB = DAG.getNode(PPCISD::READ_TIME_BASE, dl, VTs, N->getOperand(0));
11897
11898 Results.push_back(
11899 DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, RTB, RTB.getValue(1)));
11900 Results.push_back(RTB.getValue(2));
11901 break;
11902 }
11903 case ISD::INTRINSIC_W_CHAIN: {
11904 if (N->getConstantOperandVal(1) != Intrinsic::loop_decrement)
11905 break;
11906
11907 assert(N->getValueType(0) == MVT::i1 &&
11908 "Unexpected result type for CTR decrement intrinsic");
11909 EVT SVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
11910 N->getValueType(0));
11911 SDVTList VTs = DAG.getVTList(SVT, MVT::Other);
11912 SDValue NewInt = DAG.getNode(N->getOpcode(), dl, VTs, N->getOperand(0),
11913 N->getOperand(1));
11914
11915 Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, NewInt));
11916 Results.push_back(NewInt.getValue(1));
11917 break;
11918 }
11919 case ISD::INTRINSIC_WO_CHAIN: {
11920 switch (N->getConstantOperandVal(0)) {
11921 case Intrinsic::ppc_pack_longdouble:
11922 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::ppcf128,
11923 N->getOperand(2), N->getOperand(1)));
11924 break;
11925 case Intrinsic::ppc_maxfe:
11926 case Intrinsic::ppc_minfe:
11927 case Intrinsic::ppc_fnmsub:
11928 case Intrinsic::ppc_convert_f128_to_ppcf128:
11929 Results.push_back(LowerINTRINSIC_WO_CHAIN(SDValue(N, 0), DAG));
11930 break;
11931 }
11932 break;
11933 }
11934 case ISD::VAARG: {
11935 if (!Subtarget.isSVR4ABI() || Subtarget.isPPC64())
11936 return;
11937
11938 EVT VT = N->getValueType(0);
11939
11940 if (VT == MVT::i64) {
11941 SDValue NewNode = LowerVAARG(SDValue(N, 1), DAG);
11942
11943 Results.push_back(NewNode);
11944 Results.push_back(NewNode.getValue(1));
11945 }
11946 return;
11947 }
11948 case ISD::STRICT_FP_TO_SINT:
11949 case ISD::STRICT_FP_TO_UINT:
11950 case ISD::FP_TO_SINT:
11951 case ISD::FP_TO_UINT: {
11952 // LowerFP_TO_INT() can only handle f32 and f64.
11953 if (N->getOperand(N->isStrictFPOpcode() ? 1 : 0).getValueType() ==
11954 MVT::ppcf128)
11955 return;
11956 SDValue LoweredValue = LowerFP_TO_INT(SDValue(N, 0), DAG, dl);
11957 Results.push_back(LoweredValue);
11958 if (N->isStrictFPOpcode())
11959 Results.push_back(LoweredValue.getValue(1));
11960 return;
11961 }
11962 case ISD::TRUNCATE: {
11963 if (!N->getValueType(0).isVector())
11964 return;
11965 SDValue Lowered = LowerTRUNCATEVector(SDValue(N, 0), DAG);
11966 if (Lowered)
11967 Results.push_back(Lowered);
11968 return;
11969 }
11970 case ISD::FSHL:
11971 case ISD::FSHR:
11972 // Don't handle funnel shifts here.
11973 return;
11974 case ISD::BITCAST:
11975 // Don't handle bitcast here.
11976 return;
11977 case ISD::FP_EXTEND:
11978 SDValue Lowered = LowerFP_EXTEND(SDValue(N, 0), DAG);
11979 if (Lowered)
11980 Results.push_back(Lowered);
11981 return;
11982 }
11983}
11984
11985//===----------------------------------------------------------------------===//
11986// Other Lowering Code
11987//===----------------------------------------------------------------------===//
11988
11989static Instruction *callIntrinsic(IRBuilderBase &Builder, Intrinsic::ID Id) {
11990 Module *M = Builder.GetInsertBlock()->getParent()->getParent();
11991 Function *Func = Intrinsic::getDeclaration(M, Id);
11992 return Builder.CreateCall(Func, {});
11993}
11994
11995// The mappings for emitLeading/TrailingFence are taken from
11996// http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html
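// In short: seq_cst gets a full sync before the access, release and acq_rel
// get an lwsync before, and acquire (or stronger) gets a trailing cfence for
// loads or an lwsync for read-modify-write operations.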
11997Instruction *PPCTargetLowering::emitLeadingFence(IRBuilderBase &Builder,
11998 Instruction *Inst,
11999 AtomicOrdering Ord) const {
12000 if (Ord == AtomicOrdering::SequentiallyConsistent)
12001 return callIntrinsic(Builder, Intrinsic::ppc_sync);
12002 if (isReleaseOrStronger(Ord))
12003 return callIntrinsic(Builder, Intrinsic::ppc_lwsync);
12004 return nullptr;
12005}
12006
12007Instruction *PPCTargetLowering::emitTrailingFence(IRBuilderBase &Builder,
12008 Instruction *Inst,
12009 AtomicOrdering Ord) const {
12010 if (Inst->hasAtomicLoad() && isAcquireOrStronger(Ord)) {
12011 // See http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html and
12012 // http://www.rdrop.com/users/paulmck/scalability/paper/N2745r.2011.03.04a.html
12013 // and http://www.cl.cam.ac.uk/~pes20/cppppc/ for justification.
12014 if (isa<LoadInst>(Inst))
12015 return Builder.CreateCall(
12016 Intrinsic::getDeclaration(
12017 Builder.GetInsertBlock()->getParent()->getParent(),
12018 Intrinsic::ppc_cfence, {Inst->getType()}),
12019 {Inst});
12020 // FIXME: Can use isync for rmw operation.
12021 return callIntrinsic(Builder, Intrinsic::ppc_lwsync);
12022 }
12023 return nullptr;
12024}
12025
12026MachineBasicBlock *
12027PPCTargetLowering::EmitAtomicBinary(MachineInstr &MI, MachineBasicBlock *BB,
12028 unsigned AtomicSize,
12029 unsigned BinOpcode,
12030 unsigned CmpOpcode,
12031 unsigned CmpPred) const {
12032 // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
12033 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
12034
12035 auto LoadMnemonic = PPC::LDARX;
12036 auto StoreMnemonic = PPC::STDCX;
12037 switch (AtomicSize) {
12038 default:
12039 llvm_unreachable("Unexpected size of atomic entity");
12040 case 1:
12041 LoadMnemonic = PPC::LBARX;
12042 StoreMnemonic = PPC::STBCX;
12043 assert(Subtarget.hasPartwordAtomics() && "Call this only with size >=4");
12044 break;
12045 case 2:
12046 LoadMnemonic = PPC::LHARX;
12047 StoreMnemonic = PPC::STHCX;
12048 assert(Subtarget.hasPartwordAtomics() && "Call this only with size >=4");
12049 break;
12050 case 4:
12051 LoadMnemonic = PPC::LWARX;
12052 StoreMnemonic = PPC::STWCX;
12053 break;
12054 case 8:
12055 LoadMnemonic = PPC::LDARX;
12056 StoreMnemonic = PPC::STDCX;
12057 break;
12058 }
12059
12060 const BasicBlock *LLVM_BB = BB->getBasicBlock();
12061 MachineFunction *F = BB->getParent();
12062 MachineFunction::iterator It = ++BB->getIterator();
12063
12064 Register dest = MI.getOperand(0).getReg();
12065 Register ptrA = MI.getOperand(1).getReg();
12066 Register ptrB = MI.getOperand(2).getReg();
12067 Register incr = MI.getOperand(3).getReg();
12068 DebugLoc dl = MI.getDebugLoc();
12069
12070 MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
12071 MachineBasicBlock *loop2MBB =
12072 CmpOpcode ? F->CreateMachineBasicBlock(LLVM_BB) : nullptr;
12073 MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
12074 F->insert(It, loopMBB);
12075 if (CmpOpcode)
12076 F->insert(It, loop2MBB);
12077 F->insert(It, exitMBB);
12078 exitMBB->splice(exitMBB->begin(), BB,
12079 std::next(MachineBasicBlock::iterator(MI)), BB->end());
12080 exitMBB->transferSuccessorsAndUpdatePHIs(BB);
12081
12082 MachineRegisterInfo &RegInfo = F->getRegInfo();
12083 Register TmpReg = (!BinOpcode) ? incr :
12084 RegInfo.createVirtualRegister( AtomicSize == 8 ? &PPC::G8RCRegClass
12085 : &PPC::GPRCRegClass);
12086
12087 // thisMBB:
12088 // ...
12089 // fallthrough --> loopMBB
12090 BB->addSuccessor(loopMBB);
12091
12092 // loopMBB:
12093 // l[wd]arx dest, ptr
12094 // add r0, dest, incr
12095 // st[wd]cx. r0, ptr
12096 // bne- loopMBB
12097 // fallthrough --> exitMBB
12098
12099 // For max/min...
12100 // loopMBB:
12101 // l[wd]arx dest, ptr
12102 // cmpl?[wd] dest, incr
12103 // bgt exitMBB
12104 // loop2MBB:
12105 // st[wd]cx. dest, ptr
12106 // bne- loopMBB
12107 // fallthrough --> exitMBB
12108
12109 BB = loopMBB;
12110 BuildMI(BB, dl, TII->get(LoadMnemonic), dest)
12111 .addReg(ptrA).addReg(ptrB);
12112 if (BinOpcode)
12113 BuildMI(BB, dl, TII->get(BinOpcode), TmpReg).addReg(incr).addReg(dest);
12114 if (CmpOpcode) {
12115 Register CrReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
12116 // Signed comparisons of byte or halfword values must be sign-extended.
12117 if (CmpOpcode == PPC::CMPW && AtomicSize < 4) {
12118 Register ExtReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
12119 BuildMI(BB, dl, TII->get(AtomicSize == 1 ? PPC::EXTSB : PPC::EXTSH),
12120 ExtReg).addReg(dest);
12121 BuildMI(BB, dl, TII->get(CmpOpcode), CrReg).addReg(ExtReg).addReg(incr);
12122 } else
12123 BuildMI(BB, dl, TII->get(CmpOpcode), CrReg).addReg(dest).addReg(incr);
12124
12125 BuildMI(BB, dl, TII->get(PPC::BCC))
12126 .addImm(CmpPred)
12127 .addReg(CrReg)
12128 .addMBB(exitMBB);
12129 BB->addSuccessor(loop2MBB);
12130 BB->addSuccessor(exitMBB);
12131 BB = loop2MBB;
12132 }
12133 BuildMI(BB, dl, TII->get(StoreMnemonic))
12134 .addReg(TmpReg).addReg(ptrA).addReg(ptrB);
12135 BuildMI(BB, dl, TII->get(PPC::BCC))
12136 .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loopMBB);
12137 BB->addSuccessor(loopMBB);
12138 BB->addSuccessor(exitMBB);
12139
12140 // exitMBB:
12141 // ...
12142 BB = exitMBB;
12143 return BB;
12144}
12145
12147 switch(MI.getOpcode()) {
12148 default:
12149 return false;
12150 case PPC::COPY:
12151 return TII->isSignExtended(MI.getOperand(1).getReg(),
12152 &MI.getMF()->getRegInfo());
12153 case PPC::LHA:
12154 case PPC::LHA8:
12155 case PPC::LHAU:
12156 case PPC::LHAU8:
12157 case PPC::LHAUX:
12158 case PPC::LHAUX8:
12159 case PPC::LHAX:
12160 case PPC::LHAX8:
12161 case PPC::LWA:
12162 case PPC::LWAUX:
12163 case PPC::LWAX:
12164 case PPC::LWAX_32:
12165 case PPC::LWA_32:
12166 case PPC::PLHA:
12167 case PPC::PLHA8:
12168 case PPC::PLHA8pc:
12169 case PPC::PLHApc:
12170 case PPC::PLWA:
12171 case PPC::PLWA8:
12172 case PPC::PLWA8pc:
12173 case PPC::PLWApc:
12174 case PPC::EXTSB:
12175 case PPC::EXTSB8:
12176 case PPC::EXTSB8_32_64:
12177 case PPC::EXTSB8_rec:
12178 case PPC::EXTSB_rec:
12179 case PPC::EXTSH:
12180 case PPC::EXTSH8:
12181 case PPC::EXTSH8_32_64:
12182 case PPC::EXTSH8_rec:
12183 case PPC::EXTSH_rec:
12184 case PPC::EXTSW:
12185 case PPC::EXTSWSLI:
12186 case PPC::EXTSWSLI_32_64:
12187 case PPC::EXTSWSLI_32_64_rec:
12188 case PPC::EXTSWSLI_rec:
12189 case PPC::EXTSW_32:
12190 case PPC::EXTSW_32_64:
12191 case PPC::EXTSW_32_64_rec:
12192 case PPC::EXTSW_rec:
12193 case PPC::SRAW:
12194 case PPC::SRAWI:
12195 case PPC::SRAWI_rec:
12196 case PPC::SRAW_rec:
12197 return true;
12198 }
12199 return false;
12200}
12201
12202MachineBasicBlock *PPCTargetLowering::EmitPartwordAtomicBinary(
12203 MachineInstr &MI, MachineBasicBlock *BB,
12204 bool is8bit, // operation
12205 unsigned BinOpcode, unsigned CmpOpcode, unsigned CmpPred) const {
12206 // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
12207 const PPCInstrInfo *TII = Subtarget.getInstrInfo();
12208
12209 // If this is a signed comparison and the value being compared is not known
12210 // to be sign extended, sign extend it here.
12211 DebugLoc dl = MI.getDebugLoc();
12212 MachineFunction *F = BB->getParent();
12213 MachineRegisterInfo &RegInfo = F->getRegInfo();
12214 Register incr = MI.getOperand(3).getReg();
12215 bool IsSignExtended =
12216 incr.isVirtual() && isSignExtended(*RegInfo.getVRegDef(incr), TII);
12217
12218 if (CmpOpcode == PPC::CMPW && !IsSignExtended) {
12219 Register ValueReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
12220 BuildMI(*BB, MI, dl, TII->get(is8bit ? PPC::EXTSB : PPC::EXTSH), ValueReg)
12221 .addReg(MI.getOperand(3).getReg());
12222 MI.getOperand(3).setReg(ValueReg);
12223 incr = ValueReg;
12224 }
12225 // If we support part-word atomic mnemonics, just use them
12226 if (Subtarget.hasPartwordAtomics())
12227 return EmitAtomicBinary(MI, BB, is8bit ? 1 : 2, BinOpcode, CmpOpcode,
12228 CmpPred);
12229
12230 // In 64 bit mode we have to use 64 bits for addresses, even though the
12231 // lwarx/stwcx are 32 bits. With the 32-bit atomics we can use address
12232 // registers without caring whether they're 32 or 64, but here we're
12233 // doing actual arithmetic on the addresses.
12234 bool is64bit = Subtarget.isPPC64();
12235 bool isLittleEndian = Subtarget.isLittleEndian();
12236 unsigned ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;
12237
12238 const BasicBlock *LLVM_BB = BB->getBasicBlock();
12239 MachineFunction::iterator It = ++BB->getIterator();
12240
12241 Register dest = MI.getOperand(0).getReg();
12242 Register ptrA = MI.getOperand(1).getReg();
12243 Register ptrB = MI.getOperand(2).getReg();
12244
12245 MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
12246 MachineBasicBlock *loop2MBB =
12247 CmpOpcode ? F->CreateMachineBasicBlock(LLVM_BB) : nullptr;
12248 MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
12249 F->insert(It, loopMBB);
12250 if (CmpOpcode)
12251 F->insert(It, loop2MBB);
12252 F->insert(It, exitMBB);
12253 exitMBB->splice(exitMBB->begin(), BB,
12254 std::next(MachineBasicBlock::iterator(MI)), BB->end());
12255 exitMBB->transferSuccessorsAndUpdatePHIs(BB);
12256
12257 const TargetRegisterClass *RC =
12258 is64bit ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
12259 const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
12260
12261 Register PtrReg = RegInfo.createVirtualRegister(RC);
12262 Register Shift1Reg = RegInfo.createVirtualRegister(GPRC);
12263 Register ShiftReg =
12264 isLittleEndian ? Shift1Reg : RegInfo.createVirtualRegister(GPRC);
12265 Register Incr2Reg = RegInfo.createVirtualRegister(GPRC);
12266 Register MaskReg = RegInfo.createVirtualRegister(GPRC);
12267 Register Mask2Reg = RegInfo.createVirtualRegister(GPRC);
12268 Register Mask3Reg = RegInfo.createVirtualRegister(GPRC);
12269 Register Tmp2Reg = RegInfo.createVirtualRegister(GPRC);
12270 Register Tmp3Reg = RegInfo.createVirtualRegister(GPRC);
12271 Register Tmp4Reg = RegInfo.createVirtualRegister(GPRC);
12272 Register TmpDestReg = RegInfo.createVirtualRegister(GPRC);
12273 Register SrwDestReg = RegInfo.createVirtualRegister(GPRC);
12274 Register Ptr1Reg;
12275 Register TmpReg =
12276 (!BinOpcode) ? Incr2Reg : RegInfo.createVirtualRegister(GPRC);
12277
12278 // thisMBB:
12279 // ...
12280 // fallthrough --> loopMBB
12281 BB->addSuccessor(loopMBB);
12282
12283 // The 4-byte load must be aligned, while a char or short may be
12284 // anywhere in the word. Hence all this nasty bookkeeping code.
12285 // add ptr1, ptrA, ptrB [copy if ptrA==0]
12286 // rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27]
12287 // xori shift, shift1, 24 [16]
12288 // rlwinm ptr, ptr1, 0, 0, 29
12289 // slw incr2, incr, shift
12290 // li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535]
12291 // slw mask, mask2, shift
12292 // loopMBB:
12293 // lwarx tmpDest, ptr
12294 // add tmp, tmpDest, incr2
12295 // andc tmp2, tmpDest, mask
12296 // and tmp3, tmp, mask
12297 // or tmp4, tmp3, tmp2
12298 // stwcx. tmp4, ptr
12299 // bne- loopMBB
12300 // fallthrough --> exitMBB
12301 // srw SrwDest, tmpDest, shift
12302 // rlwinm SrwDest, SrwDest, 0, 24 [16], 31
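// For example, for a byte at address A the first rlwinm yields
// shift1 = (A & 3) * 8; on big-endian targets the xori remaps this to
// 24 - shift1 so that byte offset 0 selects the most significant byte.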
12303 if (ptrA != ZeroReg) {
12304 Ptr1Reg = RegInfo.createVirtualRegister(RC);
12305 BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)
12306 .addReg(ptrA)
12307 .addReg(ptrB);
12308 } else {
12309 Ptr1Reg = ptrB;
12310 }
12311 // We need to use the 32-bit subregister to avoid a register class mismatch
12312 // in 64-bit mode.
12313 BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg)
12314 .addReg(Ptr1Reg, 0, is64bit ? PPC::sub_32 : 0)
12315 .addImm(3)
12316 .addImm(27)
12317 .addImm(is8bit ? 28 : 27);
12318 if (!isLittleEndian)
12319 BuildMI(BB, dl, TII->get(PPC::XORI), ShiftReg)
12320 .addReg(Shift1Reg)
12321 .addImm(is8bit ? 24 : 16);
12322 if (is64bit)
12323 BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg)
12324 .addReg(Ptr1Reg)
12325 .addImm(0)
12326 .addImm(61);
12327 else
12328 BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg)
12329 .addReg(Ptr1Reg)
12330 .addImm(0)
12331 .addImm(0)
12332 .addImm(29);
12333 BuildMI(BB, dl, TII->get(PPC::SLW), Incr2Reg).addReg(incr).addReg(ShiftReg);
12334 if (is8bit)
12335 BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255);
12336 else {
12337 BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0);
12338 BuildMI(BB, dl, TII->get(PPC::ORI), Mask2Reg)
12339 .addReg(Mask3Reg)
12340 .addImm(65535);
12341 }
12342 BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg)
12343 .addReg(Mask2Reg)
12344 .addReg(ShiftReg);
12345
12346 BB = loopMBB;
12347 BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg)
12348 .addReg(ZeroReg)
12349 .addReg(PtrReg);
12350 if (BinOpcode)
12351 BuildMI(BB, dl, TII->get(BinOpcode), TmpReg)
12352 .addReg(Incr2Reg)
12353 .addReg(TmpDestReg);
12354 BuildMI(BB, dl, TII->get(PPC::ANDC), Tmp2Reg)
12355 .addReg(TmpDestReg)
12356 .addReg(MaskReg);
12357 BuildMI(BB, dl, TII->get(PPC::AND), Tmp3Reg).addReg(TmpReg).addReg(MaskReg);
12358 if (CmpOpcode) {
12359 // For unsigned comparisons, we can directly compare the shifted values.
12360 // For signed comparisons we shift and sign extend.
12361 Register SReg = RegInfo.createVirtualRegister(GPRC);
12362 Register CrReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
12363 BuildMI(BB, dl, TII->get(PPC::AND), SReg)
12364 .addReg(TmpDestReg)
12365 .addReg(MaskReg);
12366 unsigned ValueReg = SReg;
12367 unsigned CmpReg = Incr2Reg;
12368 if (CmpOpcode == PPC::CMPW) {
12369 ValueReg = RegInfo.createVirtualRegister(GPRC);
12370 BuildMI(BB, dl, TII->get(PPC::SRW), ValueReg)
12371 .addReg(SReg)
12372 .addReg(ShiftReg);
12373 Register ValueSReg = RegInfo.createVirtualRegister(GPRC);
12374 BuildMI(BB, dl, TII->get(is8bit ? PPC::EXTSB : PPC::EXTSH), ValueSReg)
12375 .addReg(ValueReg);
12376 ValueReg = ValueSReg;
12377 CmpReg = incr;
12378 }
12379 BuildMI(BB, dl, TII->get(CmpOpcode), CrReg).addReg(ValueReg).addReg(CmpReg);
12380 BuildMI(BB, dl, TII->get(PPC::BCC))
12381 .addImm(CmpPred)
12382 .addReg(CrReg)
12383 .addMBB(exitMBB);
12384 BB->addSuccessor(loop2MBB);
12385 BB->addSuccessor(exitMBB);
12386 BB = loop2MBB;
12387 }
12388 BuildMI(BB, dl, TII->get(PPC::OR), Tmp4Reg).addReg(Tmp3Reg).addReg(Tmp2Reg);
12389 BuildMI(BB, dl, TII->get(PPC::STWCX))
12390 .addReg(Tmp4Reg)
12391 .addReg(ZeroReg)
12392 .addReg(PtrReg);
12393 BuildMI(BB, dl, TII->get(PPC::BCC))
12394 .addImm(PPC::PRED_NE)
12395 .addReg(PPC::CR0)
12396 .addMBB(loopMBB);
12397 BB->addSuccessor(loopMBB);
12398 BB->addSuccessor(exitMBB);
12399
12400 // exitMBB:
12401 // ...
12402 BB = exitMBB;
12403 // Since the shift amount is not a constant, we need to clear
12404 // the upper bits with a separate RLWINM.
12405 BuildMI(*BB, BB->begin(), dl, TII->get(PPC::RLWINM), dest)
12406 .addReg(SrwDestReg)
12407 .addImm(0)
12408 .addImm(is8bit ? 24 : 16)
12409 .addImm(31);
12410 BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), SrwDestReg)
12411 .addReg(TmpDestReg)
12412 .addReg(ShiftReg);
12413 return BB;
12414}
12415
12416MachineBasicBlock *
12417PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr &MI,
12418 MachineBasicBlock *MBB) const {
12419 DebugLoc DL = MI.getDebugLoc();
12420 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
12421 const PPCRegisterInfo *TRI = Subtarget.getRegisterInfo();
12422
12423 MachineFunction *MF = MBB->getParent();
12424 MachineRegisterInfo &MRI = MF->getRegInfo();
12425
12426 const BasicBlock *BB = MBB->getBasicBlock();
12427 MachineFunction::iterator I = ++MBB->getIterator();
12428
12429 Register DstReg = MI.getOperand(0).getReg();
12430 const TargetRegisterClass *RC = MRI.getRegClass(DstReg);
12431 assert(TRI->isTypeLegalForClass(*RC, MVT::i32) && "Invalid destination!");
12432 Register mainDstReg = MRI.createVirtualRegister(RC);
12433 Register restoreDstReg = MRI.createVirtualRegister(RC);
12434
12435 MVT PVT = getPointerTy(MF->getDataLayout());
12436 assert((PVT == MVT::i64 || PVT == MVT::i32) &&
12437 "Invalid Pointer Size!");
12438 // For v = setjmp(buf), we generate
12439 //
12440 // thisMBB:
12441 // SjLjSetup mainMBB
12442 // bl mainMBB
12443 // v_restore = 1
12444 // b sinkMBB
12445 //
12446 // mainMBB:
12447 // buf[LabelOffset] = LR
12448 // v_main = 0
12449 //
12450 // sinkMBB:
12451 // v = phi(main, restore)
12452 //
12453
12454 MachineBasicBlock *thisMBB = MBB;
12455 MachineBasicBlock *mainMBB = MF->CreateMachineBasicBlock(BB);
12456 MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(BB);
12457 MF->insert(I, mainMBB);
12458 MF->insert(I, sinkMBB);
12459
12460 MachineInstrBuilder MIB;
12461
12462 // Transfer the remainder of BB and its successor edges to sinkMBB.
12463 sinkMBB->splice(sinkMBB->begin(), MBB,
12464 std::next(MachineBasicBlock::iterator(MI)), MBB->end());
12465 sinkMBB->transferSuccessorsAndUpdatePHIs(MBB);
12466
12467 // Note that the structure of the jmp_buf used here is not compatible
12468 // with that used by libc, and is not designed to be. Specifically, it
12469 // stores only those 'reserved' registers that LLVM does not otherwise
12470 // understand how to spill. Also, by convention, by the time this
12471 // intrinsic is called, Clang has already stored the frame address in the
12472 // first slot of the buffer and stack address in the third. Following the
12473 // X86 target code, we'll store the jump address in the second slot. We also
12474 // need to save the TOC pointer (R2) to handle jumps between shared
12475 // libraries, and that will be stored in the fourth slot. The thread
12476 // identifier (R13) is not affected.
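// The resulting layout, in pointer-sized slots: 0 = frame address (stored by
// Clang), 1 = jump address (LR), 2 = stack address (stored by Clang),
// 3 = TOC pointer (R2), 4 = base pointer.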
12477
12478 // thisMBB:
12479 const int64_t LabelOffset = 1 * PVT.getStoreSize();
12480 const int64_t TOCOffset = 3 * PVT.getStoreSize();
12481 const int64_t BPOffset = 4 * PVT.getStoreSize();
12482
12483 // Prepare the IP in a register.
12484 const TargetRegisterClass *PtrRC = getRegClassFor(PVT);
12485 Register LabelReg = MRI.createVirtualRegister(PtrRC);
12486 Register BufReg = MI.getOperand(1).getReg();
12487
12488 if (Subtarget.is64BitELFABI()) {
12489 setUsesTOCBasePtr(*MBB->getParent());
12490 MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::STD))
12491 .addReg(PPC::X2)
12492 .addImm(TOCOffset)
12493 .addReg(BufReg)
12494 .cloneMemRefs(MI);
12495 }
12496
12497 // Naked functions never have a base pointer, and so we use r1. For all
12498 // other functions, this decision must be delayed until PEI.
12499 unsigned BaseReg;
12500 if (MF->getFunction().hasFnAttribute(Attribute::Naked))
12501 BaseReg = Subtarget.isPPC64() ? PPC::X1 : PPC::R1;
12502 else
12503 BaseReg = Subtarget.isPPC64() ? PPC::BP8 : PPC::BP;
12504
12505 MIB = BuildMI(*thisMBB, MI, DL,
12506 TII->get(Subtarget.isPPC64() ? PPC::STD : PPC::STW))
12507 .addReg(BaseReg)
12508 .addImm(BPOffset)
12509 .addReg(BufReg)
12510 .cloneMemRefs(MI);
12511
12512 // Setup
12513 MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::BCLalways)).addMBB(mainMBB);
12514 MIB.addRegMask(TRI->getNoPreservedMask());
12515
12516 BuildMI(*thisMBB, MI, DL, TII->get(PPC::LI), restoreDstReg).addImm(1);
12517
12518 MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::EH_SjLj_Setup))
12519 .addMBB(mainMBB);
12520 MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::B)).addMBB(sinkMBB);
12521
12522 thisMBB->addSuccessor(mainMBB, BranchProbability::getZero());
12523 thisMBB->addSuccessor(sinkMBB, BranchProbability::getOne());
12524
12525 // mainMBB:
12526 // mainDstReg = 0
12527 MIB =
12528 BuildMI(mainMBB, DL,
12529 TII->get(Subtarget.isPPC64() ? PPC::MFLR8 : PPC::MFLR), LabelReg);
12530
12531 // Store IP
12532 if (Subtarget.isPPC64()) {
12533 MIB = BuildMI(mainMBB, DL, TII->get(PPC::STD))
12534 .addReg(LabelReg)
12535 .addImm(LabelOffset)
12536 .addReg(BufReg);
12537 } else {
12538 MIB = BuildMI(mainMBB, DL, TII->get(PPC::STW))
12539 .addReg(LabelReg)
12540 .addImm(LabelOffset)
12541 .addReg(BufReg);
12542 }
12543 MIB.cloneMemRefs(MI);
12544
12545 BuildMI(mainMBB, DL, TII->get(PPC::LI), mainDstReg).addImm(0);
12546 mainMBB->addSuccessor(sinkMBB);
12547
12548 // sinkMBB:
12549 BuildMI(*sinkMBB, sinkMBB->begin(), DL,
12550 TII->get(PPC::PHI), DstReg)
12551 .addReg(mainDstReg).addMBB(mainMBB)
12552 .addReg(restoreDstReg).addMBB(thisMBB);
12553
12554 MI.eraseFromParent();
12555 return sinkMBB;
12556}
12557
12557
12558MachineBasicBlock *
12559PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr &MI,
12560 MachineBasicBlock *MBB) const {
12561 DebugLoc DL = MI.getDebugLoc();
12562 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
12563
12564 MachineFunction *MF = MBB->getParent();
12565 MachineRegisterInfo &MRI = MF->getRegInfo();
12566
12567 MVT PVT = getPointerTy(MF->getDataLayout());
12568 assert((PVT == MVT::i64 || PVT == MVT::i32) &&
12569 "Invalid Pointer Size!");
12570
12571 const TargetRegisterClass *RC =
12572 (PVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
12573 Register Tmp = MRI.createVirtualRegister(RC);
12574 // Since FP is only updated here but NOT referenced, it's treated as GPR.
12575 unsigned FP = (PVT == MVT::i64) ? PPC::X31 : PPC::R31;
12576 unsigned SP = (PVT == MVT::i64) ? PPC::X1 : PPC::R1;
12577 unsigned BP =
12578 (PVT == MVT::i64)
12579 ? PPC::X30
12580 : (Subtarget.isSVR4ABI() && isPositionIndependent() ? PPC::R29
12581 : PPC::R30);
12582
12584
12585 const int64_t LabelOffset = 1 * PVT.getStoreSize();
12586 const int64_t SPOffset = 2 * PVT.getStoreSize();
12587 const int64_t TOCOffset = 3 * PVT.getStoreSize();
12588 const int64_t BPOffset = 4 * PVT.getStoreSize();
12589
12590 Register BufReg = MI.getOperand(0).getReg();
12591
12592 // Reload FP (the jumped-to function may not have had a
12593 // frame pointer, and if so, then its r31 will be restored
12594 // as necessary).
12595 if (PVT == MVT::i64) {
12596 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), FP)
12597 .addImm(0)
12598 .addReg(BufReg);
12599 } else {
12600 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), FP)
12601 .addImm(0)
12602 .addReg(BufReg);
12603 }
12604 MIB.cloneMemRefs(MI);
12605
12606 // Reload IP
12607 if (PVT == MVT::i64) {
12608 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), Tmp)
12609 .addImm(LabelOffset)
12610 .addReg(BufReg);
12611 } else {
12612 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), Tmp)
12613 .addImm(LabelOffset)
12614 .addReg(BufReg);
12615 }
12616 MIB.cloneMemRefs(MI);
12617
12618 // Reload SP
12619 if (PVT == MVT::i64) {
12620 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), SP)
12621 .addImm(SPOffset)
12622 .addReg(BufReg);
12623 } else {
12624 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), SP)
12625 .addImm(SPOffset)
12626 .addReg(BufReg);
12627 }
12628 MIB.cloneMemRefs(MI);
12629
12630 // Reload BP
12631 if (PVT == MVT::i64) {
12632 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), BP)
12633 .addImm(BPOffset)
12634 .addReg(BufReg);
12635 } else {
12636 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), BP)
12637 .addImm(BPOffset)
12638 .addReg(BufReg);
12639 }
12640 MIB.cloneMemRefs(MI);
12641
12642 // Reload TOC
12643 if (PVT == MVT::i64 && Subtarget.isSVR4ABI()) {
12645 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), PPC::X2)
12646 .addImm(TOCOffset)
12647 .addReg(BufReg)
12648 .cloneMemRefs(MI);
12649 }
12650
12651 // Jump
12652 BuildMI(*MBB, MI, DL,
12653 TII->get(PVT == MVT::i64 ? PPC::MTCTR8 : PPC::MTCTR)).addReg(Tmp);
12654 BuildMI(*MBB, MI, DL, TII->get(PVT == MVT::i64 ? PPC::BCTR8 : PPC::BCTR));
12655
12656 MI.eraseFromParent();
12657 return MBB;
12658}
12659
12660bool PPCTargetLowering::hasInlineStackProbe(const MachineFunction &MF) const {
12661 // If the function specifically requests inline stack probes, emit them.
12662 if (MF.getFunction().hasFnAttribute("probe-stack"))
12663 return MF.getFunction().getFnAttribute("probe-stack").getValueAsString() ==
12664 "inline-asm";
12665 return false;
12666}
12667
12668unsigned PPCTargetLowering::getStackProbeSize(const MachineFunction &MF) const {
12669 const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
12670 unsigned StackAlign = TFI->getStackAlignment();
12671 assert(StackAlign >= 1 && isPowerOf2_32(StackAlign) &&
12672 "Unexpected stack alignment");
12673 // The default stack probe size is 4096 if the function has no
12674 // stack-probe-size attribute.
12675 const Function &Fn = MF.getFunction();
12676 unsigned StackProbeSize =
12677 Fn.getFnAttributeAsParsedInteger("stack-probe-size", 4096);
12678 // Round down to the stack alignment.
12679 StackProbeSize &= ~(StackAlign - 1);
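 // For example, with a 16-byte stack alignment a "stack-probe-size"="4100"
 // attribute is rounded down to 4096, while a value smaller than the alignment
 // rounds down to 0 and falls back to StackAlign below.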
12680 return StackProbeSize ? StackProbeSize : StackAlign;
12681}
12682
12683// Lower dynamic stack allocation with probing. `emitProbedAlloca` is split
12684// into three phases. In the first phase, it uses the pseudo instruction
12685// PREPARE_PROBED_ALLOCA to get the future result of the actual FramePointer
12686// and FinalStackPtr. In the second phase, it generates a loop that probes blocks.
12687// Finally, it uses the pseudo instruction DYNAREAOFFSET to get the future result
12688// of MaxCallFrameSize so that it can calculate the correct data area pointer.
12689MachineBasicBlock *
12690PPCTargetLowering::emitProbedAlloca(MachineInstr &MI,
12691 MachineBasicBlock *MBB) const {
12692 const bool isPPC64 = Subtarget.isPPC64();
12693 MachineFunction *MF = MBB->getParent();
12694 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
12695 DebugLoc DL = MI.getDebugLoc();
12696 const unsigned ProbeSize = getStackProbeSize(*MF);
12697 const BasicBlock *ProbedBB = MBB->getBasicBlock();
12698 MachineRegisterInfo &MRI = MF->getRegInfo();
12699 // The CFG of the probing code looks as follows:
12700 // +-----+
12701 // | MBB |
12702 // +--+--+
12703 // |
12704 // +----v----+
12705 // +--->+ TestMBB +---+
12706 // | +----+----+ |
12707 // | | |
12708 // | +-----v----+ |
12709 // +---+ BlockMBB | |
12710 // +----------+ |
12711 // |
12712 // +---------+ |
12713 // | TailMBB +<--+
12714 // +---------+
12715 // In MBB, calculate previous frame pointer and final stack pointer.
12716 // In TestMBB, test whether sp equals the final stack pointer; if so, jump to
12717 // TailMBB. In BlockMBB, update the sp atomically and jump back to TestMBB.
12718 // TailMBB is spliced via \p MI.
12719 MachineBasicBlock *TestMBB = MF->CreateMachineBasicBlock(ProbedBB);
12720 MachineBasicBlock *TailMBB = MF->CreateMachineBasicBlock(ProbedBB);
12721 MachineBasicBlock *BlockMBB = MF->CreateMachineBasicBlock(ProbedBB);
12722
12723 MachineFunction::iterator MBBIter = ++MBB->getIterator();
12724 MF->insert(MBBIter, TestMBB);
12725 MF->insert(MBBIter, BlockMBB);
12726 MF->insert(MBBIter, TailMBB);
12727
12728 const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
12729 const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
12730
12731 Register DstReg = MI.getOperand(0).getReg();
12732 Register NegSizeReg = MI.getOperand(1).getReg();
12733 Register SPReg = isPPC64 ? PPC::X1 : PPC::R1;
12734 Register FinalStackPtr = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
12735 Register FramePointer = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
12736 Register ActualNegSizeReg = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
12737
12738 // Since the value of NegSizeReg might be realigned during prologue/epilogue
12739 // insertion, insert a PREPARE_PROBED_ALLOCA pseudo instruction to get the
12740 // actual FramePointer and NegSize.
12741 unsigned ProbeOpc;
12742 if (!MRI.hasOneNonDBGUse(NegSizeReg))
12743 ProbeOpc =
12744 isPPC64 ? PPC::PREPARE_PROBED_ALLOCA_64 : PPC::PREPARE_PROBED_ALLOCA_32;
12745 else
12746 // By using the PREPARE_PROBED_ALLOCA_NEGSIZE_SAME_REG variant,
12747 // ActualNegSizeReg and NegSizeReg will be allocated to the same physical
12748 // register, avoiding a redundant copy, since NegSizeReg's only use is the
12749 // current MI, which will then be replaced by PREPARE_PROBED_ALLOCA.
12750 ProbeOpc = isPPC64 ? PPC::PREPARE_PROBED_ALLOCA_NEGSIZE_SAME_REG_64
12751 : PPC::PREPARE_PROBED_ALLOCA_NEGSIZE_SAME_REG_32;
12752 BuildMI(*MBB, {MI}, DL, TII->get(ProbeOpc), FramePointer)
12753 .addDef(ActualNegSizeReg)
12754 .addReg(NegSizeReg)
12755 .add(MI.getOperand(2))
12756 .add(MI.getOperand(3));
12757
12758 // Calculate the final stack pointer, which equals SP + ActualNegSize.
12759 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::ADD8 : PPC::ADD4),
12760 FinalStackPtr)
12761 .addReg(SPReg)
12762 .addReg(ActualNegSizeReg);
12763
12764 // Materialize a scratch register for update.
12765 int64_t NegProbeSize = -(int64_t)ProbeSize;
12766 assert(isInt<32>(NegProbeSize) && "Unhandled probe size!");
12767 Register ScratchReg = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
12768 if (!isInt<16>(NegProbeSize)) {
12769 Register TempReg = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
12770 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::LIS8 : PPC::LIS), TempReg)
12771 .addImm(NegProbeSize >> 16);
12772 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::ORI8 : PPC::ORI),
12773 ScratchReg)
12774 .addReg(TempReg)
12775 .addImm(NegProbeSize & 0xFFFF);
12776 } else
12777 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::LI8 : PPC::LI), ScratchReg)
12778 .addImm(NegProbeSize);
12779
12780 {
12781 // Probe the leading residual part.
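 // NegMod = ActualNegSize - (ActualNegSize / NegProbeSize) * NegProbeSize,
 // i.e. the non-positive remainder of the allocation size with respect to
 // ProbeSize. The store-with-update below probes that residual chunk and
 // advances SP by it, so the remaining allocation handled by the loop is an
 // exact multiple of ProbeSize.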
12782 Register Div = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
12783 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::DIVD : PPC::DIVW), Div)
12784 .addReg(ActualNegSizeReg)
12785 .addReg(ScratchReg);
12786 Register Mul = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
12787 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::MULLD : PPC::MULLW), Mul)
12788 .addReg(Div)
12789 .addReg(ScratchReg);
12790 Register NegMod = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
12791 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::SUBF8 : PPC::SUBF), NegMod)
12792 .addReg(Mul)
12793 .addReg(ActualNegSizeReg);
12794 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::STDUX : PPC::STWUX), SPReg)
12795 .addReg(FramePointer)
12796 .addReg(SPReg)
12797 .addReg(NegMod);
12798 }
12799
12800 {
12801 // The remaining part should be a multiple of ProbeSize.
12802 Register CmpResult = MRI.createVirtualRegister(&PPC::CRRCRegClass);
12803 BuildMI(TestMBB, DL, TII->get(isPPC64 ? PPC::CMPD : PPC::CMPW), CmpResult)
12804 .addReg(SPReg)
12805 .addReg(FinalStackPtr);
12806 BuildMI(TestMBB, DL, TII->get(PPC::BCC))
12807 .addImm(PPC::PRED_EQ)
12808 .addReg(CmpResult)
12809 .addMBB(TailMBB);
12810 TestMBB->addSuccessor(BlockMBB);
12811 TestMBB->addSuccessor(TailMBB);
12812 }
12813
12814 {
12815 // Touch the block.
12816 // |P...|P...|P...
12817 BuildMI(BlockMBB, DL, TII->get(isPPC64 ? PPC::STDUX : PPC::STWUX), SPReg)
12818 .addReg(FramePointer)
12819 .addReg(SPReg)
12820 .addReg(ScratchReg);
12821 BuildMI(BlockMBB, DL, TII->get(PPC::B)).addMBB(TestMBB);
12822 BlockMBB->addSuccessor(TestMBB);
12823 }
12824
12825 // Calculation of MaxCallFrameSize is deferred to prologue/epilogue insertion,
12826 // so use the DYNAREAOFFSET pseudo instruction to get the future result.
12827 Register MaxCallFrameSizeReg =
12828 MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
12829 BuildMI(TailMBB, DL,
12830 TII->get(isPPC64 ? PPC::DYNAREAOFFSET8 : PPC::DYNAREAOFFSET),
12831 MaxCallFrameSizeReg)
12832 .add(MI.getOperand(2))
12833 .add(MI.getOperand(3));
12834 BuildMI(TailMBB, DL, TII->get(isPPC64 ? PPC::ADD8 : PPC::ADD4), DstReg)
12835 .addReg(SPReg)
12836 .addReg(MaxCallFrameSizeReg);
12837
12838 // Splice instructions after MI to TailMBB.
12839 TailMBB->splice(TailMBB->end(), MBB,
12840 std::next(MachineBasicBlock::iterator(MI)), MBB->end());
12841 TailMBB->transferSuccessorsAndUpdatePHIs(MBB);
12842 MBB->addSuccessor(TestMBB);
12843
12844 // Delete the pseudo instruction.
12845 MI.eraseFromParent();
12846
12847 ++NumDynamicAllocaProbed;
12848 return TailMBB;
12849}
12850
12851static bool IsSelectCC(MachineInstr &MI) {
12852 switch (MI.getOpcode()) {
12853 case PPC::SELECT_CC_I4:
12854 case PPC::SELECT_CC_I8:
12855 case PPC::SELECT_CC_F4:
12856 case PPC::SELECT_CC_F8:
12857 case PPC::SELECT_CC_F16:
12858 case PPC::SELECT_CC_VRRC:
12859 case PPC::SELECT_CC_VSFRC:
12860 case PPC::SELECT_CC_VSSRC:
12861 case PPC::SELECT_CC_VSRC:
12862 case PPC::SELECT_CC_SPE4:
12863 case PPC::SELECT_CC_SPE:
12864 return true;
12865 default:
12866 return false;
12867 }
12868}
12869
12870static bool IsSelect(MachineInstr &MI) {
12871 switch (MI.getOpcode()) {
12872 case PPC::SELECT_I4:
12873 case PPC::SELECT_I8:
12874 case PPC::SELECT_F4:
12875 case PPC::SELECT_F8:
12876 case PPC::SELECT_F16:
12877 case PPC::SELECT_SPE:
12878 case PPC::SELECT_SPE4:
12879 case PPC::SELECT_VRRC:
12880 case PPC::SELECT_VSFRC:
12881 case PPC::SELECT_VSSRC:
12882 case PPC::SELECT_VSRC:
12883 return true;
12884 default:
12885 return false;
12886 }
12887}
12888
12889MachineBasicBlock *
12890PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
12891 MachineBasicBlock *BB) const {
12892 if (MI.getOpcode() == TargetOpcode::STACKMAP ||
12893 MI.getOpcode() == TargetOpcode::PATCHPOINT) {
12894 if (Subtarget.is64BitELFABI() &&
12895 MI.getOpcode() == TargetOpcode::PATCHPOINT &&
12896 !Subtarget.isUsingPCRelativeCalls()) {
12897 // Call lowering should have added an r2 operand to indicate a dependence
12898 // on the TOC base pointer value. It can't, however, because there is no
12899 // way to mark the dependence as implicit there, and so the stackmap code
12900 // will confuse it with a regular operand. Instead, add the dependence
12901 // here.
12902 MI.addOperand(MachineOperand::CreateReg(PPC::X2, false, true));
12903 }
12904
12905 return emitPatchPoint(MI, BB);
12906 }
12907
12908 if (MI.getOpcode() == PPC::EH_SjLj_SetJmp32 ||
12909 MI.getOpcode() == PPC::EH_SjLj_SetJmp64) {
12910 return emitEHSjLjSetJmp(MI, BB);
12911 } else if (MI.getOpcode() == PPC::EH_SjLj_LongJmp32 ||
12912 MI.getOpcode() == PPC::EH_SjLj_LongJmp64) {
12913 return emitEHSjLjLongJmp(MI, BB);
12914 }
12915
12916 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
12917
12918 // To "insert" these instructions we actually have to insert their
12919 // control-flow patterns.
12920 const BasicBlock *LLVM_BB = BB->getBasicBlock();
12921 MachineFunction::iterator It = ++BB->getIterator();
12922
12923 MachineFunction *F = BB->getParent();
12924 MachineRegisterInfo &MRI = F->getRegInfo();
12925
12926 if (Subtarget.hasISEL() &&
12927 (MI.getOpcode() == PPC::SELECT_CC_I4 ||
12928 MI.getOpcode() == PPC::SELECT_CC_I8 ||
12929 MI.getOpcode() == PPC::SELECT_I4 || MI.getOpcode() == PPC::SELECT_I8)) {
12930 SmallVector<MachineOperand, 2> Cond;
12931 if (MI.getOpcode() == PPC::SELECT_CC_I4 ||
12932 MI.getOpcode() == PPC::SELECT_CC_I8)
12933 Cond.push_back(MI.getOperand(4));
12934 else
12935 Cond.push_back(MachineOperand::CreateImm(PPC::PRED_BIT_SET));
12936 Cond.push_back(MI.getOperand(1));
12937
12938 DebugLoc dl = MI.getDebugLoc();
12939 TII->insertSelect(*BB, MI, dl, MI.getOperand(0).getReg(), Cond,
12940 MI.getOperand(2).getReg(), MI.getOperand(3).getReg());
12941 } else if (IsSelectCC(MI) || IsSelect(MI)) {
12942 // The incoming instruction knows the destination vreg to set, the
12943 // condition code register to branch on, the true/false values to
12944 // select between, and a branch opcode to use.
12945
12946 // thisMBB:
12947 // ...
12948 // TrueVal = ...
12949 // cmpTY ccX, r1, r2
12950 // bCC sinkMBB
12951 // fallthrough --> copy0MBB
12952 MachineBasicBlock *thisMBB = BB;
12953 MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
12954 MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
12955 DebugLoc dl = MI.getDebugLoc();
12956 F->insert(It, copy0MBB);
12957 F->insert(It, sinkMBB);
12958
12959 // Set the call frame size on entry to the new basic blocks.
12960 // See https://reviews.llvm.org/D156113.
12961 unsigned CallFrameSize = TII->getCallFrameSizeAt(MI);
12962 copy0MBB->setCallFrameSize(CallFrameSize);
12963 sinkMBB->setCallFrameSize(CallFrameSize);
12964
12965 // Transfer the remainder of BB and its successor edges to sinkMBB.
12966 sinkMBB->splice(sinkMBB->begin(), BB,
12967 std::next(MachineBasicBlock::iterator(MI)), BB->end());
12968 sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
12969
12970 // Next, add the true and fallthrough blocks as its successors.
12971 BB->addSuccessor(copy0MBB);
12972 BB->addSuccessor(sinkMBB);
12973
12974 if (IsSelect(MI)) {
12975 BuildMI(BB, dl, TII->get(PPC::BC))
12976 .addReg(MI.getOperand(1).getReg())
12977 .addMBB(sinkMBB);
12978 } else {
12979 unsigned SelectPred = MI.getOperand(4).getImm();
12980 BuildMI(BB, dl, TII->get(PPC::BCC))
12981 .addImm(SelectPred)
12982 .addReg(MI.getOperand(1).getReg())
12983 .addMBB(sinkMBB);
12984 }
12985
12986 // copy0MBB:
12987 // %FalseValue = ...
12988 // # fallthrough to sinkMBB
12989 BB = copy0MBB;
12990
12991 // Update machine-CFG edges
12992 BB->addSuccessor(sinkMBB);
12993
12994 // sinkMBB:
12995 // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
12996 // ...
12997 BB = sinkMBB;
12998 BuildMI(*BB, BB->begin(), dl, TII->get(PPC::PHI), MI.getOperand(0).getReg())
12999 .addReg(MI.getOperand(3).getReg())
13000 .addMBB(copy0MBB)
13001 .addReg(MI.getOperand(2).getReg())
13002 .addMBB(thisMBB);
13003 } else if (MI.getOpcode() == PPC::ReadTB) {
13004 // To read the 64-bit time-base register on a 32-bit target, we read the
13005 // two halves. Should the counter have wrapped while it was being read, we
13006 // need to try again.
13007 // ...
13008 // readLoop:
13009 // mfspr Rx,TBU # load from TBU
13010 // mfspr Ry,TB # load from TB
13011 // mfspr Rz,TBU # load from TBU
13012 // cmpw crX,Rx,Rz # check if 'old'='new'
13013 // bne readLoop # branch if they're not equal
13014 // ...
13015
13016 MachineBasicBlock *readMBB = F->CreateMachineBasicBlock(LLVM_BB);
13017 MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
13018 DebugLoc dl = MI.getDebugLoc();
13019 F->insert(It, readMBB);
13020 F->insert(It, sinkMBB);
13021
13022 // Transfer the remainder of BB and its successor edges to sinkMBB.
13023 sinkMBB->splice(sinkMBB->begin(), BB,
13024 std::next(MachineBasicBlock::iterator(MI)), BB->end());
13025 sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
13026
13027 BB->addSuccessor(readMBB);
13028 BB = readMBB;
13029
13030 MachineRegisterInfo &RegInfo = F->getRegInfo();
13031 Register ReadAgainReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
13032 Register LoReg = MI.getOperand(0).getReg();
13033 Register HiReg = MI.getOperand(1).getReg();
13034
13035 BuildMI(BB, dl, TII->get(PPC::MFSPR), HiReg).addImm(269);
13036 BuildMI(BB, dl, TII->get(PPC::MFSPR), LoReg).addImm(268);
13037 BuildMI(BB, dl, TII->get(PPC::MFSPR), ReadAgainReg).addImm(269);
13038
13039 Register CmpReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
13040
13041 BuildMI(BB, dl, TII->get(PPC::CMPW), CmpReg)
13042 .addReg(HiReg)
13043 .addReg(ReadAgainReg);
13044 BuildMI(BB, dl, TII->get(PPC::BCC))
13045 .addImm(PPC::PRED_NE)
13046 .addReg(CmpReg)
13047 .addMBB(readMBB);
13048
13049 BB->addSuccessor(readMBB);
13050 BB->addSuccessor(sinkMBB);
13051 } else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I8)
13052 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::ADD4);
13053 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I16)
13054 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::ADD4);
13055 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I32)
13056 BB = EmitAtomicBinary(MI, BB, 4, PPC::ADD4);
13057 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I64)
13058 BB = EmitAtomicBinary(MI, BB, 8, PPC::ADD8);
13059
13060 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I8)
13061 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::AND);
13062 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I16)
13063 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::AND);
13064 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I32)
13065 BB = EmitAtomicBinary(MI, BB, 4, PPC::AND);
13066 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I64)
13067 BB = EmitAtomicBinary(MI, BB, 8, PPC::AND8);
13068
13069 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I8)
13070 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::OR);
13071 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I16)
13072 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::OR);
13073 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I32)
13074 BB = EmitAtomicBinary(MI, BB, 4, PPC::OR);
13075 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I64)
13076 BB = EmitAtomicBinary(MI, BB, 8, PPC::OR8);
13077
13078 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I8)
13079 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::XOR);
13080 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I16)
13081 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::XOR);
13082 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I32)
13083 BB = EmitAtomicBinary(MI, BB, 4, PPC::XOR);
13084 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I64)
13085 BB = EmitAtomicBinary(MI, BB, 8, PPC::XOR8);
13086
13087 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I8)
13088 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::NAND);
13089 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I16)
13090 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::NAND);
13091 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I32)
13092 BB = EmitAtomicBinary(MI, BB, 4, PPC::NAND);
13093 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I64)
13094 BB = EmitAtomicBinary(MI, BB, 8, PPC::NAND8);
13095
13096 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I8)
13097 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::SUBF);
13098 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I16)
13099 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::SUBF);
13100 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I32)
13101 BB = EmitAtomicBinary(MI, BB, 4, PPC::SUBF);
13102 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I64)
13103 BB = EmitAtomicBinary(MI, BB, 8, PPC::SUBF8);
13104
13105 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I8)
13106 BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPW, PPC::PRED_LT);
13107 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I16)
13108 BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPW, PPC::PRED_LT);
13109 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I32)
13110 BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPW, PPC::PRED_LT);
13111 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I64)
13112 BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPD, PPC::PRED_LT);
13113
13114 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I8)
13115 BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPW, PPC::PRED_GT);
13116 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I16)
13117 BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPW, PPC::PRED_GT);
13118 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I32)
13119 BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPW, PPC::PRED_GT);
13120 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I64)
13121 BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPD, PPC::PRED_GT);
13122
13123 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I8)
13124 BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPLW, PPC::PRED_LT);
13125 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I16)
13126 BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPLW, PPC::PRED_LT);
13127 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I32)
13128 BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPLW, PPC::PRED_LT);
13129 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I64)
13130 BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPLD, PPC::PRED_LT);
13131
13132 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I8)
13133 BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPLW, PPC::PRED_GT);
13134 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I16)
13135 BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPLW, PPC::PRED_GT);
13136 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I32)
13137 BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPLW, PPC::PRED_GT);
13138 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I64)
13139 BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPLD, PPC::PRED_GT);
13140
13141 else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I8)
13142 BB = EmitPartwordAtomicBinary(MI, BB, true, 0);
13143 else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I16)
13144 BB = EmitPartwordAtomicBinary(MI, BB, false, 0);
13145 else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I32)
13146 BB = EmitAtomicBinary(MI, BB, 4, 0);
13147 else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I64)
13148 BB = EmitAtomicBinary(MI, BB, 8, 0);
13149 else if (MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I32 ||
13150 MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I64 ||
13151 (Subtarget.hasPartwordAtomics() &&
13152 MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8) ||
13153 (Subtarget.hasPartwordAtomics() &&
13154 MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I16)) {
13155 bool is64bit = MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I64;
13156
13157 auto LoadMnemonic = PPC::LDARX;
13158 auto StoreMnemonic = PPC::STDCX;
13159 switch (MI.getOpcode()) {
13160 default:
13161 llvm_unreachable("Compare and swap of unknown size");
13162 case PPC::ATOMIC_CMP_SWAP_I8:
13163 LoadMnemonic = PPC::LBARX;
13164 StoreMnemonic = PPC::STBCX;
13165 assert(Subtarget.hasPartwordAtomics() && "No support partword atomics.");
13166 break;
13167 case PPC::ATOMIC_CMP_SWAP_I16:
13168 LoadMnemonic = PPC::LHARX;
13169 StoreMnemonic = PPC::STHCX;
13170 assert(Subtarget.hasPartwordAtomics() && "No support partword atomics.");
13171 break;
13172 case PPC::ATOMIC_CMP_SWAP_I32:
13173 LoadMnemonic = PPC::LWARX;
13174 StoreMnemonic = PPC::STWCX;
13175 break;
13176 case PPC::ATOMIC_CMP_SWAP_I64:
13177 LoadMnemonic = PPC::LDARX;
13178 StoreMnemonic = PPC::STDCX;
13179 break;
13180 }
13181 MachineRegisterInfo &RegInfo = F->getRegInfo();
13182 Register dest = MI.getOperand(0).getReg();
13183 Register ptrA = MI.getOperand(1).getReg();
13184 Register ptrB = MI.getOperand(2).getReg();
13185 Register CrReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
13186 Register oldval = MI.getOperand(3).getReg();
13187 Register newval = MI.getOperand(4).getReg();
13188 DebugLoc dl = MI.getDebugLoc();
13189
13190 MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB);
13191 MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB);
13192 MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
13193 F->insert(It, loop1MBB);
13194 F->insert(It, loop2MBB);
13195 F->insert(It, exitMBB);
13196 exitMBB->splice(exitMBB->begin(), BB,
13197 std::next(MachineBasicBlock::iterator(MI)), BB->end());
13198 exitMBB->transferSuccessorsAndUpdatePHIs(BB);
13199
13200 // thisMBB:
13201 // ...
13202 // fallthrough --> loopMBB
13203 BB->addSuccessor(loop1MBB);
13204
13205 // loop1MBB:
13206 // l[bhwd]arx dest, ptr
13207 // cmp[wd] dest, oldval
13208 // bne- exitBB
13209 // loop2MBB:
13210 // st[bhwd]cx. newval, ptr
13211 // bne- loopMBB
13212 // b exitBB
13213 // exitBB:
13214 BB = loop1MBB;
13215 BuildMI(BB, dl, TII->get(LoadMnemonic), dest).addReg(ptrA).addReg(ptrB);
13216 BuildMI(BB, dl, TII->get(is64bit ? PPC::CMPD : PPC::CMPW), CrReg)
13217 .addReg(dest)
13218 .addReg(oldval);
13219 BuildMI(BB, dl, TII->get(PPC::BCC))
13220 .addImm(PPC::PRED_NE)
13221 .addReg(CrReg)
13222 .addMBB(exitMBB);
13223 BB->addSuccessor(loop2MBB);
13224 BB->addSuccessor(exitMBB);
13225
13226 BB = loop2MBB;
13227 BuildMI(BB, dl, TII->get(StoreMnemonic))
13228 .addReg(newval)
13229 .addReg(ptrA)
13230 .addReg(ptrB);
13231 BuildMI(BB, dl, TII->get(PPC::BCC))
13232 .addImm(PPC::PRED_NE)
13233 .addReg(PPC::CR0)
13234 .addMBB(loop1MBB);
13235 BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
13236 BB->addSuccessor(loop1MBB);
13237 BB->addSuccessor(exitMBB);
13238
13239 // exitMBB:
13240 // ...
13241 BB = exitMBB;
13242 } else if (MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8 ||
13243 MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I16) {
13244 // We must use 64-bit registers for addresses when targeting 64-bit,
13245 // since we're actually doing arithmetic on them. Other registers
13246 // can be 32-bit.
13247 bool is64bit = Subtarget.isPPC64();
13248 bool isLittleEndian = Subtarget.isLittleEndian();
13249 bool is8bit = MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8;
13250
13251 Register dest = MI.getOperand(0).getReg();
13252 Register ptrA = MI.getOperand(1).getReg();
13253 Register ptrB = MI.getOperand(2).getReg();
13254 Register oldval = MI.getOperand(3).getReg();
13255 Register newval = MI.getOperand(4).getReg();
13256 DebugLoc dl = MI.getDebugLoc();
13257
13258 MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB);
13259 MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB);
13260 MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
13261 F->insert(It, loop1MBB);
13262 F->insert(It, loop2MBB);
13263 F->insert(It, exitMBB);
13264 exitMBB->splice(exitMBB->begin(), BB,
13265 std::next(MachineBasicBlock::iterator(MI)), BB->end());
13266 exitMBB->transferSuccessorsAndUpdatePHIs(BB);
13267
13268 MachineRegisterInfo &RegInfo = F->getRegInfo();
13269 const TargetRegisterClass *RC =
13270 is64bit ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
13271 const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
13272
13273 Register PtrReg = RegInfo.createVirtualRegister(RC);
13274 Register Shift1Reg = RegInfo.createVirtualRegister(GPRC);
13275 Register ShiftReg =
13276 isLittleEndian ? Shift1Reg : RegInfo.createVirtualRegister(GPRC);
13277 Register NewVal2Reg = RegInfo.createVirtualRegister(GPRC);
13278 Register NewVal3Reg = RegInfo.createVirtualRegister(GPRC);
13279 Register OldVal2Reg = RegInfo.createVirtualRegister(GPRC);
13280 Register OldVal3Reg = RegInfo.createVirtualRegister(GPRC);
13281 Register MaskReg = RegInfo.createVirtualRegister(GPRC);
13282 Register Mask2Reg = RegInfo.createVirtualRegister(GPRC);
13283 Register Mask3Reg = RegInfo.createVirtualRegister(GPRC);
13284 Register Tmp2Reg = RegInfo.createVirtualRegister(GPRC);
13285 Register Tmp4Reg = RegInfo.createVirtualRegister(GPRC);
13286 Register TmpDestReg = RegInfo.createVirtualRegister(GPRC);
13287 Register Ptr1Reg;
13288 Register TmpReg = RegInfo.createVirtualRegister(GPRC);
13289 Register ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;
13290 Register CrReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
13291 // thisMBB:
13292 // ...
13293 // fallthrough --> loopMBB
13294 BB->addSuccessor(loop1MBB);
13295
13296 // The 4-byte load must be aligned, while a char or short may be
13297 // anywhere in the word. Hence all this nasty bookkeeping code.
13298 // add ptr1, ptrA, ptrB [copy if ptrA==0]
13299 // rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27]
13300 // xori shift, shift1, 24 [16]
13301 // rlwinm ptr, ptr1, 0, 0, 29
13302 // slw newval2, newval, shift
13303 // slw oldval2, oldval,shift
13304 // li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535]
13305 // slw mask, mask2, shift
13306 // and newval3, newval2, mask
13307 // and oldval3, oldval2, mask
13308 // loop1MBB:
13309 // lwarx tmpDest, ptr
13310 // and tmp, tmpDest, mask
13311 // cmpw tmp, oldval3
13312 // bne- exitBB
13313 // loop2MBB:
13314 // andc tmp2, tmpDest, mask
13315 // or tmp4, tmp2, newval3
13316 // stwcx. tmp4, ptr
13317 // bne- loop1MBB
13318 // b exitBB
13319 // exitBB:
13320 // srw dest, tmpDest, shift
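 // Worked example (8-bit case, byte offset 2 within the aligned word):
 // shift1 = 2 * 8 = 16. On little-endian targets shift == shift1, so the byte
 // occupies bits 23:16 of the loaded word; on big-endian targets
 // shift = 16 ^ 24 = 8, i.e. bits 15:8, since byte 0 is the word's most
 // significant byte there.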
13321 if (ptrA != ZeroReg) {
13322 Ptr1Reg = RegInfo.createVirtualRegister(RC);
13323 BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)
13324 .addReg(ptrA)
13325 .addReg(ptrB);
13326 } else {
13327 Ptr1Reg = ptrB;
13328 }
13329
13330 // We need to use a 32-bit subregister to avoid a register class mismatch
13331 // in 64-bit mode.
13332 BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg)
13333 .addReg(Ptr1Reg, 0, is64bit ? PPC::sub_32 : 0)
13334 .addImm(3)
13335 .addImm(27)
13336 .addImm(is8bit ? 28 : 27);
13337 if (!isLittleEndian)
13338 BuildMI(BB, dl, TII->get(PPC::XORI), ShiftReg)
13339 .addReg(Shift1Reg)
13340 .addImm(is8bit ? 24 : 16);
13341 if (is64bit)
13342 BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg)
13343 .addReg(Ptr1Reg)
13344 .addImm(0)
13345 .addImm(61);
13346 else
13347 BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg)
13348 .addReg(Ptr1Reg)
13349 .addImm(0)
13350 .addImm(0)
13351 .addImm(29);
13352 BuildMI(BB, dl, TII->get(PPC::SLW), NewVal2Reg)
13353 .addReg(newval)
13354 .addReg(ShiftReg);
13355 BuildMI(BB, dl, TII->get(PPC::SLW), OldVal2Reg)
13356 .addReg(oldval)
13357 .addReg(ShiftReg);
13358 if (is8bit)
13359 BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255);
13360 else {
13361 BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0);
13362 BuildMI(BB, dl, TII->get(PPC::ORI), Mask2Reg)
13363 .addReg(Mask3Reg)
13364 .addImm(65535);
13365 }
13366 BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg)
13367 .addReg(Mask2Reg)
13368 .addReg(ShiftReg);
13369 BuildMI(BB, dl, TII->get(PPC::AND), NewVal3Reg)
13370 .addReg(NewVal2Reg)
13371 .addReg(MaskReg);
13372 BuildMI(BB, dl, TII->get(PPC::AND), OldVal3Reg)
13373 .addReg(OldVal2Reg)
13374 .addReg(MaskReg);
13375
13376 BB = loop1MBB;
13377 BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg)
13378 .addReg(ZeroReg)
13379 .addReg(PtrReg);
13380 BuildMI(BB, dl, TII->get(PPC::AND), TmpReg)
13381 .addReg(TmpDestReg)
13382 .addReg(MaskReg);
13383 BuildMI(BB, dl, TII->get(PPC::CMPW), CrReg)
13384 .addReg(TmpReg)
13385 .addReg(OldVal3Reg);
13386 BuildMI(BB, dl, TII->get(PPC::BCC))
13387 .addImm(PPC::PRED_NE)
13388 .addReg(CrReg)
13389 .addMBB(exitMBB);
13390 BB->addSuccessor(loop2MBB);
13391 BB->addSuccessor(exitMBB);
13392
13393 BB = loop2MBB;
13394 BuildMI(BB, dl, TII->get(PPC::ANDC), Tmp2Reg)
13395 .addReg(TmpDestReg)
13396 .addReg(MaskReg);
13397 BuildMI(BB, dl, TII->get(PPC::OR), Tmp4Reg)
13398 .addReg(Tmp2Reg)
13399 .addReg(NewVal3Reg);
13400 BuildMI(BB, dl, TII->get(PPC::STWCX))
13401 .addReg(Tmp4Reg)
13402 .addReg(ZeroReg)
13403 .addReg(PtrReg);
13404 BuildMI(BB, dl, TII->get(PPC::BCC))
13405 .addImm(PPC::PRED_NE)
13406 .addReg(PPC::CR0)
13407 .addMBB(loop1MBB);
13408 BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
13409 BB->addSuccessor(loop1MBB);
13410 BB->addSuccessor(exitMBB);
13411
13412 // exitMBB:
13413 // ...
13414 BB = exitMBB;
13415 BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), dest)
13416 .addReg(TmpReg)
13417 .addReg(ShiftReg);
13418 } else if (MI.getOpcode() == PPC::FADDrtz) {
13419 // This pseudo performs an FADD with rounding mode temporarily forced
13420 // to round-to-zero. We emit this via custom inserter since the FPSCR
13421 // is not modeled at the SelectionDAG level.
13422 Register Dest = MI.getOperand(0).getReg();
13423 Register Src1 = MI.getOperand(1).getReg();
13424 Register Src2 = MI.getOperand(2).getReg();
13425 DebugLoc dl = MI.getDebugLoc();
13426
13427 MachineRegisterInfo &RegInfo = F->getRegInfo();
13428 Register MFFSReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass);
13429
13430 // Save FPSCR value.
13431 BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), MFFSReg);
13432
13433 // Set rounding mode to round-to-zero.
13434 BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB1))
13435 .addImm(31)
13437
13438 BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB0))
13439 .addImm(30)
13441
13442 // Perform addition.
13443 auto MIB = BuildMI(*BB, MI, dl, TII->get(PPC::FADD), Dest)
13444 .addReg(Src1)
13445 .addReg(Src2);
13446 if (MI.getFlag(MachineInstr::NoFPExcept))
13447 MIB.setFlag(MachineInstr::NoFPExcept);
13448
13449 // Restore FPSCR value.
13450 BuildMI(*BB, MI, dl, TII->get(PPC::MTFSFb)).addImm(1).addReg(MFFSReg);
13451 } else if (MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT ||
13452 MI.getOpcode() == PPC::ANDI_rec_1_GT_BIT ||
13453 MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT8 ||
13454 MI.getOpcode() == PPC::ANDI_rec_1_GT_BIT8) {
13455 unsigned Opcode = (MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT8 ||
13456 MI.getOpcode() == PPC::ANDI_rec_1_GT_BIT8)
13457 ? PPC::ANDI8_rec
13458 : PPC::ANDI_rec;
13459 bool IsEQ = (MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT ||
13460 MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT8);
13461
13462 MachineRegisterInfo &RegInfo = F->getRegInfo();
13463 Register Dest = RegInfo.createVirtualRegister(
13464 Opcode == PPC::ANDI_rec ? &PPC::GPRCRegClass : &PPC::G8RCRegClass);
13465
13466 DebugLoc Dl = MI.getDebugLoc();
13467 BuildMI(*BB, MI, Dl, TII->get(Opcode), Dest)
13468 .addReg(MI.getOperand(1).getReg())
13469 .addImm(1);
13470 BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::COPY),
13471 MI.getOperand(0).getReg())
13472 .addReg(IsEQ ? PPC::CR0EQ : PPC::CR0GT);
13473 } else if (MI.getOpcode() == PPC::TCHECK_RET) {
13474 DebugLoc Dl = MI.getDebugLoc();
13475 MachineRegisterInfo &RegInfo = F->getRegInfo();
13476 Register CRReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
13477 BuildMI(*BB, MI, Dl, TII->get(PPC::TCHECK), CRReg);
13478 BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::COPY),
13479 MI.getOperand(0).getReg())
13480 .addReg(CRReg);
13481 } else if (MI.getOpcode() == PPC::TBEGIN_RET) {
13482 DebugLoc Dl = MI.getDebugLoc();
13483 unsigned Imm = MI.getOperand(1).getImm();
13484 BuildMI(*BB, MI, Dl, TII->get(PPC::TBEGIN)).addImm(Imm);
13485 BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::COPY),
13486 MI.getOperand(0).getReg())
13487 .addReg(PPC::CR0EQ);
13488 } else if (MI.getOpcode() == PPC::SETRNDi) {
13489 DebugLoc dl = MI.getDebugLoc();
13490 Register OldFPSCRReg = MI.getOperand(0).getReg();
13491
13492 // Save FPSCR value.
13493 if (MRI.use_empty(OldFPSCRReg))
13494 BuildMI(*BB, MI, dl, TII->get(TargetOpcode::IMPLICIT_DEF), OldFPSCRReg);
13495 else
13496 BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), OldFPSCRReg);
13497
13498 // The floating-point rounding mode is in bits 62:63 of the FPSCR, and has
13499 // the following settings:
13500 // 00 Round to nearest
13501 // 01 Round to 0
13502 // 10 Round to +inf
13503 // 11 Round to -inf
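 // For example, SETRNDi 2 (round to +inf) emits MTFSB0 31 and MTFSB1 30:
 // the MTFSB* immediates address the FPSCR in its 32-bit numbering, where
 // bits 30:31 correspond to bits 62:63 of the 64-bit FPSCR.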
13504
13505 // When the operand is an immediate, use its two least significant bits to
13506 // set bits 62:63 of the FPSCR.
13507 unsigned Mode = MI.getOperand(1).getImm();
13508 BuildMI(*BB, MI, dl, TII->get((Mode & 1) ? PPC::MTFSB1 : PPC::MTFSB0))
13509 .addImm(31)
13511
13512 BuildMI(*BB, MI, dl, TII->get((Mode & 2) ? PPC::MTFSB1 : PPC::MTFSB0))
13513 .addImm(30)
13515 } else if (MI.getOpcode() == PPC::SETRND) {
13516 DebugLoc dl = MI.getDebugLoc();
13517
13518 // Copy register from F8RCRegClass::SrcReg to G8RCRegClass::DestReg
13519 // or copy register from G8RCRegClass::SrcReg to F8RCRegClass::DestReg.
13520 // If the target doesn't have DirectMove, we have to go through the stack
13521 // to do the conversion, because the target lacks instructions like mtvsrd
13522 // or mfvsrd that could move the value directly.
13523 auto copyRegFromG8RCOrF8RC = [&] (unsigned DestReg, unsigned SrcReg) {
13524 if (Subtarget.hasDirectMove()) {
13525 BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), DestReg)
13526 .addReg(SrcReg);
13527 } else {
13528 // Use the stack to do the register copy.
13529 unsigned StoreOp = PPC::STD, LoadOp = PPC::LFD;
13530 MachineRegisterInfo &RegInfo = F->getRegInfo();
13531 const TargetRegisterClass *RC = RegInfo.getRegClass(SrcReg);
13532 if (RC == &PPC::F8RCRegClass) {
13533 // Copy register from F8RCRegClass to G8RCRegclass.
13534 assert((RegInfo.getRegClass(DestReg) == &PPC::G8RCRegClass) &&
13535 "Unsupported RegClass.");
13536
13537 StoreOp = PPC::STFD;
13538 LoadOp = PPC::LD;
13539 } else {
13540 // Copy register from G8RCRegClass to F8RCRegclass.
13541 assert((RegInfo.getRegClass(SrcReg) == &PPC::G8RCRegClass) &&
13542 (RegInfo.getRegClass(DestReg) == &PPC::F8RCRegClass) &&
13543 "Unsupported RegClass.");
13544 }
13545
13546 MachineFrameInfo &MFI = F->getFrameInfo();
13547 int FrameIdx = MFI.CreateStackObject(8, Align(8), false);
13548
13549 MachineMemOperand *MMOStore = F->getMachineMemOperand(
13550 MachinePointerInfo::getFixedStack(*F, FrameIdx, 0),
13551 MachineMemOperand::MOStore, MFI.getObjectSize(FrameIdx),
13552 MFI.getObjectAlign(FrameIdx));
13553
13554 // Store the SrcReg into the stack.
13555 BuildMI(*BB, MI, dl, TII->get(StoreOp))
13556 .addReg(SrcReg)
13557 .addImm(0)
13558 .addFrameIndex(FrameIdx)
13559 .addMemOperand(MMOStore);
13560
13561 MachineMemOperand *MMOLoad = F->getMachineMemOperand(
13562 MachinePointerInfo::getFixedStack(*F, FrameIdx, 0),
13563 MachineMemOperand::MOLoad, MFI.getObjectSize(FrameIdx),
13564 MFI.getObjectAlign(FrameIdx));
13565
13566 // Load the value back from the stack slot into DestReg, which completes
13567 // the register-class conversion from SrcReg's register class to
13568 // DestReg's register class.
13569 BuildMI(*BB, MI, dl, TII->get(LoadOp), DestReg)
13570 .addImm(0)
13571 .addFrameIndex(FrameIdx)
13572 .addMemOperand(MMOLoad);
13573 }
13574 };
13575
13576 Register OldFPSCRReg = MI.getOperand(0).getReg();
13577
13578 // Save FPSCR value.
13579 BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), OldFPSCRReg);
13580
13581 // When the operand is a GPR, use its two least significant bits together
13582 // with the mtfsf instruction to set bits 62:63 of the FPSCR.
13583 //
13584 // copy OldFPSCRTmpReg, OldFPSCRReg
13585 // (INSERT_SUBREG ExtSrcReg, (IMPLICIT_DEF ImDefReg), SrcOp, 1)
13586 // rldimi NewFPSCRTmpReg, ExtSrcReg, OldFPSCRReg, 0, 62
13587 // copy NewFPSCRReg, NewFPSCRTmpReg
13588 // mtfsf 255, NewFPSCRReg
13589 MachineOperand SrcOp = MI.getOperand(1);
13590 MachineRegisterInfo &RegInfo = F->getRegInfo();
13591 Register OldFPSCRTmpReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
13592
13593 copyRegFromG8RCOrF8RC(OldFPSCRTmpReg, OldFPSCRReg);
13594
13595 Register ImDefReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
13596 Register ExtSrcReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
13597
13598 // The first operand of INSERT_SUBREG should be a register that has
13599 // subregisters. Since we only care about its register class, an
13600 // IMPLICIT_DEF register is sufficient.
13601 BuildMI(*BB, MI, dl, TII->get(TargetOpcode::IMPLICIT_DEF), ImDefReg);
13602 BuildMI(*BB, MI, dl, TII->get(PPC::INSERT_SUBREG), ExtSrcReg)
13603 .addReg(ImDefReg)
13604 .add(SrcOp)
13605 .addImm(1);
13606
13607 Register NewFPSCRTmpReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
13608 BuildMI(*BB, MI, dl, TII->get(PPC::RLDIMI), NewFPSCRTmpReg)
13609 .addReg(OldFPSCRTmpReg)
13610 .addReg(ExtSrcReg)
13611 .addImm(0)
13612 .addImm(62);
13613
13614 Register NewFPSCRReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass);
13615 copyRegFromG8RCOrF8RC(NewFPSCRReg, NewFPSCRTmpReg);
13616
13617 // The mask 255 means that bits 32:63 of NewFPSCRReg are written to bits
13618 // 32:63 of the FPSCR.
13619 BuildMI(*BB, MI, dl, TII->get(PPC::MTFSF))
13620 .addImm(255)
13621 .addReg(NewFPSCRReg)
13622 .addImm(0)
13623 .addImm(0);
13624 } else if (MI.getOpcode() == PPC::SETFLM) {
13625 DebugLoc Dl = MI.getDebugLoc();
13626
13627 // Result of setflm is previous FPSCR content, so we need to save it first.
13628 Register OldFPSCRReg = MI.getOperand(0).getReg();
13629 if (MRI.use_empty(OldFPSCRReg))
13630 BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::IMPLICIT_DEF), OldFPSCRReg);
13631 else
13632 BuildMI(*BB, MI, Dl, TII->get(PPC::MFFS), OldFPSCRReg);
13633
13634 // Put bits 32:63 of NewFPSCRReg into the FPSCR.
13635 Register NewFPSCRReg = MI.getOperand(1).getReg();
13636 BuildMI(*BB, MI, Dl, TII->get(PPC::MTFSF))
13637 .addImm(255)
13638 .addReg(NewFPSCRReg)
13639 .addImm(0)
13640 .addImm(0);
13641 } else if (MI.getOpcode() == PPC::PROBED_ALLOCA_32 ||
13642 MI.getOpcode() == PPC::PROBED_ALLOCA_64) {
13643 return emitProbedAlloca(MI, BB);
13644 } else if (MI.getOpcode() == PPC::SPLIT_QUADWORD) {
13645 DebugLoc DL = MI.getDebugLoc();
13646 Register Src = MI.getOperand(2).getReg();
13647 Register Lo = MI.getOperand(0).getReg();
13648 Register Hi = MI.getOperand(1).getReg();
13649 BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY))
13650 .addDef(Lo)
13651 .addUse(Src, 0, PPC::sub_gp8_x1);
13652 BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY))
13653 .addDef(Hi)
13654 .addUse(Src, 0, PPC::sub_gp8_x0);
13655 } else if (MI.getOpcode() == PPC::LQX_PSEUDO ||
13656 MI.getOpcode() == PPC::STQX_PSEUDO) {
13657 DebugLoc DL = MI.getDebugLoc();
13658 // Ptr holds the sum of RA and RB and is used as the ptr_rc_no_r0 part
13659 // of LQ/STQ's memory operand, so it has to be in the
13660 // g8rc_and_g8rc_nox0 register class.
13661 Register Ptr =
13662 F->getRegInfo().createVirtualRegister(&PPC::G8RC_and_G8RC_NOX0RegClass);
13663 Register Val = MI.getOperand(0).getReg();
13664 Register RA = MI.getOperand(1).getReg();
13665 Register RB = MI.getOperand(2).getReg();
13666 BuildMI(*BB, MI, DL, TII->get(PPC::ADD8), Ptr).addReg(RA).addReg(RB);
13667 BuildMI(*BB, MI, DL,
13668 MI.getOpcode() == PPC::LQX_PSEUDO ? TII->get(PPC::LQ)
13669 : TII->get(PPC::STQ))
13670 .addReg(Val, MI.getOpcode() == PPC::LQX_PSEUDO ? RegState::Define : 0)
13671 .addImm(0)
13672 .addReg(Ptr);
13673 } else {
13674 llvm_unreachable("Unexpected instr type to insert");
13675 }
13676
13677 MI.eraseFromParent(); // The pseudo instruction is gone now.
13678 return BB;
13679}
13680
13681//===----------------------------------------------------------------------===//
13682// Target Optimization Hooks
13683//===----------------------------------------------------------------------===//
13684
13685static int getEstimateRefinementSteps(EVT VT, const PPCSubtarget &Subtarget) {
13686 // For the estimates, convergence is quadratic, so we essentially double the
13687 // number of digits correct after every iteration. For both FRE and FRSQRTE,
13688 // the minimum architected relative accuracy is 2^-5. When hasRecipPrec(),
13689 // this is 2^-14. IEEE float has 23 digits and double has 52 digits.
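// For example, without hasRecipPrec() the error goes 2^-5 -> 2^-10 -> 2^-20
// -> 2^-40 after three steps (enough for f32's 23 fraction bits), and one
// extra step reaches 2^-80 for f64; with hasRecipPrec(), one step takes
// 2^-14 to 2^-28 for f32 and a second step reaches 2^-56 for f64.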
13690 int RefinementSteps = Subtarget.hasRecipPrec() ? 1 : 3;
13691 if (VT.getScalarType() == MVT::f64)
13692 RefinementSteps++;
13693 return RefinementSteps;
13694}
13695
13696SDValue PPCTargetLowering::getSqrtInputTest(SDValue Op, SelectionDAG &DAG,
13697 const DenormalMode &Mode) const {
13698 // We only have VSX Vector Test for software Square Root.
13699 EVT VT = Op.getValueType();
13700 if (!isTypeLegal(MVT::i1) ||
13701 (VT != MVT::f64 &&
13702 ((VT != MVT::v2f64 && VT != MVT::v4f32) || !Subtarget.hasVSX())))
13703 return TargetLowering::getSqrtInputTest(Op, DAG, Mode);
13704
13705 SDLoc DL(Op);
13706 // The output register of FTSQRT is CR field.
13707 SDValue FTSQRT = DAG.getNode(PPCISD::FTSQRT, DL, MVT::i32, Op);
13708 // ftsqrt BF,FRB
13709 // Let e_b be the unbiased exponent of the double-precision
13710 // floating-point operand in register FRB.
13711 // fe_flag is set to 1 if either of the following conditions occurs.
13712 // - The double-precision floating-point operand in register FRB is a zero,
13713 // a NaN, an infinity, or a negative value.
13714 // - e_b is less than or equal to -970.
13715 // Otherwise fe_flag is set to 0.
13716 // Both VSX and non-VSX versions would set EQ bit in the CR if the number is
13717 // not eligible for iteration. (zero/negative/infinity/nan or unbiased
13718 // exponent is less than -970)
13719 SDValue SRIdxVal = DAG.getTargetConstant(PPC::sub_eq, DL, MVT::i32);
13720 return SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, MVT::i1,
13721 FTSQRT, SRIdxVal),
13722 0);
13723}
13724
13725SDValue
13726PPCTargetLowering::getSqrtResultForDenormInput(SDValue Op,
13727 SelectionDAG &DAG) const {
13728 // We only have VSX Vector Square Root.
13729 EVT VT = Op.getValueType();
13730 if (VT != MVT::f64 &&
13731 ((VT != MVT::v2f64 && VT != MVT::v4f32) || !Subtarget.hasVSX()))
13732 return TargetLowering::getSqrtResultForDenormInput(Op, DAG);
13733
13734 return DAG.getNode(PPCISD::FSQRT, SDLoc(Op), VT, Op);
13735}
13736
13737SDValue PPCTargetLowering::getSqrtEstimate(SDValue Operand, SelectionDAG &DAG,
13738 int Enabled, int &RefinementSteps,
13739 bool &UseOneConstNR,
13740 bool Reciprocal) const {
13741 EVT VT = Operand.getValueType();
13742 if ((VT == MVT::f32 && Subtarget.hasFRSQRTES()) ||
13743 (VT == MVT::f64 && Subtarget.hasFRSQRTE()) ||
13744 (VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
13745 (VT == MVT::v2f64 && Subtarget.hasVSX())) {
13746 if (RefinementSteps == ReciprocalEstimate::Unspecified)
13747 RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
13748
13749 // The Newton-Raphson computation with a single constant does not provide
13750 // enough accuracy on some CPUs.
13751 UseOneConstNR = !Subtarget.needsTwoConstNR();
13752 return DAG.getNode(PPCISD::FRSQRTE, SDLoc(Operand), VT, Operand);
13753 }
13754 return SDValue();
13755}
13756
13757SDValue PPCTargetLowering::getRecipEstimate(SDValue Operand, SelectionDAG &DAG,
13758 int Enabled,
13759 int &RefinementSteps) const {
13760 EVT VT = Operand.getValueType();
13761 if ((VT == MVT::f32 && Subtarget.hasFRES()) ||
13762 (VT == MVT::f64 && Subtarget.hasFRE()) ||
13763 (VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
13764 (VT == MVT::v2f64 && Subtarget.hasVSX())) {
13765 if (RefinementSteps == ReciprocalEstimate::Unspecified)
13766 RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
13767 return DAG.getNode(PPCISD::FRE, SDLoc(Operand), VT, Operand);
13768 }
13769 return SDValue();
13770}
13771
13772unsigned PPCTargetLowering::combineRepeatedFPDivisors() const {
13773 // Note: This functionality is used only when unsafe-fp-math is enabled, and
13774 // on cores with reciprocal estimates (which are used when unsafe-fp-math is
13775 // enabled for division), this functionality is redundant with the default
13776 // combiner logic (once the division -> reciprocal/multiply transformation
13777 // has taken place). As a result, this matters more for older cores than for
13778 // newer ones.
13779
13780 // Combine multiple FDIVs with the same divisor into multiple FMULs by the
13781 // reciprocal if there are two or more FDIVs (for embedded cores with only
13782 // one FP pipeline) or three or more FDIVs (for generic OOO cores).
13783 switch (Subtarget.getCPUDirective()) {
13784 default:
13785 return 3;
13786 case PPC::DIR_440:
13787 case PPC::DIR_A2:
13788 case PPC::DIR_E500:
13789 case PPC::DIR_E500mc:
13790 case PPC::DIR_E5500:
13791 return 2;
13792 }
13793}
13794
13795// isConsecutiveLSLoc needs to work even if all adds have not yet been
13796// collapsed, and so we need to look through chains of them.
13797static void getBaseWithConstantOffset(SDValue Loc, SDValue &Base,
13798 int64_t& Offset, SelectionDAG &DAG) {
13799 if (DAG.isBaseWithConstantOffset(Loc)) {
13800 Base = Loc.getOperand(0);
13801 Offset += cast<ConstantSDNode>(Loc.getOperand(1))->getSExtValue();
13802
13803 // The base might itself be a base plus an offset, and if so, accumulate
13804 // that as well.
13805 getBaseWithConstantOffset(Base, Base, Offset, DAG);
13806 }
13807}
13808
13809static bool isConsecutiveLSLoc(SDValue Loc, EVT VT, LSBaseSDNode *Base,
13810 unsigned Bytes, int Dist,
13811 SelectionDAG &DAG) {
13812 if (VT.getSizeInBits() / 8 != Bytes)
13813 return false;
13814
13815 SDValue BaseLoc = Base->getBasePtr();
13816 if (Loc.getOpcode() == ISD::FrameIndex) {
13817 if (BaseLoc.getOpcode() != ISD::FrameIndex)
13818 return false;
13819 const MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
13820 int FI = cast<FrameIndexSDNode>(Loc)->getIndex();
13821 int BFI = cast<FrameIndexSDNode>(BaseLoc)->getIndex();
13822 int FS = MFI.getObjectSize(FI);
13823 int BFS = MFI.getObjectSize(BFI);
13824 if (FS != BFS || FS != (int)Bytes) return false;
13825 return MFI.getObjectOffset(FI) == (MFI.getObjectOffset(BFI) + Dist*Bytes);
13826 }
13827
13828 SDValue Base1 = Loc, Base2 = BaseLoc;
13829 int64_t Offset1 = 0, Offset2 = 0;
13830 getBaseWithConstantOffset(Loc, Base1, Offset1, DAG);
13831 getBaseWithConstantOffset(BaseLoc, Base2, Offset2, DAG);
13832 if (Base1 == Base2 && Offset1 == (Offset2 + Dist * Bytes))
13833 return true;
13834
13835 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
13836 const GlobalValue *GV1 = nullptr;
13837 const GlobalValue *GV2 = nullptr;
13838 Offset1 = 0;
13839 Offset2 = 0;
13840 bool isGA1 = TLI.isGAPlusOffset(Loc.getNode(), GV1, Offset1);
13841 bool isGA2 = TLI.isGAPlusOffset(BaseLoc.getNode(), GV2, Offset2);
13842 if (isGA1 && isGA2 && GV1 == GV2)
13843 return Offset1 == (Offset2 + Dist*Bytes);
13844 return false;
13845}
13846
13847// Like SelectionDAG::isConsecutiveLoad, but also works for stores, and does
13848// not enforce equality of the chain operands.
13849static bool isConsecutiveLS(SDNode *N, LSBaseSDNode *Base,
13850 unsigned Bytes, int Dist,
13851 SelectionDAG &DAG) {
13852 if (LSBaseSDNode *LS = dyn_cast<LSBaseSDNode>(N)) {
13853 EVT VT = LS->getMemoryVT();
13854 SDValue Loc = LS->getBasePtr();
13855 return isConsecutiveLSLoc(Loc, VT, Base, Bytes, Dist, DAG);
13856 }
13857
13858 if (N->getOpcode() == ISD::INTRINSIC_W_CHAIN) {
13859 EVT VT;
13860 switch (N->getConstantOperandVal(1)) {
13861 default: return false;
13862 case Intrinsic::ppc_altivec_lvx:
13863 case Intrinsic::ppc_altivec_lvxl:
13864 case Intrinsic::ppc_vsx_lxvw4x:
13865 case Intrinsic::ppc_vsx_lxvw4x_be:
13866 VT = MVT::v4i32;
13867 break;
13868 case Intrinsic::ppc_vsx_lxvd2x:
13869 case Intrinsic::ppc_vsx_lxvd2x_be:
13870 VT = MVT::v2f64;
13871 break;
13872 case Intrinsic::ppc_altivec_lvebx:
13873 VT = MVT::i8;
13874 break;
13875 case Intrinsic::ppc_altivec_lvehx:
13876 VT = MVT::i16;
13877 break;
13878 case Intrinsic::ppc_altivec_lvewx:
13879 VT = MVT::i32;
13880 break;
13881 }
13882
13883 return isConsecutiveLSLoc(N->getOperand(2), VT, Base, Bytes, Dist, DAG);
13884 }
13885
13886 if (N->getOpcode() == ISD::INTRINSIC_VOID) {
13887 EVT VT;
13888 switch (N->getConstantOperandVal(1)) {
13889 default: return false;
13890 case Intrinsic::ppc_altivec_stvx:
13891 case Intrinsic::ppc_altivec_stvxl:
13892 case Intrinsic::ppc_vsx_stxvw4x:
13893 VT = MVT::v4i32;
13894 break;
13895 case Intrinsic::ppc_vsx_stxvd2x:
13896 VT = MVT::v2f64;
13897 break;
13898 case Intrinsic::ppc_vsx_stxvw4x_be:
13899 VT = MVT::v4i32;
13900 break;
13901 case Intrinsic::ppc_vsx_stxvd2x_be:
13902 VT = MVT::v2f64;
13903 break;
13904 case Intrinsic::ppc_altivec_stvebx:
13905 VT = MVT::i8;
13906 break;
13907 case Intrinsic::ppc_altivec_stvehx:
13908 VT = MVT::i16;
13909 break;
13910 case Intrinsic::ppc_altivec_stvewx:
13911 VT = MVT::i32;
13912 break;
13913 }
13914
13915 return isConsecutiveLSLoc(N->getOperand(3), VT, Base, Bytes, Dist, DAG);
13916 }
13917
13918 return false;
13919}
13920
13921// Return true if there is a nearby consecutive load to the one provided
13922// (regardless of alignment). We search up and down the chain, looking through
13923// token factors and other loads (but nothing else). As a result, a true result
13924// indicates that it is safe to create a new consecutive load adjacent to the
13925// load provided.
13926static bool findConsecutiveLoad(LoadSDNode *LD, SelectionDAG &DAG) {
13927 SDValue Chain = LD->getChain();
13928 EVT VT = LD->getMemoryVT();
13929
13930 SmallSet<SDNode *, 16> LoadRoots;
13931 SmallVector<SDNode *, 8> Queue(1, Chain.getNode());
13932 SmallSet<SDNode *, 16> Visited;
13933
13934 // First, search up the chain, branching to follow all token-factor operands.
13935 // If we find a consecutive load, then we're done, otherwise, record all
13936 // nodes just above the top-level loads and token factors.
13937 while (!Queue.empty()) {
13938 SDNode *ChainNext = Queue.pop_back_val();
13939 if (!Visited.insert(ChainNext).second)
13940 continue;
13941
13942 if (MemSDNode *ChainLD = dyn_cast<MemSDNode>(ChainNext)) {
13943 if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG))
13944 return true;
13945
13946 if (!Visited.count(ChainLD->getChain().getNode()))
13947 Queue.push_back(ChainLD->getChain().getNode());
13948 } else if (ChainNext->getOpcode() == ISD::TokenFactor) {
13949 for (const SDUse &O : ChainNext->ops())
13950 if (!Visited.count(O.getNode()))
13951 Queue.push_back(O.getNode());
13952 } else
13953 LoadRoots.insert(ChainNext);
13954 }
13955
13956 // Second, search down the chain, starting from the top-level nodes recorded
13957 // in the first phase. These top-level nodes are the nodes just above all
13958 // loads and token factors. Starting with their uses, recursively look through
13959 // all loads (just the chain uses) and token factors to find a consecutive
13960 // load.
13961 Visited.clear();
13962 Queue.clear();
13963
13964 for (SDNode *I : LoadRoots) {
13965 Queue.push_back(I);
13966
13967 while (!Queue.empty()) {
13968 SDNode *LoadRoot = Queue.pop_back_val();
13969 if (!Visited.insert(LoadRoot).second)
13970 continue;
13971
13972 if (MemSDNode *ChainLD = dyn_cast<MemSDNode>(LoadRoot))
13973 if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG))
13974 return true;
13975
13976 for (SDNode *U : LoadRoot->uses())
13977 if (((isa<MemSDNode>(U) &&
13978 cast<MemSDNode>(U)->getChain().getNode() == LoadRoot) ||
13979 U->getOpcode() == ISD::TokenFactor) &&
13980 !Visited.count(U))
13981 Queue.push_back(U);
13982 }
13983 }
13984
13985 return false;
13986}
13987
13988/// This function is called when we have proved that a SETCC node can be replaced
13989/// by subtraction (and other supporting instructions) so that the result of
13990/// comparison is kept in a GPR instead of CR. This function is purely for
13991/// codegen purposes and has some flags to guide the codegen process.
13992static SDValue generateEquivalentSub(SDNode *N, int Size, bool Complement,
13993 bool Swap, SDLoc &DL, SelectionDAG &DAG) {
13994 assert(N->getOpcode() == ISD::SETCC && "ISD::SETCC Expected.");
13995
13996 // Zero extend the operands to the largest legal integer. Originally, they
13997 // must be of a strictly smaller size.
13998 auto Op0 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(0),
13999 DAG.getConstant(Size, DL, MVT::i32));
14000 auto Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(1),
14001 DAG.getConstant(Size, DL, MVT::i32));
14002
14003 // Swap if needed. Depends on the condition code.
14004 if (Swap)
14005 std::swap(Op0, Op1);
14006
14007 // Subtract extended integers.
14008 auto SubNode = DAG.getNode(ISD::SUB, DL, MVT::i64, Op0, Op1);
14009
14010 // Move the sign bit to the least significant position and zero out the rest.
14011 // Now the least significant bit carries the result of the original comparison.
14012 auto Shifted = DAG.getNode(ISD::SRL, DL, MVT::i64, SubNode,
14013 DAG.getConstant(Size - 1, DL, MVT::i32));
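 // For example, with 32-bit operands and Size == 64, (setult a, b) becomes
 // trunc(((zext a) - (zext b)) >> 63): the 64-bit difference is negative
 // exactly when a <u b, so the shifted-down sign bit is the comparison result.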
14014 auto Final = Shifted;
14015
14016 // Complement the result if needed. Based on the condition code.
14017 if (Complement)
14018 Final = DAG.getNode(ISD::XOR, DL, MVT::i64, Shifted,
14019 DAG.getConstant(1, DL, MVT::i64));
14020
14021 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, Final);
14022}
14023
14024SDValue PPCTargetLowering::ConvertSETCCToSubtract(SDNode *N,
14025 DAGCombinerInfo &DCI) const {
14026 assert(N->getOpcode() == ISD::SETCC && "ISD::SETCC Expected.");
14027
14028 SelectionDAG &DAG = DCI.DAG;
14029 SDLoc DL(N);
14030
14031 // Size of integers being compared has a critical role in the following
14032 // analysis, so we prefer to do this when all types are legal.
14033 if (!DCI.isAfterLegalizeDAG())
14034 return SDValue();
14035
14036 // If all users of SETCC extend its value to a legal integer type
14037 // then we replace SETCC with a subtraction
14038 for (const SDNode *U : N->uses())
14039 if (U->getOpcode() != ISD::ZERO_EXTEND)
14040 return SDValue();
14041
14042 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
14043 auto OpSize = N->getOperand(0).getValueSizeInBits();
14044
14045 unsigned Size = DAG.getDataLayout().getLargestLegalIntTypeSizeInBits();
14046
14047 if (OpSize < Size) {
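 // Each unsigned predicate is expressed through "a <u b" (the sign of the
 // extended subtraction): ult(a,b) directly, ugt(a,b) = ult(b,a) (Swap),
 // uge(a,b) = !ult(a,b) (Complement), and ule(a,b) = !ult(b,a) (both).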
14048 switch (CC) {
14049 default: break;
14050 case ISD::SETULT:
14051 return generateEquivalentSub(N, Size, false, false, DL, DAG);
14052 case ISD::SETULE:
14053 return generateEquivalentSub(N, Size, true, true, DL, DAG);
14054 case ISD::SETUGT:
14055 return generateEquivalentSub(N, Size, false, true, DL, DAG);
14056 case ISD::SETUGE:
14057 return generateEquivalentSub(N, Size, true, false, DL, DAG);
14058 }
14059 }
14060
14061 return SDValue();
14062}
14063
14064SDValue PPCTargetLowering::DAGCombineTruncBoolExt(SDNode *N,
14065 DAGCombinerInfo &DCI) const {
14066 SelectionDAG &DAG = DCI.DAG;
14067 SDLoc dl(N);
14068
14069 assert(Subtarget.useCRBits() && "Expecting to be tracking CR bits");
14070 // If we're tracking CR bits, we need to be careful that we don't have:
14071 // trunc(binary-ops(zext(x), zext(y)))
14072 // or
14073 // trunc(binary-ops(binary-ops(zext(x), zext(y)), ...)
14074 // such that we're unnecessarily moving things into GPRs when it would be
14075 // better to keep them in CR bits.
14076
14077 // Note that trunc here can be an actual i1 trunc, or can be the effective
14078 // truncation that comes from a setcc or select_cc.
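 // For example, (trunc (and (zext i1 %a), (zext i1 %b)) to i1) can be kept as
 // an AND of the two CR bits instead of extending %a and %b into GPRs,
 // AND-ing there, and truncating the result back to a CR bit.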
14079 if (N->getOpcode() == ISD::TRUNCATE &&
14080 N->getValueType(0) != MVT::i1)
14081 return SDValue();
14082
14083 if (N->getOperand(0).getValueType() != MVT::i32 &&
14084 N->getOperand(0).getValueType() != MVT::i64)
14085 return SDValue();
14086
14087 if (N->getOpcode() == ISD::SETCC ||
14088 N->getOpcode() == ISD::SELECT_CC) {
14089 // If we're looking at a comparison, then we need to make sure that the
14090 // high bits (all except for the first) don't affect the result.
14091 ISD::CondCode CC =
14092 cast<CondCodeSDNode>(N->getOperand(
14093 N->getOpcode() == ISD::SETCC ? 2 : 4))->get();
14094 unsigned OpBits = N->getOperand(0).getValueSizeInBits();
14095
14096     if (ISD::isSignedIntSetCC(CC)) {
14097 if (DAG.ComputeNumSignBits(N->getOperand(0)) != OpBits ||
14098 DAG.ComputeNumSignBits(N->getOperand(1)) != OpBits)
14099 return SDValue();
14100 } else if (ISD::isUnsignedIntSetCC(CC)) {
14101 if (!DAG.MaskedValueIsZero(N->getOperand(0),
14102 APInt::getHighBitsSet(OpBits, OpBits-1)) ||
14103 !DAG.MaskedValueIsZero(N->getOperand(1),
14104 APInt::getHighBitsSet(OpBits, OpBits-1)))
14105 return (N->getOpcode() == ISD::SETCC ? ConvertSETCCToSubtract(N, DCI)
14106 : SDValue());
14107 } else {
14108       // This is neither a signed nor an unsigned comparison; just make sure
14109 // that the high bits are equal.
14110 KnownBits Op1Known = DAG.computeKnownBits(N->getOperand(0));
14111 KnownBits Op2Known = DAG.computeKnownBits(N->getOperand(1));
14112
14113 // We don't really care about what is known about the first bit (if
14114 // anything), so pretend that it is known zero for both to ensure they can
14115 // be compared as constants.
14116 Op1Known.Zero.setBit(0); Op1Known.One.clearBit(0);
14117 Op2Known.Zero.setBit(0); Op2Known.One.clearBit(0);
14118
14119 if (!Op1Known.isConstant() || !Op2Known.isConstant() ||
14120 Op1Known.getConstant() != Op2Known.getConstant())
14121 return SDValue();
14122 }
14123 }
14124
14125   // We now know that the higher-order bits are irrelevant; we just need to
14126 // make sure that all of the intermediate operations are bit operations, and
14127 // all inputs are extensions.
14128 if (N->getOperand(0).getOpcode() != ISD::AND &&
14129 N->getOperand(0).getOpcode() != ISD::OR &&
14130 N->getOperand(0).getOpcode() != ISD::XOR &&
14131 N->getOperand(0).getOpcode() != ISD::SELECT &&
14132 N->getOperand(0).getOpcode() != ISD::SELECT_CC &&
14133 N->getOperand(0).getOpcode() != ISD::TRUNCATE &&
14134 N->getOperand(0).getOpcode() != ISD::SIGN_EXTEND &&
14135 N->getOperand(0).getOpcode() != ISD::ZERO_EXTEND &&
14136 N->getOperand(0).getOpcode() != ISD::ANY_EXTEND)
14137 return SDValue();
14138
14139 if ((N->getOpcode() == ISD::SETCC || N->getOpcode() == ISD::SELECT_CC) &&
14140 N->getOperand(1).getOpcode() != ISD::AND &&
14141 N->getOperand(1).getOpcode() != ISD::OR &&
14142 N->getOperand(1).getOpcode() != ISD::XOR &&
14143 N->getOperand(1).getOpcode() != ISD::SELECT &&
14144 N->getOperand(1).getOpcode() != ISD::SELECT_CC &&
14145 N->getOperand(1).getOpcode() != ISD::TRUNCATE &&
14146 N->getOperand(1).getOpcode() != ISD::SIGN_EXTEND &&
14147 N->getOperand(1).getOpcode() != ISD::ZERO_EXTEND &&
14148 N->getOperand(1).getOpcode() != ISD::ANY_EXTEND)
14149 return SDValue();
14150
14151   SmallVector<SDValue, 4> Inputs;
14152   SmallVector<SDValue, 8> BinOps, PromOps;
14153   SmallPtrSet<SDNode *, 16> Visited;
14154
14155 for (unsigned i = 0; i < 2; ++i) {
14156 if (((N->getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
14157 N->getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
14158 N->getOperand(i).getOpcode() == ISD::ANY_EXTEND) &&
14159 N->getOperand(i).getOperand(0).getValueType() == MVT::i1) ||
14160 isa<ConstantSDNode>(N->getOperand(i)))
14161 Inputs.push_back(N->getOperand(i));
14162 else
14163 BinOps.push_back(N->getOperand(i));
14164
14165 if (N->getOpcode() == ISD::TRUNCATE)
14166 break;
14167 }
14168
14169 // Visit all inputs, collect all binary operations (and, or, xor and
14170 // select) that are all fed by extensions.
14171 while (!BinOps.empty()) {
14172 SDValue BinOp = BinOps.pop_back_val();
14173
14174 if (!Visited.insert(BinOp.getNode()).second)
14175 continue;
14176
14177 PromOps.push_back(BinOp);
14178
14179 for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) {
14180 // The condition of the select is not promoted.
14181 if (BinOp.getOpcode() == ISD::SELECT && i == 0)
14182 continue;
14183 if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3)
14184 continue;
14185
14186 if (((BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
14187 BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
14188 BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) &&
14189 BinOp.getOperand(i).getOperand(0).getValueType() == MVT::i1) ||
14190 isa<ConstantSDNode>(BinOp.getOperand(i))) {
14191 Inputs.push_back(BinOp.getOperand(i));
14192 } else if (BinOp.getOperand(i).getOpcode() == ISD::AND ||
14193 BinOp.getOperand(i).getOpcode() == ISD::OR ||
14194 BinOp.getOperand(i).getOpcode() == ISD::XOR ||
14195 BinOp.getOperand(i).getOpcode() == ISD::SELECT ||
14196 BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC ||
14197 BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE ||
14198 BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
14199 BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
14200 BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) {
14201 BinOps.push_back(BinOp.getOperand(i));
14202 } else {
14203 // We have an input that is not an extension or another binary
14204 // operation; we'll abort this transformation.
14205 return SDValue();
14206 }
14207 }
14208 }
14209
14210 // Make sure that this is a self-contained cluster of operations (which
14211 // is not quite the same thing as saying that everything has only one
14212 // use).
14213 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
14214 if (isa<ConstantSDNode>(Inputs[i]))
14215 continue;
14216
14217 for (const SDNode *User : Inputs[i].getNode()->uses()) {
14218 if (User != N && !Visited.count(User))
14219 return SDValue();
14220
14221 // Make sure that we're not going to promote the non-output-value
14222       // operand(s) of SELECT or SELECT_CC.
14223 // FIXME: Although we could sometimes handle this, and it does occur in
14224 // practice that one of the condition inputs to the select is also one of
14225 // the outputs, we currently can't deal with this.
14226 if (User->getOpcode() == ISD::SELECT) {
14227 if (User->getOperand(0) == Inputs[i])
14228 return SDValue();
14229 } else if (User->getOpcode() == ISD::SELECT_CC) {
14230 if (User->getOperand(0) == Inputs[i] ||
14231 User->getOperand(1) == Inputs[i])
14232 return SDValue();
14233 }
14234 }
14235 }
14236
14237 for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) {
14238 for (const SDNode *User : PromOps[i].getNode()->uses()) {
14239 if (User != N && !Visited.count(User))
14240 return SDValue();
14241
14242 // Make sure that we're not going to promote the non-output-value
14243       // operand(s) of SELECT or SELECT_CC.
14244 // FIXME: Although we could sometimes handle this, and it does occur in
14245 // practice that one of the condition inputs to the select is also one of
14246 // the outputs, we currently can't deal with this.
14247 if (User->getOpcode() == ISD::SELECT) {
14248 if (User->getOperand(0) == PromOps[i])
14249 return SDValue();
14250 } else if (User->getOpcode() == ISD::SELECT_CC) {
14251 if (User->getOperand(0) == PromOps[i] ||
14252 User->getOperand(1) == PromOps[i])
14253 return SDValue();
14254 }
14255 }
14256 }
14257
14258 // Replace all inputs with the extension operand.
14259 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
14260 // Constants may have users outside the cluster of to-be-promoted nodes,
14261 // and so we need to replace those as we do the promotions.
14262 if (isa<ConstantSDNode>(Inputs[i]))
14263 continue;
14264 else
14265 DAG.ReplaceAllUsesOfValueWith(Inputs[i], Inputs[i].getOperand(0));
14266 }
14267
14268 std::list<HandleSDNode> PromOpHandles;
14269 for (auto &PromOp : PromOps)
14270 PromOpHandles.emplace_back(PromOp);
14271
14272 // Replace all operations (these are all the same, but have a different
14273 // (i1) return type). DAG.getNode will validate that the types of
14274 // a binary operator match, so go through the list in reverse so that
14275 // we've likely promoted both operands first. Any intermediate truncations or
14276 // extensions disappear.
14277 while (!PromOpHandles.empty()) {
14278 SDValue PromOp = PromOpHandles.back().getValue();
14279 PromOpHandles.pop_back();
14280
14281 if (PromOp.getOpcode() == ISD::TRUNCATE ||
14282 PromOp.getOpcode() == ISD::SIGN_EXTEND ||
14283 PromOp.getOpcode() == ISD::ZERO_EXTEND ||
14284 PromOp.getOpcode() == ISD::ANY_EXTEND) {
14285 if (!isa<ConstantSDNode>(PromOp.getOperand(0)) &&
14286 PromOp.getOperand(0).getValueType() != MVT::i1) {
14287 // The operand is not yet ready (see comment below).
14288 PromOpHandles.emplace_front(PromOp);
14289 continue;
14290 }
14291
14292 SDValue RepValue = PromOp.getOperand(0);
14293 if (isa<ConstantSDNode>(RepValue))
14294 RepValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, RepValue);
14295
14296 DAG.ReplaceAllUsesOfValueWith(PromOp, RepValue);
14297 continue;
14298 }
14299
14300 unsigned C;
14301 switch (PromOp.getOpcode()) {
14302 default: C = 0; break;
14303 case ISD::SELECT: C = 1; break;
14304 case ISD::SELECT_CC: C = 2; break;
14305 }
14306
14307 if ((!isa<ConstantSDNode>(PromOp.getOperand(C)) &&
14308 PromOp.getOperand(C).getValueType() != MVT::i1) ||
14309 (!isa<ConstantSDNode>(PromOp.getOperand(C+1)) &&
14310 PromOp.getOperand(C+1).getValueType() != MVT::i1)) {
14311 // The to-be-promoted operands of this node have not yet been
14312 // promoted (this should be rare because we're going through the
14313 // list backward, but if one of the operands has several users in
14314 // this cluster of to-be-promoted nodes, it is possible).
14315 PromOpHandles.emplace_front(PromOp);
14316 continue;
14317 }
14318
14319     SmallVector<SDValue, 3> Ops(PromOp.getNode()->op_begin(),
14320                                 PromOp.getNode()->op_end());
14321
14322 // If there are any constant inputs, make sure they're replaced now.
14323 for (unsigned i = 0; i < 2; ++i)
14324 if (isa<ConstantSDNode>(Ops[C+i]))
14325 Ops[C+i] = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, Ops[C+i]);
14326
14327 DAG.ReplaceAllUsesOfValueWith(PromOp,
14328 DAG.getNode(PromOp.getOpcode(), dl, MVT::i1, Ops));
14329 }
14330
14331 // Now we're left with the initial truncation itself.
14332 if (N->getOpcode() == ISD::TRUNCATE)
14333 return N->getOperand(0);
14334
14335 // Otherwise, this is a comparison. The operands to be compared have just
14336 // changed type (to i1), but everything else is the same.
14337 return SDValue(N, 0);
14338}
14339
14340SDValue PPCTargetLowering::DAGCombineExtBoolTrunc(SDNode *N,
14341 DAGCombinerInfo &DCI) const {
14342 SelectionDAG &DAG = DCI.DAG;
14343 SDLoc dl(N);
14344
14345 // If we're tracking CR bits, we need to be careful that we don't have:
14346 // zext(binary-ops(trunc(x), trunc(y)))
14347 // or
14348 // zext(binary-ops(binary-ops(trunc(x), trunc(y)), ...)
14349 // such that we're unnecessarily moving things into CR bits that can more
14350 // efficiently stay in GPRs. Note that if we're not certain that the high
14351 // bits are set as required by the final extension, we still may need to do
14352 // some masking to get the proper behavior.
14353
14354 // This same functionality is important on PPC64 when dealing with
14355 // 32-to-64-bit extensions; these occur often when 32-bit values are used as
14356 // the return values of functions. Because it is so similar, it is handled
14357 // here as well.
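  // For example (an illustrative sketch on a PPC64 subtarget):
  //   (zext (and (trunc i64 %x to i32), (trunc i64 %y to i32)) to i64)
  // can perform the AND directly on the i64 values; extra masking is only
  // needed if the high bits are not already known to be correct.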
14358
14359 if (N->getValueType(0) != MVT::i32 &&
14360 N->getValueType(0) != MVT::i64)
14361 return SDValue();
14362
14363 if (!((N->getOperand(0).getValueType() == MVT::i1 && Subtarget.useCRBits()) ||
14364 (N->getOperand(0).getValueType() == MVT::i32 && Subtarget.isPPC64())))
14365 return SDValue();
14366
14367 if (N->getOperand(0).getOpcode() != ISD::AND &&
14368 N->getOperand(0).getOpcode() != ISD::OR &&
14369 N->getOperand(0).getOpcode() != ISD::XOR &&
14370 N->getOperand(0).getOpcode() != ISD::SELECT &&
14371 N->getOperand(0).getOpcode() != ISD::SELECT_CC)
14372 return SDValue();
14373
14374   SmallVector<SDValue, 4> Inputs;
14375   SmallVector<SDValue, 8> BinOps(1, N->getOperand(0)), PromOps;
14376   SmallPtrSet<SDNode *, 16> Visited;
14377
14378 // Visit all inputs, collect all binary operations (and, or, xor and
14379 // select) that are all fed by truncations.
14380 while (!BinOps.empty()) {
14381 SDValue BinOp = BinOps.pop_back_val();
14382
14383 if (!Visited.insert(BinOp.getNode()).second)
14384 continue;
14385
14386 PromOps.push_back(BinOp);
14387
14388 for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) {
14389 // The condition of the select is not promoted.
14390 if (BinOp.getOpcode() == ISD::SELECT && i == 0)
14391 continue;
14392 if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3)
14393 continue;
14394
14395 if (BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE ||
14396 isa<ConstantSDNode>(BinOp.getOperand(i))) {
14397 Inputs.push_back(BinOp.getOperand(i));
14398 } else if (BinOp.getOperand(i).getOpcode() == ISD::AND ||
14399 BinOp.getOperand(i).getOpcode() == ISD::OR ||
14400 BinOp.getOperand(i).getOpcode() == ISD::XOR ||
14401 BinOp.getOperand(i).getOpcode() == ISD::SELECT ||
14402 BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC) {
14403 BinOps.push_back(BinOp.getOperand(i));
14404 } else {
14405 // We have an input that is not a truncation or another binary
14406 // operation; we'll abort this transformation.
14407 return SDValue();
14408 }
14409 }
14410 }
14411
14412 // The operands of a select that must be truncated when the select is
14413 // promoted because the operand is actually part of the to-be-promoted set.
14414 DenseMap<SDNode *, EVT> SelectTruncOp[2];
14415
14416 // Make sure that this is a self-contained cluster of operations (which
14417 // is not quite the same thing as saying that everything has only one
14418 // use).
14419 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
14420 if (isa<ConstantSDNode>(Inputs[i]))
14421 continue;
14422
14423 for (SDNode *User : Inputs[i].getNode()->uses()) {
14424 if (User != N && !Visited.count(User))
14425 return SDValue();
14426
14427       // If we're going to promote the non-output-value operand(s) of SELECT or
14428 // SELECT_CC, record them for truncation.
14429 if (User->getOpcode() == ISD::SELECT) {
14430 if (User->getOperand(0) == Inputs[i])
14431 SelectTruncOp[0].insert(std::make_pair(User,
14432 User->getOperand(0).getValueType()));
14433 } else if (User->getOpcode() == ISD::SELECT_CC) {
14434 if (User->getOperand(0) == Inputs[i])
14435 SelectTruncOp[0].insert(std::make_pair(User,
14436 User->getOperand(0).getValueType()));
14437 if (User->getOperand(1) == Inputs[i])
14438 SelectTruncOp[1].insert(std::make_pair(User,
14439 User->getOperand(1).getValueType()));
14440 }
14441 }
14442 }
14443
14444 for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) {
14445 for (SDNode *User : PromOps[i].getNode()->uses()) {
14446 if (User != N && !Visited.count(User))
14447 return SDValue();
14448
14449       // If we're going to promote the non-output-value operand(s) of SELECT or
14450 // SELECT_CC, record them for truncation.
14451 if (User->getOpcode() == ISD::SELECT) {
14452 if (User->getOperand(0) == PromOps[i])
14453 SelectTruncOp[0].insert(std::make_pair(User,
14454 User->getOperand(0).getValueType()));
14455 } else if (User->getOpcode() == ISD::SELECT_CC) {
14456 if (User->getOperand(0) == PromOps[i])
14457 SelectTruncOp[0].insert(std::make_pair(User,
14458 User->getOperand(0).getValueType()));
14459 if (User->getOperand(1) == PromOps[i])
14460 SelectTruncOp[1].insert(std::make_pair(User,
14461 User->getOperand(1).getValueType()));
14462 }
14463 }
14464 }
14465
14466 unsigned PromBits = N->getOperand(0).getValueSizeInBits();
14467 bool ReallyNeedsExt = false;
14468 if (N->getOpcode() != ISD::ANY_EXTEND) {
14469     // If any of the inputs is not already sign/zero extended, then
14470 // we'll still need to do that at the end.
14471 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
14472 if (isa<ConstantSDNode>(Inputs[i]))
14473 continue;
14474
14475 unsigned OpBits =
14476 Inputs[i].getOperand(0).getValueSizeInBits();
14477 assert(PromBits < OpBits && "Truncation not to a smaller bit count?");
14478
14479 if ((N->getOpcode() == ISD::ZERO_EXTEND &&
14480 !DAG.MaskedValueIsZero(Inputs[i].getOperand(0),
14481 APInt::getHighBitsSet(OpBits,
14482 OpBits-PromBits))) ||
14483 (N->getOpcode() == ISD::SIGN_EXTEND &&
14484 DAG.ComputeNumSignBits(Inputs[i].getOperand(0)) <
14485 (OpBits-(PromBits-1)))) {
14486 ReallyNeedsExt = true;
14487 break;
14488 }
14489 }
14490 }
14491
14492 // Replace all inputs, either with the truncation operand, or a
14493 // truncation or extension to the final output type.
14494 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
14495 // Constant inputs need to be replaced with the to-be-promoted nodes that
14496 // use them because they might have users outside of the cluster of
14497 // promoted nodes.
14498 if (isa<ConstantSDNode>(Inputs[i]))
14499 continue;
14500
14501 SDValue InSrc = Inputs[i].getOperand(0);
14502 if (Inputs[i].getValueType() == N->getValueType(0))
14503 DAG.ReplaceAllUsesOfValueWith(Inputs[i], InSrc);
14504 else if (N->getOpcode() == ISD::SIGN_EXTEND)
14505 DAG.ReplaceAllUsesOfValueWith(Inputs[i],
14506 DAG.getSExtOrTrunc(InSrc, dl, N->getValueType(0)));
14507 else if (N->getOpcode() == ISD::ZERO_EXTEND)
14508 DAG.ReplaceAllUsesOfValueWith(Inputs[i],
14509 DAG.getZExtOrTrunc(InSrc, dl, N->getValueType(0)));
14510 else
14511 DAG.ReplaceAllUsesOfValueWith(Inputs[i],
14512 DAG.getAnyExtOrTrunc(InSrc, dl, N->getValueType(0)));
14513 }
14514
14515 std::list<HandleSDNode> PromOpHandles;
14516 for (auto &PromOp : PromOps)
14517 PromOpHandles.emplace_back(PromOp);
14518
14519 // Replace all operations (these are all the same, but have a different
14520 // (promoted) return type). DAG.getNode will validate that the types of
14521 // a binary operator match, so go through the list in reverse so that
14522 // we've likely promoted both operands first.
14523 while (!PromOpHandles.empty()) {
14524 SDValue PromOp = PromOpHandles.back().getValue();
14525 PromOpHandles.pop_back();
14526
14527 unsigned C;
14528 switch (PromOp.getOpcode()) {
14529 default: C = 0; break;
14530 case ISD::SELECT: C = 1; break;
14531 case ISD::SELECT_CC: C = 2; break;
14532 }
14533
14534 if ((!isa<ConstantSDNode>(PromOp.getOperand(C)) &&
14535 PromOp.getOperand(C).getValueType() != N->getValueType(0)) ||
14536 (!isa<ConstantSDNode>(PromOp.getOperand(C+1)) &&
14537 PromOp.getOperand(C+1).getValueType() != N->getValueType(0))) {
14538 // The to-be-promoted operands of this node have not yet been
14539 // promoted (this should be rare because we're going through the
14540 // list backward, but if one of the operands has several users in
14541 // this cluster of to-be-promoted nodes, it is possible).
14542 PromOpHandles.emplace_front(PromOp);
14543 continue;
14544 }
14545
14546 // For SELECT and SELECT_CC nodes, we do a similar check for any
14547 // to-be-promoted comparison inputs.
14548 if (PromOp.getOpcode() == ISD::SELECT ||
14549 PromOp.getOpcode() == ISD::SELECT_CC) {
14550 if ((SelectTruncOp[0].count(PromOp.getNode()) &&
14551 PromOp.getOperand(0).getValueType() != N->getValueType(0)) ||
14552 (SelectTruncOp[1].count(PromOp.getNode()) &&
14553 PromOp.getOperand(1).getValueType() != N->getValueType(0))) {
14554 PromOpHandles.emplace_front(PromOp);
14555 continue;
14556 }
14557 }
14558
14559     SmallVector<SDValue, 3> Ops(PromOp.getNode()->op_begin(),
14560                                 PromOp.getNode()->op_end());
14561
14562 // If this node has constant inputs, then they'll need to be promoted here.
14563 for (unsigned i = 0; i < 2; ++i) {
14564 if (!isa<ConstantSDNode>(Ops[C+i]))
14565 continue;
14566 if (Ops[C+i].getValueType() == N->getValueType(0))
14567 continue;
14568
14569 if (N->getOpcode() == ISD::SIGN_EXTEND)
14570 Ops[C+i] = DAG.getSExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
14571 else if (N->getOpcode() == ISD::ZERO_EXTEND)
14572 Ops[C+i] = DAG.getZExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
14573 else
14574 Ops[C+i] = DAG.getAnyExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
14575 }
14576
14577 // If we've promoted the comparison inputs of a SELECT or SELECT_CC,
14578 // truncate them again to the original value type.
14579 if (PromOp.getOpcode() == ISD::SELECT ||
14580 PromOp.getOpcode() == ISD::SELECT_CC) {
14581 auto SI0 = SelectTruncOp[0].find(PromOp.getNode());
14582 if (SI0 != SelectTruncOp[0].end())
14583 Ops[0] = DAG.getNode(ISD::TRUNCATE, dl, SI0->second, Ops[0]);
14584 auto SI1 = SelectTruncOp[1].find(PromOp.getNode());
14585 if (SI1 != SelectTruncOp[1].end())
14586 Ops[1] = DAG.getNode(ISD::TRUNCATE, dl, SI1->second, Ops[1]);
14587 }
14588
14589 DAG.ReplaceAllUsesOfValueWith(PromOp,
14590 DAG.getNode(PromOp.getOpcode(), dl, N->getValueType(0), Ops));
14591 }
14592
14593 // Now we're left with the initial extension itself.
14594 if (!ReallyNeedsExt)
14595 return N->getOperand(0);
14596
14597 // To zero extend, just mask off everything except for the first bit (in the
14598 // i1 case).
14599 if (N->getOpcode() == ISD::ZERO_EXTEND)
14600 return DAG.getNode(ISD::AND, dl, N->getValueType(0), N->getOperand(0),
14601                        DAG.getConstant(APInt::getLowBitsSet(
14602                                          N->getValueSizeInBits(0), PromBits),
14603 dl, N->getValueType(0)));
14604
14605 assert(N->getOpcode() == ISD::SIGN_EXTEND &&
14606 "Invalid extension type");
14607 EVT ShiftAmountTy = getShiftAmountTy(N->getValueType(0), DAG.getDataLayout());
14608 SDValue ShiftCst =
14609 DAG.getConstant(N->getValueSizeInBits(0) - PromBits, dl, ShiftAmountTy);
14610 return DAG.getNode(
14611 ISD::SRA, dl, N->getValueType(0),
14612 DAG.getNode(ISD::SHL, dl, N->getValueType(0), N->getOperand(0), ShiftCst),
14613 ShiftCst);
14614}
14615
14616SDValue PPCTargetLowering::combineSetCC(SDNode *N,
14617 DAGCombinerInfo &DCI) const {
14618 assert(N->getOpcode() == ISD::SETCC &&
14619 "Should be called with a SETCC node");
14620
14621 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
14622 if (CC == ISD::SETNE || CC == ISD::SETEQ) {
14623 SDValue LHS = N->getOperand(0);
14624 SDValue RHS = N->getOperand(1);
14625
14626 // If there is a '0 - y' pattern, canonicalize the pattern to the RHS.
14627 if (LHS.getOpcode() == ISD::SUB && isNullConstant(LHS.getOperand(0)) &&
14628 LHS.hasOneUse())
14629 std::swap(LHS, RHS);
14630
14631 // x == 0-y --> x+y == 0
14632 // x != 0-y --> x+y != 0
14633 if (RHS.getOpcode() == ISD::SUB && isNullConstant(RHS.getOperand(0)) &&
14634 RHS.hasOneUse()) {
14635 SDLoc DL(N);
14636 SelectionDAG &DAG = DCI.DAG;
14637 EVT VT = N->getValueType(0);
14638 EVT OpVT = LHS.getValueType();
14639 SDValue Add = DAG.getNode(ISD::ADD, DL, OpVT, LHS, RHS.getOperand(1));
14640 return DAG.getSetCC(DL, VT, Add, DAG.getConstant(0, DL, OpVT), CC);
14641 }
14642 }
14643
14644 return DAGCombineTruncBoolExt(N, DCI);
14645}
14646
14647// Is this an extending load from an f32 to an f64?
14648static bool isFPExtLoad(SDValue Op) {
14649 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Op.getNode()))
14650 return LD->getExtensionType() == ISD::EXTLOAD &&
14651 Op.getValueType() == MVT::f64;
14652 return false;
14653}
14654
14655 /// Reduces the number of fp-to-int conversions when building a vector.
14656///
14657/// If this vector is built out of floating to integer conversions,
14658/// transform it to a vector built out of floating point values followed by a
14659/// single floating to integer conversion of the vector.
14660/// Namely (build_vector (fptosi $A), (fptosi $B), ...)
14661/// becomes (fptosi (build_vector ($A, $B, ...)))
14662SDValue PPCTargetLowering::
14663combineElementTruncationToVectorTruncation(SDNode *N,
14664 DAGCombinerInfo &DCI) const {
14665 assert(N->getOpcode() == ISD::BUILD_VECTOR &&
14666 "Should be called with a BUILD_VECTOR node");
14667
14668 SelectionDAG &DAG = DCI.DAG;
14669 SDLoc dl(N);
14670
14671 SDValue FirstInput = N->getOperand(0);
14672 assert(FirstInput.getOpcode() == PPCISD::MFVSR &&
14673 "The input operand must be an fp-to-int conversion.");
14674
14675 // This combine happens after legalization so the fp_to_[su]i nodes are
14676   // already converted to PPCISD nodes.
14677 unsigned FirstConversion = FirstInput.getOperand(0).getOpcode();
14678 if (FirstConversion == PPCISD::FCTIDZ ||
14679 FirstConversion == PPCISD::FCTIDUZ ||
14680 FirstConversion == PPCISD::FCTIWZ ||
14681 FirstConversion == PPCISD::FCTIWUZ) {
14682 bool IsSplat = true;
14683 bool Is32Bit = FirstConversion == PPCISD::FCTIWZ ||
14684 FirstConversion == PPCISD::FCTIWUZ;
14685 EVT SrcVT = FirstInput.getOperand(0).getValueType();
14686     SmallVector<SDValue, 4> Ops;
14687 EVT TargetVT = N->getValueType(0);
14688 for (int i = 0, e = N->getNumOperands(); i < e; ++i) {
14689 SDValue NextOp = N->getOperand(i);
14690 if (NextOp.getOpcode() != PPCISD::MFVSR)
14691 return SDValue();
14692 unsigned NextConversion = NextOp.getOperand(0).getOpcode();
14693 if (NextConversion != FirstConversion)
14694 return SDValue();
14695 // If we are converting to 32-bit integers, we need to add an FP_ROUND.
14696 // This is not valid if the input was originally double precision. It is
14697 // also not profitable to do unless this is an extending load in which
14698 // case doing this combine will allow us to combine consecutive loads.
14699 if (Is32Bit && !isFPExtLoad(NextOp.getOperand(0).getOperand(0)))
14700 return SDValue();
14701 if (N->getOperand(i) != FirstInput)
14702 IsSplat = false;
14703 }
14704
14705 // If this is a splat, we leave it as-is since there will be only a single
14706 // fp-to-int conversion followed by a splat of the integer. This is better
14707 // for 32-bit and smaller ints and neutral for 64-bit ints.
14708 if (IsSplat)
14709 return SDValue();
14710
14711 // Now that we know we have the right type of node, get its operands
14712 for (int i = 0, e = N->getNumOperands(); i < e; ++i) {
14713 SDValue In = N->getOperand(i).getOperand(0);
14714 if (Is32Bit) {
14715 // For 32-bit values, we need to add an FP_ROUND node (if we made it
14716 // here, we know that all inputs are extending loads so this is safe).
14717 if (In.isUndef())
14718 Ops.push_back(DAG.getUNDEF(SrcVT));
14719 else {
14720 SDValue Trunc =
14721 DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, In.getOperand(0),
14722 DAG.getIntPtrConstant(1, dl, /*isTarget=*/true));
14723 Ops.push_back(Trunc);
14724 }
14725 } else
14726 Ops.push_back(In.isUndef() ? DAG.getUNDEF(SrcVT) : In.getOperand(0));
14727 }
14728
14729 unsigned Opcode;
14730 if (FirstConversion == PPCISD::FCTIDZ ||
14731 FirstConversion == PPCISD::FCTIWZ)
14732 Opcode = ISD::FP_TO_SINT;
14733 else
14734 Opcode = ISD::FP_TO_UINT;
14735
14736 EVT NewVT = TargetVT == MVT::v2i64 ? MVT::v2f64 : MVT::v4f32;
14737 SDValue BV = DAG.getBuildVector(NewVT, dl, Ops);
14738 return DAG.getNode(Opcode, dl, TargetVT, BV);
14739 }
14740 return SDValue();
14741}
14742
14743/// Reduce the number of loads when building a vector.
14744///
14745/// Building a vector out of multiple loads can be converted to a load
14746/// of the vector type if the loads are consecutive. If the loads are
14747/// consecutive but in descending order, a shuffle is added at the end
14748/// to reorder the vector.
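/// For example (an illustrative sketch):
///   (v4i32 (build_vector (load %p), (load %p+4), (load %p+8), (load %p+12)))
/// becomes a single (v4i32 (load %p)); if the addresses instead descend from
/// %p+12, the wide load is taken at the lowest address and followed by a
/// reversing vector_shuffle.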
14749 static SDValue combineBVOfConsecutiveLoads(SDNode *N, SelectionDAG &DAG) {
14750   assert(N->getOpcode() == ISD::BUILD_VECTOR &&
14751 "Should be called with a BUILD_VECTOR node");
14752
14753 SDLoc dl(N);
14754
14755   // Return early for non byte-sized types, as they can't be consecutive.
14756 if (!N->getValueType(0).getVectorElementType().isByteSized())
14757 return SDValue();
14758
14759 bool InputsAreConsecutiveLoads = true;
14760 bool InputsAreReverseConsecutive = true;
14761 unsigned ElemSize = N->getValueType(0).getScalarType().getStoreSize();
14762 SDValue FirstInput = N->getOperand(0);
14763 bool IsRoundOfExtLoad = false;
14764 LoadSDNode *FirstLoad = nullptr;
14765
14766 if (FirstInput.getOpcode() == ISD::FP_ROUND &&
14767 FirstInput.getOperand(0).getOpcode() == ISD::LOAD) {
14768 FirstLoad = cast<LoadSDNode>(FirstInput.getOperand(0));
14769 IsRoundOfExtLoad = FirstLoad->getExtensionType() == ISD::EXTLOAD;
14770 }
14771 // Not a build vector of (possibly fp_rounded) loads.
14772 if ((!IsRoundOfExtLoad && FirstInput.getOpcode() != ISD::LOAD) ||
14773 N->getNumOperands() == 1)
14774 return SDValue();
14775
14776 if (!IsRoundOfExtLoad)
14777 FirstLoad = cast<LoadSDNode>(FirstInput);
14778
14779   SmallVector<LoadSDNode *, 4> InputLoads;
14780   InputLoads.push_back(FirstLoad);
14781 for (int i = 1, e = N->getNumOperands(); i < e; ++i) {
14782 // If any inputs are fp_round(extload), they all must be.
14783 if (IsRoundOfExtLoad && N->getOperand(i).getOpcode() != ISD::FP_ROUND)
14784 return SDValue();
14785
14786 SDValue NextInput = IsRoundOfExtLoad ? N->getOperand(i).getOperand(0) :
14787 N->getOperand(i);
14788 if (NextInput.getOpcode() != ISD::LOAD)
14789 return SDValue();
14790
14791 SDValue PreviousInput =
14792 IsRoundOfExtLoad ? N->getOperand(i-1).getOperand(0) : N->getOperand(i-1);
14793 LoadSDNode *LD1 = cast<LoadSDNode>(PreviousInput);
14794 LoadSDNode *LD2 = cast<LoadSDNode>(NextInput);
14795
14796 // If any inputs are fp_round(extload), they all must be.
14797 if (IsRoundOfExtLoad && LD2->getExtensionType() != ISD::EXTLOAD)
14798 return SDValue();
14799
14800 // We only care about regular loads. The PPC-specific load intrinsics
14801 // will not lead to a merge opportunity.
14802 if (!DAG.areNonVolatileConsecutiveLoads(LD2, LD1, ElemSize, 1))
14803 InputsAreConsecutiveLoads = false;
14804 if (!DAG.areNonVolatileConsecutiveLoads(LD1, LD2, ElemSize, 1))
14805 InputsAreReverseConsecutive = false;
14806
14807 // Exit early if the loads are neither consecutive nor reverse consecutive.
14808 if (!InputsAreConsecutiveLoads && !InputsAreReverseConsecutive)
14809 return SDValue();
14810 InputLoads.push_back(LD2);
14811 }
14812
14813 assert(!(InputsAreConsecutiveLoads && InputsAreReverseConsecutive) &&
14814 "The loads cannot be both consecutive and reverse consecutive.");
14815
14816 SDValue WideLoad;
14817 SDValue ReturnSDVal;
14818 if (InputsAreConsecutiveLoads) {
14819 assert(FirstLoad && "Input needs to be a LoadSDNode.");
14820 WideLoad = DAG.getLoad(N->getValueType(0), dl, FirstLoad->getChain(),
14821 FirstLoad->getBasePtr(), FirstLoad->getPointerInfo(),
14822 FirstLoad->getAlign());
14823 ReturnSDVal = WideLoad;
14824 } else if (InputsAreReverseConsecutive) {
14825 LoadSDNode *LastLoad = InputLoads.back();
14826 assert(LastLoad && "Input needs to be a LoadSDNode.");
14827 WideLoad = DAG.getLoad(N->getValueType(0), dl, LastLoad->getChain(),
14828 LastLoad->getBasePtr(), LastLoad->getPointerInfo(),
14829 LastLoad->getAlign());
14830     SmallVector<int, 16> Ops;
14831 for (int i = N->getNumOperands() - 1; i >= 0; i--)
14832 Ops.push_back(i);
14833
14834 ReturnSDVal = DAG.getVectorShuffle(N->getValueType(0), dl, WideLoad,
14835 DAG.getUNDEF(N->getValueType(0)), Ops);
14836 } else
14837 return SDValue();
14838
14839 for (auto *LD : InputLoads)
14840 DAG.makeEquivalentMemoryOrdering(LD, WideLoad);
14841 return ReturnSDVal;
14842}
14843
14844// This function adds the required vector_shuffle needed to get
14845// the elements of the vector extract in the correct position
14846// as specified by the CorrectElems encoding.
14847 static SDValue addShuffleForVecExtend(SDNode *N, SelectionDAG &DAG,
14848                                       SDValue Input, uint64_t Elems,
14849 uint64_t CorrectElems) {
14850 SDLoc dl(N);
14851
14852 unsigned NumElems = Input.getValueType().getVectorNumElements();
14853 SmallVector<int, 16> ShuffleMask(NumElems, -1);
14854
14855 // Knowing the element indices being extracted from the original
14856 // vector and the order in which they're being inserted, just put
14857 // them at element indices required for the instruction.
14858 for (unsigned i = 0; i < N->getNumOperands(); i++) {
14859 if (DAG.getDataLayout().isLittleEndian())
14860 ShuffleMask[CorrectElems & 0xF] = Elems & 0xF;
14861 else
14862 ShuffleMask[(CorrectElems & 0xF0) >> 4] = (Elems & 0xF0) >> 4;
14863 CorrectElems = CorrectElems >> 8;
14864 Elems = Elems >> 8;
14865 }
14866
14867 SDValue Shuffle =
14868 DAG.getVectorShuffle(Input.getValueType(), dl, Input,
14869 DAG.getUNDEF(Input.getValueType()), ShuffleMask);
14870
14871 EVT VT = N->getValueType(0);
14872 SDValue Conv = DAG.getBitcast(VT, Shuffle);
14873
14874   EVT ExtVT = EVT::getVectorVT(*DAG.getContext(),
14875                                Input.getValueType().getVectorElementType(),
14876                                VT.getVectorNumElements());
14877 return DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, VT, Conv,
14878 DAG.getValueType(ExtVT));
14879}
14880
14881// Look for build vector patterns where input operands come from sign
14882// extended vector_extract elements of specific indices. If the correct indices
14883// aren't used, add a vector shuffle to fix up the indices and create
14884// SIGN_EXTEND_INREG node which selects the vector sign extend instructions
14885// during instruction selection.
14886 static SDValue combineBVOfVecSExt(SDNode *N, SelectionDAG &DAG) {
14887   // This array encodes the indices that the vector sign extend instructions
14888 // extract from when extending from one type to another for both BE and LE.
14889   // The right nibble of each byte corresponds to the LE indices,
14890   // and the left nibble of each byte corresponds to the BE indices.
14891 // For example: 0x3074B8FC byte->word
14892 // For LE: the allowed indices are: 0x0,0x4,0x8,0xC
14893 // For BE: the allowed indices are: 0x3,0x7,0xB,0xF
14894 // For example: 0x000070F8 byte->double word
14895 // For LE: the allowed indices are: 0x0,0x8
14896 // For BE: the allowed indices are: 0x7,0xF
14897 uint64_t TargetElems[] = {
14898 0x3074B8FC, // b->w
14899 0x000070F8, // b->d
14900 0x10325476, // h->w
14901 0x00003074, // h->d
14902 0x00001032, // w->d
14903 };
14904
14905 uint64_t Elems = 0;
14906 int Index;
14907 SDValue Input;
14908
14909 auto isSExtOfVecExtract = [&](SDValue Op) -> bool {
14910 if (!Op)
14911 return false;
14912 if (Op.getOpcode() != ISD::SIGN_EXTEND &&
14913 Op.getOpcode() != ISD::SIGN_EXTEND_INREG)
14914 return false;
14915
14916 // A SIGN_EXTEND_INREG might be fed by an ANY_EXTEND to produce a value
14917 // of the right width.
14918 SDValue Extract = Op.getOperand(0);
14919 if (Extract.getOpcode() == ISD::ANY_EXTEND)
14920 Extract = Extract.getOperand(0);
14921 if (Extract.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
14922 return false;
14923
14924 ConstantSDNode *ExtOp = dyn_cast<ConstantSDNode>(Extract.getOperand(1));
14925 if (!ExtOp)
14926 return false;
14927
14928 Index = ExtOp->getZExtValue();
14929 if (Input && Input != Extract.getOperand(0))
14930 return false;
14931
14932 if (!Input)
14933 Input = Extract.getOperand(0);
14934
14935 Elems = Elems << 8;
14936 Index = DAG.getDataLayout().isLittleEndian() ? Index : Index << 4;
14937 Elems |= Index;
14938
14939 return true;
14940 };
14941
14942   // If the build vector operands aren't sign-extended vector extracts
14943   // of the same input vector, then return.
14944 for (unsigned i = 0; i < N->getNumOperands(); i++) {
14945 if (!isSExtOfVecExtract(N->getOperand(i))) {
14946 return SDValue();
14947 }
14948 }
14949
14950 // If the vector extract indices are not correct, add the appropriate
14951 // vector_shuffle.
14952 int TgtElemArrayIdx;
14953 int InputSize = Input.getValueType().getScalarSizeInBits();
14954 int OutputSize = N->getValueType(0).getScalarSizeInBits();
14955 if (InputSize + OutputSize == 40)
14956 TgtElemArrayIdx = 0;
14957 else if (InputSize + OutputSize == 72)
14958 TgtElemArrayIdx = 1;
14959 else if (InputSize + OutputSize == 48)
14960 TgtElemArrayIdx = 2;
14961 else if (InputSize + OutputSize == 80)
14962 TgtElemArrayIdx = 3;
14963 else if (InputSize + OutputSize == 96)
14964 TgtElemArrayIdx = 4;
14965 else
14966 return SDValue();
14967
14968 uint64_t CorrectElems = TargetElems[TgtElemArrayIdx];
14969 CorrectElems = DAG.getDataLayout().isLittleEndian()
14970 ? CorrectElems & 0x0F0F0F0F0F0F0F0F
14971 : CorrectElems & 0xF0F0F0F0F0F0F0F0;
14972 if (Elems != CorrectElems) {
14973 return addShuffleForVecExtend(N, DAG, Input, Elems, CorrectElems);
14974 }
14975
14976 // Regular lowering will catch cases where a shuffle is not needed.
14977 return SDValue();
14978}
14979
14980// Look for the pattern of a load from a narrow width to i128, feeding
14981// into a BUILD_VECTOR of v1i128. Replace this sequence with a PPCISD node
14982// (LXVRZX). This node represents a zero extending load that will be matched
14983// to the Load VSX Vector Rightmost instructions.
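// For example (an illustrative sketch):
//   (v1i128 (build_vector (i128 (zextload i32 %p))))
// becomes (v1i128 (PPCISD::LXVRZX %chain, %p, 32)), where the last operand is
// the number of bits being loaded.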
14984 static SDValue combineBVZEXTLOAD(SDNode *N, SelectionDAG &DAG) {
14985   SDLoc DL(N);
14986
14987 // This combine is only eligible for a BUILD_VECTOR of v1i128.
14988 if (N->getValueType(0) != MVT::v1i128)
14989 return SDValue();
14990
14991 SDValue Operand = N->getOperand(0);
14992 // Proceed with the transformation if the operand to the BUILD_VECTOR
14993 // is a load instruction.
14994 if (Operand.getOpcode() != ISD::LOAD)
14995 return SDValue();
14996
14997 auto *LD = cast<LoadSDNode>(Operand);
14998 EVT MemoryType = LD->getMemoryVT();
14999
15000   // This transformation is only valid if we are loading either a byte,
15001 // halfword, word, or doubleword.
15002 bool ValidLDType = MemoryType == MVT::i8 || MemoryType == MVT::i16 ||
15003 MemoryType == MVT::i32 || MemoryType == MVT::i64;
15004
15005 // Ensure that the load from the narrow width is being zero extended to i128.
15006 if (!ValidLDType ||
15007 (LD->getExtensionType() != ISD::ZEXTLOAD &&
15008 LD->getExtensionType() != ISD::EXTLOAD))
15009 return SDValue();
15010
15011 SDValue LoadOps[] = {
15012 LD->getChain(), LD->getBasePtr(),
15013 DAG.getIntPtrConstant(MemoryType.getScalarSizeInBits(), DL)};
15014
15015   return DAG.getMemIntrinsicNode(PPCISD::LXVRZX, DL,
15016                                  DAG.getVTList(MVT::v1i128, MVT::Other),
15017 LoadOps, MemoryType, LD->getMemOperand());
15018}
15019
15020SDValue PPCTargetLowering::DAGCombineBuildVector(SDNode *N,
15021 DAGCombinerInfo &DCI) const {
15022 assert(N->getOpcode() == ISD::BUILD_VECTOR &&
15023 "Should be called with a BUILD_VECTOR node");
15024
15025 SelectionDAG &DAG = DCI.DAG;
15026 SDLoc dl(N);
15027
15028 if (!Subtarget.hasVSX())
15029 return SDValue();
15030
15031 // The target independent DAG combiner will leave a build_vector of
15032 // float-to-int conversions intact. We can generate MUCH better code for
15033 // a float-to-int conversion of a vector of floats.
15034 SDValue FirstInput = N->getOperand(0);
15035 if (FirstInput.getOpcode() == PPCISD::MFVSR) {
15036 SDValue Reduced = combineElementTruncationToVectorTruncation(N, DCI);
15037 if (Reduced)
15038 return Reduced;
15039 }
15040
15041 // If we're building a vector out of consecutive loads, just load that
15042 // vector type.
15043 SDValue Reduced = combineBVOfConsecutiveLoads(N, DAG);
15044 if (Reduced)
15045 return Reduced;
15046
15047 // If we're building a vector out of extended elements from another vector
15048 // we have P9 vector integer extend instructions. The code assumes legal
15049 // input types (i.e. it can't handle things like v4i16) so do not run before
15050 // legalization.
15051 if (Subtarget.hasP9Altivec() && !DCI.isBeforeLegalize()) {
15052 Reduced = combineBVOfVecSExt(N, DAG);
15053 if (Reduced)
15054 return Reduced;
15055 }
15056
15057 // On Power10, the Load VSX Vector Rightmost instructions can be utilized
15058 // if this is a BUILD_VECTOR of v1i128, and if the operand to the BUILD_VECTOR
15059 // is a load from <valid narrow width> to i128.
15060 if (Subtarget.isISA3_1()) {
15061 SDValue BVOfZLoad = combineBVZEXTLOAD(N, DAG);
15062 if (BVOfZLoad)
15063 return BVOfZLoad;
15064 }
15065
15066 if (N->getValueType(0) != MVT::v2f64)
15067 return SDValue();
15068
15069 // Looking for:
15070   // (build_vector ([su]int_to_fp (extractelt 0)), ([su]int_to_fp (extractelt 1)))
15071 if (FirstInput.getOpcode() != ISD::SINT_TO_FP &&
15072 FirstInput.getOpcode() != ISD::UINT_TO_FP)
15073 return SDValue();
15074 if (N->getOperand(1).getOpcode() != ISD::SINT_TO_FP &&
15075 N->getOperand(1).getOpcode() != ISD::UINT_TO_FP)
15076 return SDValue();
15077 if (FirstInput.getOpcode() != N->getOperand(1).getOpcode())
15078 return SDValue();
15079
15080 SDValue Ext1 = FirstInput.getOperand(0);
15081 SDValue Ext2 = N->getOperand(1).getOperand(0);
15082 if(Ext1.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
15083      Ext2.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
15084     return SDValue();
15085
15086 ConstantSDNode *Ext1Op = dyn_cast<ConstantSDNode>(Ext1.getOperand(1));
15087 ConstantSDNode *Ext2Op = dyn_cast<ConstantSDNode>(Ext2.getOperand(1));
15088 if (!Ext1Op || !Ext2Op)
15089 return SDValue();
15090 if (Ext1.getOperand(0).getValueType() != MVT::v4i32 ||
15091 Ext1.getOperand(0) != Ext2.getOperand(0))
15092 return SDValue();
15093
15094 int FirstElem = Ext1Op->getZExtValue();
15095 int SecondElem = Ext2Op->getZExtValue();
15096 int SubvecIdx;
15097 if (FirstElem == 0 && SecondElem == 1)
15098 SubvecIdx = Subtarget.isLittleEndian() ? 1 : 0;
15099 else if (FirstElem == 2 && SecondElem == 3)
15100 SubvecIdx = Subtarget.isLittleEndian() ? 0 : 1;
15101 else
15102 return SDValue();
15103
15104 SDValue SrcVec = Ext1.getOperand(0);
15105 auto NodeType = (N->getOperand(1).getOpcode() == ISD::SINT_TO_FP) ?
15106                   PPCISD::SINT_VEC_TO_FP : PPCISD::UINT_VEC_TO_FP;
15107   return DAG.getNode(NodeType, dl, MVT::v2f64,
15108 SrcVec, DAG.getIntPtrConstant(SubvecIdx, dl));
15109}
15110
15111SDValue PPCTargetLowering::combineFPToIntToFP(SDNode *N,
15112 DAGCombinerInfo &DCI) const {
15113 assert((N->getOpcode() == ISD::SINT_TO_FP ||
15114 N->getOpcode() == ISD::UINT_TO_FP) &&
15115 "Need an int -> FP conversion node here");
15116
15117 if (useSoftFloat() || !Subtarget.has64BitSupport())
15118 return SDValue();
15119
15120 SelectionDAG &DAG = DCI.DAG;
15121 SDLoc dl(N);
15122 SDValue Op(N, 0);
15123
15124 // Don't handle ppc_fp128 here or conversions that are out-of-range capable
15125 // from the hardware.
15126 if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
15127 return SDValue();
15128 if (!Op.getOperand(0).getValueType().isSimple())
15129 return SDValue();
15130 if (Op.getOperand(0).getValueType().getSimpleVT() <= MVT(MVT::i1) ||
15131 Op.getOperand(0).getValueType().getSimpleVT() > MVT(MVT::i64))
15132 return SDValue();
15133
15134 SDValue FirstOperand(Op.getOperand(0));
15135 bool SubWordLoad = FirstOperand.getOpcode() == ISD::LOAD &&
15136 (FirstOperand.getValueType() == MVT::i8 ||
15137 FirstOperand.getValueType() == MVT::i16);
15138 if (Subtarget.hasP9Vector() && Subtarget.hasP9Altivec() && SubWordLoad) {
15139 bool Signed = N->getOpcode() == ISD::SINT_TO_FP;
15140 bool DstDouble = Op.getValueType() == MVT::f64;
15141 unsigned ConvOp = Signed ?
15142 (DstDouble ? PPCISD::FCFID : PPCISD::FCFIDS) :
15143 (DstDouble ? PPCISD::FCFIDU : PPCISD::FCFIDUS);
15144 SDValue WidthConst =
15145 DAG.getIntPtrConstant(FirstOperand.getValueType() == MVT::i8 ? 1 : 2,
15146 dl, false);
15147 LoadSDNode *LDN = cast<LoadSDNode>(FirstOperand.getNode());
15148 SDValue Ops[] = { LDN->getChain(), LDN->getBasePtr(), WidthConst };
15149     SDValue Ld = DAG.getMemIntrinsicNode(PPCISD::LXSIZX, dl,
15150                                          DAG.getVTList(MVT::f64, MVT::Other),
15151 Ops, MVT::i8, LDN->getMemOperand());
15152 DAG.makeEquivalentMemoryOrdering(LDN, Ld);
15153
15154 // For signed conversion, we need to sign-extend the value in the VSR
15155 if (Signed) {
15156 SDValue ExtOps[] = { Ld, WidthConst };
15157 SDValue Ext = DAG.getNode(PPCISD::VEXTS, dl, MVT::f64, ExtOps);
15158 return DAG.getNode(ConvOp, dl, DstDouble ? MVT::f64 : MVT::f32, Ext);
15159 } else
15160 return DAG.getNode(ConvOp, dl, DstDouble ? MVT::f64 : MVT::f32, Ld);
15161 }
15162
15163
15164 // For i32 intermediate values, unfortunately, the conversion functions
15165   // leave the upper 32 bits of the value undefined. Within the set of
15166 // scalar instructions, we have no method for zero- or sign-extending the
15167 // value. Thus, we cannot handle i32 intermediate values here.
15168 if (Op.getOperand(0).getValueType() == MVT::i32)
15169 return SDValue();
15170
15171 assert((Op.getOpcode() == ISD::SINT_TO_FP || Subtarget.hasFPCVT()) &&
15172 "UINT_TO_FP is supported only with FPCVT");
15173
15174 // If we have FCFIDS, then use it when converting to single-precision.
15175 // Otherwise, convert to double-precision and then round.
15176 unsigned FCFOp = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
15177 ? (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDUS
15178                                                             : PPCISD::FCFIDS)
15179                        : (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDU
15180 : PPCISD::FCFID);
15181 MVT FCFTy = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
15182 ? MVT::f32
15183 : MVT::f64;
15184
15185 // If we're converting from a float, to an int, and back to a float again,
15186 // then we don't need the store/load pair at all.
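  // For example (an illustrative sketch):
  //   (f64 (sint_to_fp (i64 (fp_to_sint f64 %x))))
  // can be lowered as (f64 (FCFID (FCTIDZ %x))), keeping the value in a
  // floating-point register for the entire round trip.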
15187 if ((Op.getOperand(0).getOpcode() == ISD::FP_TO_UINT &&
15188 Subtarget.hasFPCVT()) ||
15189 (Op.getOperand(0).getOpcode() == ISD::FP_TO_SINT)) {
15190 SDValue Src = Op.getOperand(0).getOperand(0);
15191 if (Src.getValueType() == MVT::f32) {
15192 Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);
15193 DCI.AddToWorklist(Src.getNode());
15194 } else if (Src.getValueType() != MVT::f64) {
15195 // Make sure that we don't pick up a ppc_fp128 source value.
15196 return SDValue();
15197 }
15198
15199 unsigned FCTOp =
15200 Op.getOperand(0).getOpcode() == ISD::FP_TO_SINT ? PPCISD::FCTIDZ :
15201                                                         PPCISD::FCTIDUZ;
15202
15203 SDValue Tmp = DAG.getNode(FCTOp, dl, MVT::f64, Src);
15204 SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Tmp);
15205
15206 if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) {
15207 FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP,
15208 DAG.getIntPtrConstant(0, dl, /*isTarget=*/true));
15209 DCI.AddToWorklist(FP.getNode());
15210 }
15211
15212 return FP;
15213 }
15214
15215 return SDValue();
15216}
15217
15218// expandVSXLoadForLE - Convert VSX loads (which may be intrinsics for
15219// builtins) into loads with swaps.
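// For example (an illustrative sketch), on a little endian subtarget a
// (v4i32 (load %ptr)) becomes
//   (v4i32 (bitcast (XXSWAPD (LXVD2X %chain, %ptr))))
// so that the doubleword-swapped layout produced by lxvd2x is corrected.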
15220 SDValue PPCTargetLowering::expandVSXLoadForLE(SDNode *N,
15221                                               DAGCombinerInfo &DCI) const {
15222 // Delay VSX load for LE combine until after LegalizeOps to prioritize other
15223 // load combines.
15224 if (DCI.isBeforeLegalizeOps())
15225 return SDValue();
15226
15227 SelectionDAG &DAG = DCI.DAG;
15228 SDLoc dl(N);
15229 SDValue Chain;
15230 SDValue Base;
15231 MachineMemOperand *MMO;
15232
15233 switch (N->getOpcode()) {
15234 default:
15235 llvm_unreachable("Unexpected opcode for little endian VSX load");
15236 case ISD::LOAD: {
15237 LoadSDNode *LD = cast<LoadSDNode>(N);
15238 Chain = LD->getChain();
15239 Base = LD->getBasePtr();
15240 MMO = LD->getMemOperand();
15241 // If the MMO suggests this isn't a load of a full vector, leave
15242 // things alone. For a built-in, we have to make the change for
15243 // correctness, so if there is a size problem that will be a bug.
15244 if (!MMO->getSize().hasValue() || MMO->getSize().getValue() < 16)
15245 return SDValue();
15246 break;
15247 }
15248   case ISD::INTRINSIC_W_CHAIN: {
15249     MemIntrinsicSDNode *Intrin = cast<MemIntrinsicSDNode>(N);
15250 Chain = Intrin->getChain();
15251 // Similarly to the store case below, Intrin->getBasePtr() doesn't get
15252 // us what we want. Get operand 2 instead.
15253 Base = Intrin->getOperand(2);
15254 MMO = Intrin->getMemOperand();
15255 break;
15256 }
15257 }
15258
15259 MVT VecTy = N->getValueType(0).getSimpleVT();
15260
15261 SDValue LoadOps[] = { Chain, Base };
15262   SDValue Load = DAG.getMemIntrinsicNode(PPCISD::LXVD2X, dl,
15263                                          DAG.getVTList(MVT::v2f64, MVT::Other),
15264 LoadOps, MVT::v2f64, MMO);
15265
15266 DCI.AddToWorklist(Load.getNode());
15267 Chain = Load.getValue(1);
15268 SDValue Swap = DAG.getNode(
15269 PPCISD::XXSWAPD, dl, DAG.getVTList(MVT::v2f64, MVT::Other), Chain, Load);
15270 DCI.AddToWorklist(Swap.getNode());
15271
15272 // Add a bitcast if the resulting load type doesn't match v2f64.
15273 if (VecTy != MVT::v2f64) {
15274 SDValue N = DAG.getNode(ISD::BITCAST, dl, VecTy, Swap);
15275 DCI.AddToWorklist(N.getNode());
15276 // Package {bitcast value, swap's chain} to match Load's shape.
15277 return DAG.getNode(ISD::MERGE_VALUES, dl, DAG.getVTList(VecTy, MVT::Other),
15278 N, Swap.getValue(1));
15279 }
15280
15281 return Swap;
15282}
15283
15284// expandVSXStoreForLE - Convert VSX stores (which may be intrinsics for
15285// builtins) into stores with swaps.
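// For example (an illustrative sketch), on a little endian subtarget a
// (store v4i32 %val, %ptr) becomes
//   (STXVD2X (XXSWAPD (bitcast %val to v2f64)), %ptr)
// mirroring the load case above.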
15286 SDValue PPCTargetLowering::expandVSXStoreForLE(SDNode *N,
15287                                                DAGCombinerInfo &DCI) const {
15288 // Delay VSX store for LE combine until after LegalizeOps to prioritize other
15289 // store combines.
15290 if (DCI.isBeforeLegalizeOps())
15291 return SDValue();
15292
15293 SelectionDAG &DAG = DCI.DAG;
15294 SDLoc dl(N);
15295 SDValue Chain;
15296 SDValue Base;
15297 unsigned SrcOpnd;
15298 MachineMemOperand *MMO;
15299
15300 switch (N->getOpcode()) {
15301 default:
15302 llvm_unreachable("Unexpected opcode for little endian VSX store");
15303 case ISD::STORE: {
15304 StoreSDNode *ST = cast<StoreSDNode>(N);
15305 Chain = ST->getChain();
15306 Base = ST->getBasePtr();
15307 MMO = ST->getMemOperand();
15308 SrcOpnd = 1;
15309 // If the MMO suggests this isn't a store of a full vector, leave
15310 // things alone. For a built-in, we have to make the change for
15311 // correctness, so if there is a size problem that will be a bug.
15312 if (!MMO->getSize().hasValue() || MMO->getSize().getValue() < 16)
15313 return SDValue();
15314 break;
15315 }
15316 case ISD::INTRINSIC_VOID: {
15317 MemIntrinsicSDNode *Intrin = cast<MemIntrinsicSDNode>(N);
15318 Chain = Intrin->getChain();
15319 // Intrin->getBasePtr() oddly does not get what we want.
15320 Base = Intrin->getOperand(3);
15321 MMO = Intrin->getMemOperand();
15322 SrcOpnd = 2;
15323 break;
15324 }
15325 }
15326
15327 SDValue Src = N->getOperand(SrcOpnd);
15328 MVT VecTy = Src.getValueType().getSimpleVT();
15329
15330 // All stores are done as v2f64 and possible bit cast.
15331 if (VecTy != MVT::v2f64) {
15332 Src = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, Src);
15333 DCI.AddToWorklist(Src.getNode());
15334 }
15335
15336 SDValue Swap = DAG.getNode(PPCISD::XXSWAPD, dl,
15337 DAG.getVTList(MVT::v2f64, MVT::Other), Chain, Src);
15338 DCI.AddToWorklist(Swap.getNode());
15339 Chain = Swap.getValue(1);
15340 SDValue StoreOps[] = { Chain, Swap, Base };
15341   SDValue Store = DAG.getMemIntrinsicNode(PPCISD::STXVD2X, dl,
15342                                           DAG.getVTList(MVT::Other),
15343 StoreOps, VecTy, MMO);
15344 DCI.AddToWorklist(Store.getNode());
15345 return Store;
15346}
15347
15348// Handle DAG combine for STORE (FP_TO_INT F).
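// For example (an illustrative sketch):
//   (store (i32 (fp_to_sint f64 %x)), %ptr)
// can be emitted as a store of the converted value directly from a VSR,
// avoiding the separate conversion and FP-to-GPR move that would otherwise be
// required.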
15349SDValue PPCTargetLowering::combineStoreFPToInt(SDNode *N,
15350 DAGCombinerInfo &DCI) const {
15351 SelectionDAG &DAG = DCI.DAG;
15352 SDLoc dl(N);
15353 unsigned Opcode = N->getOperand(1).getOpcode();
15354 (void)Opcode;
15355 bool Strict = N->getOperand(1)->isStrictFPOpcode();
15356
15357 assert((Opcode == ISD::FP_TO_SINT || Opcode == ISD::FP_TO_UINT ||
15358 Opcode == ISD::STRICT_FP_TO_SINT || Opcode == ISD::STRICT_FP_TO_UINT)
15359 && "Not a FP_TO_INT Instruction!");
15360
15361 SDValue Val = N->getOperand(1).getOperand(Strict ? 1 : 0);
15362 EVT Op1VT = N->getOperand(1).getValueType();
15363 EVT ResVT = Val.getValueType();
15364
15365 if (!Subtarget.hasVSX() || !Subtarget.hasFPCVT() || !isTypeLegal(ResVT))
15366 return SDValue();
15367
15368 // Only perform combine for conversion to i64/i32 or power9 i16/i8.
15369 bool ValidTypeForStoreFltAsInt =
15370 (Op1VT == MVT::i32 || (Op1VT == MVT::i64 && Subtarget.isPPC64()) ||
15371 (Subtarget.hasP9Vector() && (Op1VT == MVT::i16 || Op1VT == MVT::i8)));
15372
15373 // TODO: Lower conversion from f128 on all VSX targets
15374 if (ResVT == MVT::ppcf128 || (ResVT == MVT::f128 && !Subtarget.hasP9Vector()))
15375 return SDValue();
15376
15377 if ((Op1VT != MVT::i64 && !Subtarget.hasP8Vector()) ||
15378 cast<StoreSDNode>(N)->isTruncatingStore() || !ValidTypeForStoreFltAsInt)
15379 return SDValue();
15380
15381 Val = convertFPToInt(N->getOperand(1), DAG, Subtarget);
15382
15383 // Set number of bytes being converted.
15384 unsigned ByteSize = Op1VT.getScalarSizeInBits() / 8;
15385 SDValue Ops[] = {N->getOperand(0), Val, N->getOperand(2),
15386 DAG.getIntPtrConstant(ByteSize, dl, false),
15387 DAG.getValueType(Op1VT)};
15388
15389   Val = DAG.getMemIntrinsicNode(PPCISD::ST_VSR_SCAL_INT, dl,
15390                                 DAG.getVTList(MVT::Other), Ops,
15391 cast<StoreSDNode>(N)->getMemoryVT(),
15392 cast<StoreSDNode>(N)->getMemOperand());
15393
15394 return Val;
15395}
15396
15397static bool isAlternatingShuffMask(const ArrayRef<int> &Mask, int NumElts) {
15398 // Check that the source of the element keeps flipping
15399 // (i.e. Mask[i] < NumElts -> Mask[i+i] >= NumElts).
15400 bool PrevElemFromFirstVec = Mask[0] < NumElts;
15401 for (int i = 1, e = Mask.size(); i < e; i++) {
15402 if (PrevElemFromFirstVec && Mask[i] < NumElts)
15403 return false;
15404 if (!PrevElemFromFirstVec && Mask[i] >= NumElts)
15405 return false;
15406 PrevElemFromFirstVec = !PrevElemFromFirstVec;
15407 }
15408 return true;
15409}
15410
15411static bool isSplatBV(SDValue Op) {
15412 if (Op.getOpcode() != ISD::BUILD_VECTOR)
15413 return false;
15414 SDValue FirstOp;
15415
15416 // Find first non-undef input.
15417 for (int i = 0, e = Op.getNumOperands(); i < e; i++) {
15418 FirstOp = Op.getOperand(i);
15419 if (!FirstOp.isUndef())
15420 break;
15421 }
15422
15423 // All inputs are undef or the same as the first non-undef input.
15424 for (int i = 1, e = Op.getNumOperands(); i < e; i++)
15425 if (Op.getOperand(i) != FirstOp && !Op.getOperand(i).isUndef())
15426 return false;
15427 return true;
15428}
15429
15430 static SDValue isScalarToVec(SDValue Op) {
15431   if (Op.getOpcode() == ISD::SCALAR_TO_VECTOR)
15432 return Op;
15433 if (Op.getOpcode() != ISD::BITCAST)
15434 return SDValue();
15435 Op = Op.getOperand(0);
15436 if (Op.getOpcode() == ISD::SCALAR_TO_VECTOR)
15437 return Op;
15438 return SDValue();
15439}
15440
15441// Fix up the shuffle mask to account for the fact that the result of
15442// scalar_to_vector is not in lane zero. This just takes all values in
15443// the ranges specified by the min/max indices and adds the number of
15444// elements required to ensure each element comes from the respective
15445// position in the valid lane.
15446// On little endian, that's just the corresponding element in the other
15447// half of the vector. On big endian, it is in the same half but right
15448// justified rather than left justified in that half.
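// For example (an illustrative sketch): for a v4i32 shuffle on little endian,
// a mask entry referring to element 0 of a scalar_to_vector input is remapped
// to element 2 (HalfVec), since the scalar actually lives in the other half of
// the permuted vector.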
15449 static void fixupShuffleMaskForPermutedSToV(SmallVectorImpl<int> &ShuffV,
15450                                             int LHSMaxIdx, int RHSMinIdx,
15451 int RHSMaxIdx, int HalfVec,
15452 unsigned ValidLaneWidth,
15453 const PPCSubtarget &Subtarget) {
15454 for (int i = 0, e = ShuffV.size(); i < e; i++) {
15455 int Idx = ShuffV[i];
15456 if ((Idx >= 0 && Idx < LHSMaxIdx) || (Idx >= RHSMinIdx && Idx < RHSMaxIdx))
15457 ShuffV[i] +=
15458 Subtarget.isLittleEndian() ? HalfVec : HalfVec - ValidLaneWidth;
15459 }
15460}
15461
15462// Replace a SCALAR_TO_VECTOR with a SCALAR_TO_VECTOR_PERMUTED except if
15463// the original is:
15464// (<n x Ty> (scalar_to_vector (Ty (extract_elt <n x Ty> %a, C))))
15465// In such a case, just change the shuffle mask to extract the element
15466// from the permuted index.
15467 static SDValue getSToVPermuted(SDValue OrigSToV, SelectionDAG &DAG,
15468                                const PPCSubtarget &Subtarget) {
15469 SDLoc dl(OrigSToV);
15470 EVT VT = OrigSToV.getValueType();
15471 assert(OrigSToV.getOpcode() == ISD::SCALAR_TO_VECTOR &&
15472 "Expecting a SCALAR_TO_VECTOR here");
15473 SDValue Input = OrigSToV.getOperand(0);
15474
15475 if (Input.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
15476 ConstantSDNode *Idx = dyn_cast<ConstantSDNode>(Input.getOperand(1));
15477 SDValue OrigVector = Input.getOperand(0);
15478
15479 // Can't handle non-const element indices or different vector types
15480 // for the input to the extract and the output of the scalar_to_vector.
15481 if (Idx && VT == OrigVector.getValueType()) {
15482 unsigned NumElts = VT.getVectorNumElements();
15483 assert(
15484 NumElts > 1 &&
15485 "Cannot produce a permuted scalar_to_vector for one element vector");
15486 SmallVector<int, 16> NewMask(NumElts, -1);
15487 unsigned ResultInElt = NumElts / 2;
15488 ResultInElt -= Subtarget.isLittleEndian() ? 0 : 1;
15489 NewMask[ResultInElt] = Idx->getZExtValue();
15490 return DAG.getVectorShuffle(VT, dl, OrigVector, OrigVector, NewMask);
15491 }
15492 }
15493 return DAG.getNode(PPCISD::SCALAR_TO_VECTOR_PERMUTED, dl, VT,
15494 OrigSToV.getOperand(0));
15495}
15496
15497// On little endian subtargets, combine shuffles such as:
15498// vector_shuffle<16,1,17,3,18,5,19,7,20,9,21,11,22,13,23,15>, <zero>, %b
15499// into:
15500// vector_shuffle<16,0,17,1,18,2,19,3,20,4,21,5,22,6,23,7>, <zero>, %b
15501// because the latter can be matched to a single instruction merge.
15502// Furthermore, SCALAR_TO_VECTOR on little endian always involves a permute
15503// to put the value into element zero. Adjust the shuffle mask so that the
15504// vector can remain in permuted form (to prevent a swap prior to a shuffle).
15505// On big endian targets, this is still useful for SCALAR_TO_VECTOR
15506// nodes with elements smaller than doubleword because all the ways
15507// of getting scalar data into a vector register put the value in the
15508// rightmost element of the left half of the vector.
15509SDValue PPCTargetLowering::combineVectorShuffle(ShuffleVectorSDNode *SVN,
15510 SelectionDAG &DAG) const {
15511 SDValue LHS = SVN->getOperand(0);
15512 SDValue RHS = SVN->getOperand(1);
15513 auto Mask = SVN->getMask();
15514 int NumElts = LHS.getValueType().getVectorNumElements();
15515 SDValue Res(SVN, 0);
15516 SDLoc dl(SVN);
15517 bool IsLittleEndian = Subtarget.isLittleEndian();
15518
15519 // On big endian targets this is only useful for subtargets with direct moves.
15520 // On little endian targets it would be useful for all subtargets with VSX.
15521 // However adding special handling for LE subtargets without direct moves
15522 // would be wasted effort since the minimum arch for LE is ISA 2.07 (Power8)
15523 // which includes direct moves.
15524 if (!Subtarget.hasDirectMove())
15525 return Res;
15526
15527 // If this is not a shuffle of a shuffle and the first element comes from
15528 // the second vector, canonicalize to the commuted form. This will make it
15529 // more likely to match one of the single instruction patterns.
15530 if (Mask[0] >= NumElts && LHS.getOpcode() != ISD::VECTOR_SHUFFLE &&
15531 RHS.getOpcode() != ISD::VECTOR_SHUFFLE) {
15532 std::swap(LHS, RHS);
15533 Res = DAG.getCommutedVectorShuffle(*SVN);
15534 Mask = cast<ShuffleVectorSDNode>(Res)->getMask();
15535 }
15536
15537 // Adjust the shuffle mask if either input vector comes from a
15538 // SCALAR_TO_VECTOR and keep the respective input vector in permuted
15539 // form (to prevent the need for a swap).
15540 SmallVector<int, 16> ShuffV(Mask);
15541 SDValue SToVLHS = isScalarToVec(LHS);
15542 SDValue SToVRHS = isScalarToVec(RHS);
15543 if (SToVLHS || SToVRHS) {
15544 // FIXME: If both LHS and RHS are SCALAR_TO_VECTOR, but are not the
15545 // same type and have differing element sizes, then do not perform
15546 // the following transformation. The current transformation for
15547 // SCALAR_TO_VECTOR assumes that both input vectors have the same
15548 // element size. This will be updated in the future to account for
15549 // differing sizes of the LHS and RHS.
15550 if (SToVLHS && SToVRHS &&
15551 (SToVLHS.getValueType().getScalarSizeInBits() !=
15552 SToVRHS.getValueType().getScalarSizeInBits()))
15553 return Res;
15554
15555 int NumEltsIn = SToVLHS ? SToVLHS.getValueType().getVectorNumElements()
15556 : SToVRHS.getValueType().getVectorNumElements();
15557 int NumEltsOut = ShuffV.size();
15558 // The width of the "valid lane" (i.e. the lane that contains the value that
15559 // is vectorized) needs to be expressed in terms of the number of elements
15560 // of the shuffle. It is therefore the ratio of the element counts before
15561 // and after any bitcast.
15562 unsigned ValidLaneWidth =
15563 SToVLHS ? SToVLHS.getValueType().getScalarSizeInBits() /
15564 LHS.getValueType().getScalarSizeInBits()
15565 : SToVRHS.getValueType().getScalarSizeInBits() /
15566 RHS.getValueType().getScalarSizeInBits();
15567
15568 // Initially assume that neither input is permuted. These will be adjusted
15569 // accordingly if either input is.
15570 int LHSMaxIdx = -1;
15571 int RHSMinIdx = -1;
15572 int RHSMaxIdx = -1;
15573 int HalfVec = LHS.getValueType().getVectorNumElements() / 2;
15574
15575 // Get the permuted scalar to vector nodes for the source(s) that come from
15576 // ISD::SCALAR_TO_VECTOR.
15577 // On big endian systems, this only makes sense for element sizes smaller
15578 // than 64 bits since for 64-bit elements, all instructions already put
15579 // the value into element zero. Since scalar size of LHS and RHS may differ
15580 // after isScalarToVec, this should be checked using their own sizes.
15581 if (SToVLHS) {
15582 if (!IsLittleEndian && SToVLHS.getValueType().getScalarSizeInBits() >= 64)
15583 return Res;
15584 // Set up the values for the shuffle vector fixup.
15585 LHSMaxIdx = NumEltsOut / NumEltsIn;
15586 SToVLHS = getSToVPermuted(SToVLHS, DAG, Subtarget);
15587 if (SToVLHS.getValueType() != LHS.getValueType())
15588 SToVLHS = DAG.getBitcast(LHS.getValueType(), SToVLHS);
15589 LHS = SToVLHS;
15590 }
15591 if (SToVRHS) {
15592 if (!IsLittleEndian && SToVRHS.getValueType().getScalarSizeInBits() >= 64)
15593 return Res;
15594 RHSMinIdx = NumEltsOut;
15595 RHSMaxIdx = NumEltsOut / NumEltsIn + RHSMinIdx;
15596 SToVRHS = getSToVPermuted(SToVRHS, DAG, Subtarget);
15597 if (SToVRHS.getValueType() != RHS.getValueType())
15598 SToVRHS = DAG.getBitcast(RHS.getValueType(), SToVRHS);
15599 RHS = SToVRHS;
15600 }
15601
15602 // Fix up the shuffle mask to reflect where the desired element actually is.
15603 // The minimum and maximum indices that correspond to element zero for both
15604 // the LHS and RHS are computed and will control which shuffle mask entries
15605 // are to be changed. For example, if the RHS is permuted, any shuffle mask
15606 // entries in the range [RHSMinIdx,RHSMaxIdx) will be adjusted.
15607 fixupShuffleMaskForPermutedSToV(ShuffV, LHSMaxIdx, RHSMinIdx, RHSMaxIdx,
15608 HalfVec, ValidLaneWidth, Subtarget);
15609 Res = DAG.getVectorShuffle(SVN->getValueType(0), dl, LHS, RHS, ShuffV);
15610
15611 // We may have simplified away the shuffle. We won't be able to do anything
15612 // further with it here.
15613 if (!isa<ShuffleVectorSDNode>(Res))
15614 return Res;
15615 Mask = cast<ShuffleVectorSDNode>(Res)->getMask();
15616 }
15617
15618 SDValue TheSplat = IsLittleEndian ? RHS : LHS;
15619 // The common case after we commuted the shuffle is that the RHS is a splat
15620 // and we have elements coming in from the splat at indices that are not
15621 // conducive to using a merge.
15622 // Example:
15623 // vector_shuffle<0,17,1,19,2,21,3,23,4,25,5,27,6,29,7,31> t1, <zero>
15624 if (!isSplatBV(TheSplat))
15625 return Res;
15626
15627 // We are looking for a mask such that all even elements are from
15628 // one vector and all odd elements from the other.
15629 if (!isAlternatingShuffMask(Mask, NumElts))
15630 return Res;
15631
15632 // Adjust the mask so we are pulling in the same index from the splat
15633 // as the index from the interesting vector in consecutive elements.
15634 if (IsLittleEndian) {
15635 // Example (even elements from first vector):
15636 // vector_shuffle<0,16,1,17,2,18,3,19,4,20,5,21,6,22,7,23> t1, <zero>
15637 if (Mask[0] < NumElts)
15638 for (int i = 1, e = Mask.size(); i < e; i += 2) {
15639 if (ShuffV[i] < 0)
15640 continue;
15641 // If element from non-splat is undef, pick first element from splat.
15642 ShuffV[i] = (ShuffV[i - 1] >= 0 ? ShuffV[i - 1] : 0) + NumElts;
15643 }
15644 // Example (odd elements from first vector):
15645 // vector_shuffle<16,0,17,1,18,2,19,3,20,4,21,5,22,6,23,7> t1, <zero>
15646 else
15647 for (int i = 0, e = Mask.size(); i < e; i += 2) {
15648 if (ShuffV[i] < 0)
15649 continue;
15650 // If element from non-splat is undef, pick first element from splat.
15651 ShuffV[i] = (ShuffV[i + 1] >= 0 ? ShuffV[i + 1] : 0) + NumElts;
15652 }
15653 } else {
15654 // Example (even elements from first vector):
15655 // vector_shuffle<0,16,1,17,2,18,3,19,4,20,5,21,6,22,7,23> <zero>, t1
15656 if (Mask[0] < NumElts)
15657 for (int i = 0, e = Mask.size(); i < e; i += 2) {
15658 if (ShuffV[i] < 0)
15659 continue;
15660 // If element from non-splat is undef, pick first element from splat.
15661 ShuffV[i] = ShuffV[i + 1] >= 0 ? ShuffV[i + 1] - NumElts : 0;
15662 }
15663 // Example (odd elements from first vector):
15664 // vector_shuffle<16,0,17,1,18,2,19,3,20,4,21,5,22,6,23,7> <zero>, t1
15665 else
15666 for (int i = 1, e = Mask.size(); i < e; i += 2) {
15667 if (ShuffV[i] < 0)
15668 continue;
15669 // If element from non-splat is undef, pick first element from splat.
15670 ShuffV[i] = ShuffV[i - 1] >= 0 ? ShuffV[i - 1] - NumElts : 0;
15671 }
15672 }
15673
15674 // If the RHS has undefs, we need to remove them since we may have created
15675 // a shuffle that adds those instead of the splat value.
15676 SDValue SplatVal =
15677 cast<BuildVectorSDNode>(TheSplat.getNode())->getSplatValue();
15678 TheSplat = DAG.getSplatBuildVector(TheSplat.getValueType(), dl, SplatVal);
15679
15680 if (IsLittleEndian)
15681 RHS = TheSplat;
15682 else
15683 LHS = TheSplat;
15684 return DAG.getVectorShuffle(SVN->getValueType(0), dl, LHS, RHS, ShuffV);
15685}
15686
15687SDValue PPCTargetLowering::combineVReverseMemOP(ShuffleVectorSDNode *SVN,
15688 LSBaseSDNode *LSBase,
15689 DAGCombinerInfo &DCI) const {
15690 assert((ISD::isNormalLoad(LSBase) || ISD::isNormalStore(LSBase)) &&
15691 "Not a reverse memop pattern!");
15692
15693 auto IsElementReverse = [](const ShuffleVectorSDNode *SVN) -> bool {
15694 auto Mask = SVN->getMask();
15695 int i = 0;
15696 auto I = Mask.rbegin();
15697 auto E = Mask.rend();
15698
15699 for (; I != E; ++I) {
15700 if (*I != i)
15701 return false;
15702 i++;
15703 }
15704 return true;
15705 };
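// For example, the v4i32 reversing mask <3,2,1,0> satisfies this check:
// walking the mask from back to front yields 0, 1, 2, 3.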
15706
15707 SelectionDAG &DAG = DCI.DAG;
15708 EVT VT = SVN->getValueType(0);
15709
15710 if (!isTypeLegal(VT) || !Subtarget.isLittleEndian() || !Subtarget.hasVSX())
15711 return SDValue();
15712
15713 // Before P9, we rely on the PPCVSXSwapRemoval pass to fix up the element
15714 // order; see the comment in PPCVSXSwapRemoval.cpp.
15715 // This combine conflicts with that optimization, so we don't do it here.
15716 if (!Subtarget.hasP9Vector())
15717 return SDValue();
15718
15719 if (!IsElementReverse(SVN))
15720 return SDValue();
15721
15722 if (LSBase->getOpcode() == ISD::LOAD) {
15723 // If result 0 of the load has any user other than the
15724 // shufflevector instruction, it is not profitable to replace the
15725 // shufflevector with a reverse load.
15726 for (SDNode::use_iterator UI = LSBase->use_begin(), UE = LSBase->use_end();
15727 UI != UE; ++UI)
15728 if (UI.getUse().getResNo() == 0 && UI->getOpcode() != ISD::VECTOR_SHUFFLE)
15729 return SDValue();
15730
15731 SDLoc dl(LSBase);
15732 SDValue LoadOps[] = {LSBase->getChain(), LSBase->getBasePtr()};
15733 return DAG.getMemIntrinsicNode(
15734 PPCISD::LOAD_VEC_BE, dl, DAG.getVTList(VT, MVT::Other), LoadOps,
15735 LSBase->getMemoryVT(), LSBase->getMemOperand());
15736 }
15737
15738 if (LSBase->getOpcode() == ISD::STORE) {
15739 // If there are other uses of the shuffle, the swap cannot be avoided.
15740 // Forcing the use of an X-Form (since swapped stores only have
15741 // X-Forms) without removing the swap is unprofitable.
15742 if (!SVN->hasOneUse())
15743 return SDValue();
15744
15745 SDLoc dl(LSBase);
15746 SDValue StoreOps[] = {LSBase->getChain(), SVN->getOperand(0),
15747 LSBase->getBasePtr()};
15748 return DAG.getMemIntrinsicNode(
15749 PPCISD::STORE_VEC_BE, dl, DAG.getVTList(MVT::Other), StoreOps,
15750 LSBase->getMemoryVT(), LSBase->getMemOperand());
15751 }
15752
15753 llvm_unreachable("Expected a load or store node here");
15754}
15755
15756static bool isStoreConditional(SDValue Intrin, unsigned &StoreWidth) {
15757 unsigned IntrinsicID = Intrin.getConstantOperandVal(1);
15758 if (IntrinsicID == Intrinsic::ppc_stdcx)
15759 StoreWidth = 8;
15760 else if (IntrinsicID == Intrinsic::ppc_stwcx)
15761 StoreWidth = 4;
15762 else if (IntrinsicID == Intrinsic::ppc_sthcx)
15763 StoreWidth = 2;
15764 else if (IntrinsicID == Intrinsic::ppc_stbcx)
15765 StoreWidth = 1;
15766 else
15767 return false;
15768 return true;
15769}
15770
15771SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
15772 DAGCombinerInfo &DCI) const {
15773 SelectionDAG &DAG = DCI.DAG;
15774 SDLoc dl(N);
15775 switch (N->getOpcode()) {
15776 default: break;
15777 case ISD::ADD:
15778 return combineADD(N, DCI);
15779 case ISD::AND: {
15780 // We don't want (and (zext (shift...)), C) if C fits in the width of the
15781 // original input as that will prevent us from selecting optimal rotates.
15782 // This only matters if the input to the extend is i32 widened to i64.
15783 SDValue Op1 = N->getOperand(0);
15784 SDValue Op2 = N->getOperand(1);
15785 if ((Op1.getOpcode() != ISD::ZERO_EXTEND &&
15786 Op1.getOpcode() != ISD::ANY_EXTEND) ||
15787 !isa<ConstantSDNode>(Op2) || N->getValueType(0) != MVT::i64 ||
15788 Op1.getOperand(0).getValueType() != MVT::i32)
15789 break;
15790 SDValue NarrowOp = Op1.getOperand(0);
15791 if (NarrowOp.getOpcode() != ISD::SHL && NarrowOp.getOpcode() != ISD::SRL &&
15792 NarrowOp.getOpcode() != ISD::ROTL && NarrowOp.getOpcode() != ISD::ROTR)
15793 break;
15794
15795 uint64_t Imm = Op2->getAsZExtVal();
15796 // Make sure that the constant is narrow enough to fit in the narrow type.
15797 if (!isUInt<32>(Imm))
15798 break;
15799 SDValue ConstOp = DAG.getConstant(Imm, dl, MVT::i32);
15800 SDValue NarrowAnd = DAG.getNode(ISD::AND, dl, MVT::i32, NarrowOp, ConstOp);
15801 return DAG.getZExtOrTrunc(NarrowAnd, dl, N->getValueType(0));
15802 }
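// Worked example of the rewrite above:
// (and (zext i32 (srl i32 %x, 3) to i64), 255) becomes
// (zext i32 (and i32 (srl i32 %x, 3), 255) to i64), keeping the shift and
// mask together in i32 so a single rotate-and-mask can still be selected.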
15803 case ISD::SHL:
15804 return combineSHL(N, DCI);
15805 case ISD::SRA:
15806 return combineSRA(N, DCI);
15807 case ISD::SRL:
15808 return combineSRL(N, DCI);
15809 case ISD::MUL:
15810 return combineMUL(N, DCI);
15811 case ISD::FMA:
15812 case PPCISD::FNMSUB:
15813 return combineFMALike(N, DCI);
15814 case PPCISD::SHL:
15815 if (isNullConstant(N->getOperand(0))) // 0 << V -> 0.
15816 return N->getOperand(0);
15817 break;
15818 case PPCISD::SRL:
15819 if (isNullConstant(N->getOperand(0))) // 0 >>u V -> 0.
15820 return N->getOperand(0);
15821 break;
15822 case PPCISD::SRA:
15823 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) {
15824 if (C->isZero() || // 0 >>s V -> 0.
15825 C->isAllOnes()) // -1 >>s V -> -1.
15826 return N->getOperand(0);
15827 }
15828 break;
15829 case ISD::SIGN_EXTEND:
15830 case ISD::ZERO_EXTEND:
15831 case ISD::ANY_EXTEND:
15832 return DAGCombineExtBoolTrunc(N, DCI);
15833 case ISD::TRUNCATE:
15834 return combineTRUNCATE(N, DCI);
15835 case ISD::SETCC:
15836 if (SDValue CSCC = combineSetCC(N, DCI))
15837 return CSCC;
15838 [[fallthrough]];
15839 case ISD::SELECT_CC:
15840 return DAGCombineTruncBoolExt(N, DCI);
15841 case ISD::SINT_TO_FP:
15842 case ISD::UINT_TO_FP:
15843 return combineFPToIntToFP(N, DCI);
15844 case ISD::VECTOR_SHUFFLE:
15845 if (ISD::isNormalLoad(N->getOperand(0).getNode())) {
15846 LSBaseSDNode* LSBase = cast<LSBaseSDNode>(N->getOperand(0));
15847 return combineVReverseMemOP(cast<ShuffleVectorSDNode>(N), LSBase, DCI);
15848 }
15849 return combineVectorShuffle(cast<ShuffleVectorSDNode>(N), DCI.DAG);
15850 case ISD::STORE: {
15851
15852 EVT Op1VT = N->getOperand(1).getValueType();
15853 unsigned Opcode = N->getOperand(1).getOpcode();
15854
15855 if (Opcode == ISD::FP_TO_SINT || Opcode == ISD::FP_TO_UINT ||
15856 Opcode == ISD::STRICT_FP_TO_SINT || Opcode == ISD::STRICT_FP_TO_UINT) {
15857 SDValue Val = combineStoreFPToInt(N, DCI);
15858 if (Val)
15859 return Val;
15860 }
15861
15862 if (Opcode == ISD::VECTOR_SHUFFLE && ISD::isNormalStore(N)) {
15863 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N->getOperand(1));
15864 SDValue Val= combineVReverseMemOP(SVN, cast<LSBaseSDNode>(N), DCI);
15865 if (Val)
15866 return Val;
15867 }
15868
15869 // Turn STORE (BSWAP) -> sthbrx/stwbrx.
15870 if (cast<StoreSDNode>(N)->isUnindexed() && Opcode == ISD::BSWAP &&
15871 N->getOperand(1).getNode()->hasOneUse() &&
15872 (Op1VT == MVT::i32 || Op1VT == MVT::i16 ||
15873 (Subtarget.hasLDBRX() && Subtarget.isPPC64() && Op1VT == MVT::i64))) {
15874
15875 // STBRX can only handle simple types and it makes no sense to store fewer
15876 // than two bytes in byte-reversed order.
15877 EVT mVT = cast<StoreSDNode>(N)->getMemoryVT();
15878 if (mVT.isExtended() || mVT.getSizeInBits() < 16)
15879 break;
15880
15881 SDValue BSwapOp = N->getOperand(1).getOperand(0);
15882 // Do an any-extend to 32-bits if this is a half-word input.
15883 if (BSwapOp.getValueType() == MVT::i16)
15884 BSwapOp = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, BSwapOp);
15885
15886 // If the type of the BSWAP operand is wider than the stored memory width,
15887 // it needs to be shifted right before the STBRX.
15888 if (Op1VT.bitsGT(mVT)) {
15889 int Shift = Op1VT.getSizeInBits() - mVT.getSizeInBits();
15890 BSwapOp = DAG.getNode(ISD::SRL, dl, Op1VT, BSwapOp,
15891 DAG.getConstant(Shift, dl, MVT::i32));
15892 // Need to truncate if this is a bswap of i64 stored as i32/i16.
15893 if (Op1VT == MVT::i64)
15894 BSwapOp = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, BSwapOp);
15895 }
15896
15897 SDValue Ops[] = {
15898 N->getOperand(0), BSwapOp, N->getOperand(2), DAG.getValueType(mVT)
15899 };
15900 return
15901 DAG.getMemIntrinsicNode(PPCISD::STBRX, dl, DAG.getVTList(MVT::Other),
15902 Ops, cast<StoreSDNode>(N)->getMemoryVT(),
15903 cast<StoreSDNode>(N)->getMemOperand());
15904 }
15905
15906 // STORE Constant:i32<0> -> STORE<trunc to i32> Constant:i64<0>
15907 // So it can increase the chance of CSE constant construction.
15908 if (Subtarget.isPPC64() && !DCI.isBeforeLegalize() &&
15909 isa<ConstantSDNode>(N->getOperand(1)) && Op1VT == MVT::i32) {
15910 // Need to sign-extend to 64 bits to handle negative values.
15911 EVT MemVT = cast<StoreSDNode>(N)->getMemoryVT();
15912 uint64_t Val64 = SignExtend64(N->getConstantOperandVal(1),
15913 MemVT.getSizeInBits());
15914 SDValue Const64 = DAG.getConstant(Val64, dl, MVT::i64);
15915
15916 // DAG.getTruncStore() can't be used here because it doesn't accept
15917 // the general (base + offset) addressing mode.
15918 // So we use UpdateNodeOperands and setTruncatingStore instead.
15919 DAG.UpdateNodeOperands(N, N->getOperand(0), Const64, N->getOperand(2),
15920 N->getOperand(3));
15921 cast<StoreSDNode>(N)->setTruncatingStore(true);
15922 return SDValue(N, 0);
15923 }
15924
15925 // For little endian, VSX stores require generating xxswapd/lxvd2x.
15926 // Not needed on ISA 3.0 based CPUs since we have a non-permuting store.
15927 if (Op1VT.isSimple()) {
15928 MVT StoreVT = Op1VT.getSimpleVT();
15929 if (Subtarget.needsSwapsForVSXMemOps() &&
15930 (StoreVT == MVT::v2f64 || StoreVT == MVT::v2i64 ||
15931 StoreVT == MVT::v4f32 || StoreVT == MVT::v4i32))
15932 return expandVSXStoreForLE(N, DCI);
15933 }
15934 break;
15935 }
15936 case ISD::LOAD: {
15937 LoadSDNode *LD = cast<LoadSDNode>(N);
15938 EVT VT = LD->getValueType(0);
15939
15940 // For little endian, VSX loads require generating lxvd2x/xxswapd.
15941 // Not needed on ISA 3.0 based CPUs since we have a non-permuting load.
15942 if (VT.isSimple()) {
15943 MVT LoadVT = VT.getSimpleVT();
15944 if (Subtarget.needsSwapsForVSXMemOps() &&
15945 (LoadVT == MVT::v2f64 || LoadVT == MVT::v2i64 ||
15946 LoadVT == MVT::v4f32 || LoadVT == MVT::v4i32))
15947 return expandVSXLoadForLE(N, DCI);
15948 }
15949
15950 // We sometimes end up with a 64-bit integer load, from which we extract
15951 // two single-precision floating-point numbers. This happens with
15952 // std::complex<float>, and other similar structures, because of the way we
15953 // canonicalize structure copies. However, if we lack direct moves,
15954 // then the final bitcasts from the extracted integer values to the
15955 // floating-point numbers turn into store/load pairs. Even with direct moves,
15956 // just loading the two floating-point numbers is likely better.
15957 auto ReplaceTwoFloatLoad = [&]() {
15958 if (VT != MVT::i64)
15959 return false;
15960
15961 if (LD->getExtensionType() != ISD::NON_EXTLOAD ||
15962 LD->isVolatile())
15963 return false;
15964
15965 // We're looking for a sequence like this:
15966 // t13: i64,ch = load<LD8[%ref.tmp]> t0, t6, undef:i64
15967 // t16: i64 = srl t13, Constant:i32<32>
15968 // t17: i32 = truncate t16
15969 // t18: f32 = bitcast t17
15970 // t19: i32 = truncate t13
15971 // t20: f32 = bitcast t19
15972
15973 if (!LD->hasNUsesOfValue(2, 0))
15974 return false;
15975
15976 auto UI = LD->use_begin();
15977 while (UI.getUse().getResNo() != 0) ++UI;
15978 SDNode *Trunc = *UI++;
15979 while (UI.getUse().getResNo() != 0) ++UI;
15980 SDNode *RightShift = *UI;
15981 if (Trunc->getOpcode() != ISD::TRUNCATE)
15982 std::swap(Trunc, RightShift);
15983
15984 if (Trunc->getOpcode() != ISD::TRUNCATE ||
15985 Trunc->getValueType(0) != MVT::i32 ||
15986 !Trunc->hasOneUse())
15987 return false;
15988 if (RightShift->getOpcode() != ISD::SRL ||
15989 !isa<ConstantSDNode>(RightShift->getOperand(1)) ||
15990 RightShift->getConstantOperandVal(1) != 32 ||
15991 !RightShift->hasOneUse())
15992 return false;
15993
15994 SDNode *Trunc2 = *RightShift->use_begin();
15995 if (Trunc2->getOpcode() != ISD::TRUNCATE ||
15996 Trunc2->getValueType(0) != MVT::i32 ||
15997 !Trunc2->hasOneUse())
15998 return false;
15999
16000 SDNode *Bitcast = *Trunc->use_begin();
16001 SDNode *Bitcast2 = *Trunc2->use_begin();
16002
16003 if (Bitcast->getOpcode() != ISD::BITCAST ||
16004 Bitcast->getValueType(0) != MVT::f32)
16005 return false;
16006 if (Bitcast2->getOpcode() != ISD::BITCAST ||
16007 Bitcast2->getValueType(0) != MVT::f32)
16008 return false;
16009
16010 if (Subtarget.isLittleEndian())
16011 std::swap(Bitcast, Bitcast2);
16012
16013 // Bitcast has the second float (in memory-layout order) and Bitcast2
16014 // has the first one.
16015
16016 SDValue BasePtr = LD->getBasePtr();
16017 if (LD->isIndexed()) {
16018 assert(LD->getAddressingMode() == ISD::PRE_INC &&
16019 "Non-pre-inc AM on PPC?");
16020 BasePtr =
16021 DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
16022 LD->getOffset());
16023 }
16024
16025 auto MMOFlags =
16026 LD->getMemOperand()->getFlags() & ~MachineMemOperand::MOVolatile;
16027 SDValue FloatLoad = DAG.getLoad(MVT::f32, dl, LD->getChain(), BasePtr,
16028 LD->getPointerInfo(), LD->getAlign(),
16029 MMOFlags, LD->getAAInfo());
16030 SDValue AddPtr =
16031 DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(),
16032 BasePtr, DAG.getIntPtrConstant(4, dl));
16033 SDValue FloatLoad2 = DAG.getLoad(
16034 MVT::f32, dl, SDValue(FloatLoad.getNode(), 1), AddPtr,
16035 LD->getPointerInfo().getWithOffset(4),
16036 commonAlignment(LD->getAlign(), 4), MMOFlags, LD->getAAInfo());
16037
16038 if (LD->isIndexed()) {
16039 // Note that DAGCombine should re-form any pre-increment load(s) from
16040 // what is produced here if that makes sense.
16041 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), BasePtr);
16042 }
16043
16044 DCI.CombineTo(Bitcast2, FloatLoad);
16045 DCI.CombineTo(Bitcast, FloatLoad2);
16046
16047 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, LD->isIndexed() ? 2 : 1),
16048 SDValue(FloatLoad2.getNode(), 1));
16049 return true;
16050 };
16051
16052 if (ReplaceTwoFloatLoad())
16053 return SDValue(N, 0);
16054
16055 EVT MemVT = LD->getMemoryVT();
16056 Type *Ty = MemVT.getTypeForEVT(*DAG.getContext());
16057 Align ABIAlignment = DAG.getDataLayout().getABITypeAlign(Ty);
16058 if (LD->isUnindexed() && VT.isVector() &&
16059 ((Subtarget.hasAltivec() && ISD::isNON_EXTLoad(N) &&
16060 // P8 and later hardware should just use LOAD.
16061 !Subtarget.hasP8Vector() &&
16062 (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32 ||
16063 VT == MVT::v4f32))) &&
16064 LD->getAlign() < ABIAlignment) {
16065 // This is a type-legal unaligned Altivec load.
16066 SDValue Chain = LD->getChain();
16067 SDValue Ptr = LD->getBasePtr();
16068 bool isLittleEndian = Subtarget.isLittleEndian();
16069
16070 // This implements the loading of unaligned vectors as described in
16071 // the venerable Apple Velocity Engine overview. Specifically:
16072 // https://developer.apple.com/hardwaredrivers/ve/alignment.html
16073 // https://developer.apple.com/hardwaredrivers/ve/code_optimization.html
16074 //
16075 // The general idea is to expand a sequence of one or more unaligned
16076 // loads into an alignment-based permutation-control instruction (lvsl
16077 // or lvsr), a series of regular vector loads (which always truncate
16078 // their input address to an aligned address), and a series of
16079 // permutations. The results of these permutations are the requested
16080 // loaded values. The trick is that the last "extra" load is not taken
16081 // from the address you might suspect (sizeof(vector) bytes after the
16082 // last requested load), but rather sizeof(vector) - 1 bytes after the
16083 // last requested vector. The point of this is to avoid a page fault if
16084 // the base address happened to be aligned. This works because if the
16085 // base address is aligned, then adding less than a full vector length
16086 // will cause the last vector in the sequence to be (re)loaded.
16087 // Otherwise, the next vector will be fetched as you might suspect was
16088 // necessary.
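// Concrete sketch (big-endian form): loading 16 bytes from a pointer P with
// P % 16 == 8 performs lvsl on P to get the permute control, lvx from
// (P & ~15) and from ((P + 15) & ~15), and then vperm, which selects the
// last 8 bytes of the first block followed by the first 8 bytes of the
// second. The little-endian path uses lvsr and swaps the vperm inputs, as
// noted further below.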
16089
16090 // We might be able to reuse the permutation generation from
16091 // a different base address offset from this one by an aligned amount.
16092 // The INTRINSIC_WO_CHAIN DAG combine will attempt to perform this
16093 // optimization later.
16094 Intrinsic::ID Intr, IntrLD, IntrPerm;
16095 MVT PermCntlTy, PermTy, LDTy;
16096 Intr = isLittleEndian ? Intrinsic::ppc_altivec_lvsr
16097 : Intrinsic::ppc_altivec_lvsl;
16098 IntrLD = Intrinsic::ppc_altivec_lvx;
16099 IntrPerm = Intrinsic::ppc_altivec_vperm;
16100 PermCntlTy = MVT::v16i8;
16101 PermTy = MVT::v4i32;
16102 LDTy = MVT::v4i32;
16103
16104 SDValue PermCntl = BuildIntrinsicOp(Intr, Ptr, DAG, dl, PermCntlTy);
16105
16106 // Create the new MMO for the new base load. It is like the original MMO,
16107 // but represents an area in memory almost twice the vector size centered
16108 // on the original address. If the address is unaligned, we might start
16109 // reading up to (sizeof(vector)-1) bytes below the address of the
16110 // original unaligned load.
16111 MachineFunction &MF = DAG.getMachineFunction();
16112 MachineMemOperand *BaseMMO =
16113 MF.getMachineMemOperand(LD->getMemOperand(),
16114 -(int64_t)MemVT.getStoreSize()+1,
16115 2*MemVT.getStoreSize()-1);
16116
16117 // Create the new base load.
16118 SDValue LDXIntID =
16119 DAG.getTargetConstant(IntrLD, dl, getPointerTy(MF.getDataLayout()));
16120 SDValue BaseLoadOps[] = { Chain, LDXIntID, Ptr };
16121 SDValue BaseLoad =
16122 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl,
16123 DAG.getVTList(PermTy, MVT::Other),
16124 BaseLoadOps, LDTy, BaseMMO);
16125
16126 // Note that the value of IncOffset (which is provided to the next
16127 // load's pointer info offset value, and thus used to calculate the
16128 // alignment), and the value of IncValue (which is actually used to
16129 // increment the pointer value) are different! This is because we
16130 // require the next load to appear to be aligned, even though it
16131 // is actually offset from the base pointer by a lesser amount.
16132 int IncOffset = VT.getSizeInBits() / 8;
16133 int IncValue = IncOffset;
16134
16135 // Walk (both up and down) the chain looking for another load at the real
16136 // (aligned) offset (the alignment of the other load does not matter in
16137 // this case). If found, then do not use the offset reduction trick, as
16138 // that will prevent the loads from being later combined (as they would
16139 // otherwise be duplicates).
16140 if (!findConsecutiveLoad(LD, DAG))
16141 --IncValue;
16142
16143 SDValue Increment =
16144 DAG.getConstant(IncValue, dl, getPointerTy(MF.getDataLayout()));
16145 Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment);
16146
16147 MachineMemOperand *ExtraMMO =
16148 MF.getMachineMemOperand(LD->getMemOperand(),
16149 1, 2*MemVT.getStoreSize()-1);
16150 SDValue ExtraLoadOps[] = { Chain, LDXIntID, Ptr };
16151 SDValue ExtraLoad =
16152 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl,
16153 DAG.getVTList(PermTy, MVT::Other),
16154 ExtraLoadOps, LDTy, ExtraMMO);
16155
16156 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
16157 BaseLoad.getValue(1), ExtraLoad.getValue(1));
16158
16159 // Because vperm has a big-endian bias, we must reverse the order
16160 // of the input vectors and complement the permute control vector
16161 // when generating little endian code. We have already handled the
16162 // latter by using lvsr instead of lvsl, so just reverse BaseLoad
16163 // and ExtraLoad here.
16164 SDValue Perm;
16165 if (isLittleEndian)
16166 Perm = BuildIntrinsicOp(IntrPerm,
16167 ExtraLoad, BaseLoad, PermCntl, DAG, dl);
16168 else
16169 Perm = BuildIntrinsicOp(IntrPerm,
16170 BaseLoad, ExtraLoad, PermCntl, DAG, dl);
16171
16172 if (VT != PermTy)
16173 Perm = Subtarget.hasAltivec()
16174 ? DAG.getNode(ISD::BITCAST, dl, VT, Perm)
16175 : DAG.getNode(ISD::FP_ROUND, dl, VT, Perm,
16176 DAG.getTargetConstant(1, dl, MVT::i64));
16177 // second argument is 1 because this rounding
16178 // is always exact.
16179
16180 // The output of the permutation is our loaded result, the TokenFactor is
16181 // our new chain.
16182 DCI.CombineTo(N, Perm, TF);
16183 return SDValue(N, 0);
16184 }
16185 }
16186 break;
16187 case ISD::INTRINSIC_WO_CHAIN: {
16188 bool isLittleEndian = Subtarget.isLittleEndian();
16189 unsigned IID = N->getConstantOperandVal(0);
16190 Intrinsic::ID Intr = (isLittleEndian ? Intrinsic::ppc_altivec_lvsr
16191 : Intrinsic::ppc_altivec_lvsl);
16192 if (IID == Intr && N->getOperand(1)->getOpcode() == ISD::ADD) {
16193 SDValue Add = N->getOperand(1);
16194
16195 int Bits = 4 /* 16 byte alignment */;
16196
16197 if (DAG.MaskedValueIsZero(Add->getOperand(1),
16198 APInt::getAllOnes(Bits /* alignment */)
16199 .zext(Add.getScalarValueSizeInBits()))) {
16200 SDNode *BasePtr = Add->getOperand(0).getNode();
16201 for (SDNode *U : BasePtr->uses()) {
16202 if (U->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
16203 U->getConstantOperandVal(0) == IID) {
16204 // We've found another LVSL/LVSR, and this address is an aligned
16205 // multiple of that one. The results will be the same, so use the
16206 // one we've just found instead.
16207
16208 return SDValue(U, 0);
16209 }
16210 }
16211 }
16212
16213 if (isa<ConstantSDNode>(Add->getOperand(1))) {
16214 SDNode *BasePtr = Add->getOperand(0).getNode();
16215 for (SDNode *U : BasePtr->uses()) {
16216 if (U->getOpcode() == ISD::ADD &&
16217 isa<ConstantSDNode>(U->getOperand(1)) &&
16218 (Add->getConstantOperandVal(1) - U->getConstantOperandVal(1)) %
16219 (1ULL << Bits) ==
16220 0) {
16221 SDNode *OtherAdd = U;
16222 for (SDNode *V : OtherAdd->uses()) {
16223 if (V->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
16224 V->getConstantOperandVal(0) == IID) {
16225 return SDValue(V, 0);
16226 }
16227 }
16228 }
16229 }
16230 }
16231 }
16232
16233 // Combine vmaxsw/h/b(a, a's negation) to abs(a)
16234 // Expose the vabsduw/h/b opportunity for downstream passes.
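// For example, vmaxsw(%a, (sub <0,0,0,0>, %a)) is matched below and
// rewritten as ISD::ABS on %a.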
16235 if (!DCI.isAfterLegalizeDAG() && Subtarget.hasP9Altivec() &&
16236 (IID == Intrinsic::ppc_altivec_vmaxsw ||
16237 IID == Intrinsic::ppc_altivec_vmaxsh ||
16238 IID == Intrinsic::ppc_altivec_vmaxsb)) {
16239 SDValue V1 = N->getOperand(1);
16240 SDValue V2 = N->getOperand(2);
16241 if ((V1.getSimpleValueType() == MVT::v4i32 ||
16242 V1.getSimpleValueType() == MVT::v8i16 ||
16243 V1.getSimpleValueType() == MVT::v16i8) &&
16244 V1.getSimpleValueType() == V2.getSimpleValueType()) {
16245 // (0-a, a)
16246 if (V1.getOpcode() == ISD::SUB &&
16247 ISD::isBuildVectorAllZeros(V1.getOperand(0).getNode()) &&
16248 V1.getOperand(1) == V2) {
16249 return DAG.getNode(ISD::ABS, dl, V2.getValueType(), V2);
16250 }
16251 // (a, 0-a)
16252 if (V2.getOpcode() == ISD::SUB &&
16253 ISD::isBuildVectorAllZeros(V2.getOperand(0).getNode()) &&
16254 V2.getOperand(1) == V1) {
16255 return DAG.getNode(ISD::ABS, dl, V1.getValueType(), V1);
16256 }
16257 // (x-y, y-x)
16258 if (V1.getOpcode() == ISD::SUB && V2.getOpcode() == ISD::SUB &&
16259 V1.getOperand(0) == V2.getOperand(1) &&
16260 V1.getOperand(1) == V2.getOperand(0)) {
16261 return DAG.getNode(ISD::ABS, dl, V1.getValueType(), V1);
16262 }
16263 }
16264 }
16265 }
16266
16267 break;
16268 case ISD::INTRINSIC_W_CHAIN:
16269 switch (N->getConstantOperandVal(1)) {
16270 default:
16271 break;
16272 case Intrinsic::ppc_altivec_vsum4sbs:
16273 case Intrinsic::ppc_altivec_vsum4shs:
16274 case Intrinsic::ppc_altivec_vsum4ubs: {
16275 // These sum-across intrinsics only have a chain due to the side effect
16276 // that they may set the SAT bit. If we know the SAT bit will not be set
16277 // for some inputs, we can replace any uses of their chain with the
16278 // input chain.
16279 if (BuildVectorSDNode *BVN =
16280 dyn_cast<BuildVectorSDNode>(N->getOperand(3))) {
16281 APInt APSplatBits, APSplatUndef;
16282 unsigned SplatBitSize;
16283 bool HasAnyUndefs;
16284 bool BVNIsConstantSplat = BVN->isConstantSplat(
16285 APSplatBits, APSplatUndef, SplatBitSize, HasAnyUndefs, 0,
16286 !Subtarget.isLittleEndian());
16287 // If the constant splat vector is 0, the SAT bit will not be set.
16288 if (BVNIsConstantSplat && APSplatBits == 0)
16289 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), N->getOperand(0));
16290 }
16291 return SDValue();
16292 }
16293 case Intrinsic::ppc_vsx_lxvw4x:
16294 case Intrinsic::ppc_vsx_lxvd2x:
16295 // For little endian, VSX loads require generating lxvd2x/xxswapd.
16296 // Not needed on ISA 3.0 based CPUs since we have a non-permuting load.
16297 if (Subtarget.needsSwapsForVSXMemOps())
16298 return expandVSXLoadForLE(N, DCI);
16299 break;
16300 }
16301 break;
16302 case ISD::INTRINSIC_VOID:
16303 // For little endian, VSX stores require generating xxswapd/stxvd2x.
16304 // Not needed on ISA 3.0 based CPUs since we have a non-permuting store.
16305 if (Subtarget.needsSwapsForVSXMemOps()) {
16306 switch (N->getConstantOperandVal(1)) {
16307 default:
16308 break;
16309 case Intrinsic::ppc_vsx_stxvw4x:
16310 case Intrinsic::ppc_vsx_stxvd2x:
16311 return expandVSXStoreForLE(N, DCI);
16312 }
16313 }
16314 break;
16315 case ISD::BSWAP: {
16316 // Turn BSWAP (LOAD) -> lhbrx/lwbrx.
16317 // For subtargets without LDBRX, we can still do better than the default
16318 // expansion even for 64-bit BSWAP (LOAD).
16319 bool Is64BitBswapOn64BitTgt =
16320 Subtarget.isPPC64() && N->getValueType(0) == MVT::i64;
16321 bool IsSingleUseNormalLd = ISD::isNormalLoad(N->getOperand(0).getNode()) &&
16322 N->getOperand(0).hasOneUse();
16323 if (IsSingleUseNormalLd &&
16324 (N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i16 ||
16325 (Subtarget.hasLDBRX() && Is64BitBswapOn64BitTgt))) {
16326 SDValue Load = N->getOperand(0);
16327 LoadSDNode *LD = cast<LoadSDNode>(Load);
16328 // Create the byte-swapping load.
16329 SDValue Ops[] = {
16330 LD->getChain(), // Chain
16331 LD->getBasePtr(), // Ptr
16332 DAG.getValueType(N->getValueType(0)) // VT
16333 };
16334 SDValue BSLoad =
16335 DAG.getMemIntrinsicNode(PPCISD::LBRX, dl,
16336 DAG.getVTList(N->getValueType(0) == MVT::i64 ?
16337 MVT::i64 : MVT::i32, MVT::Other),
16338 Ops, LD->getMemoryVT(), LD->getMemOperand());
16339
16340 // If this is an i16 load, insert the truncate.
16341 SDValue ResVal = BSLoad;
16342 if (N->getValueType(0) == MVT::i16)
16343 ResVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i16, BSLoad);
16344
16345 // First, combine the bswap away. This makes the value produced by the
16346 // load dead.
16347 DCI.CombineTo(N, ResVal);
16348
16349 // Next, combine the load away; we give it a bogus result value but a real
16350 // chain result. The result value is dead because the bswap is dead.
16351 DCI.CombineTo(Load.getNode(), ResVal, BSLoad.getValue(1));
16352
16353 // Return N so it doesn't get rechecked!
16354 return SDValue(N, 0);
16355 }
16356 // Convert this to two 32-bit bswap loads and a BUILD_PAIR. Do this only
16357 // before legalization so that the BUILD_PAIR is handled correctly.
16358 if (!DCI.isBeforeLegalize() || !Is64BitBswapOn64BitTgt ||
16359 !IsSingleUseNormalLd)
16360 return SDValue();
16361 LoadSDNode *LD = cast<LoadSDNode>(N->getOperand(0));
16362
16363 // Can't split volatile or atomic loads.
16364 if (!LD->isSimple())
16365 return SDValue();
16366 SDValue BasePtr = LD->getBasePtr();
16367 SDValue Lo = DAG.getLoad(MVT::i32, dl, LD->getChain(), BasePtr,
16368 LD->getPointerInfo(), LD->getAlign());
16369 Lo = DAG.getNode(ISD::BSWAP, dl, MVT::i32, Lo);
16370 BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
16371 DAG.getIntPtrConstant(4, dl));
16372 MachineMemOperand *NewMMO = DAG.getMachineFunction().getMachineMemOperand(
16373 LD->getMemOperand(), 4, 4);
16374 SDValue Hi = DAG.getLoad(MVT::i32, dl, LD->getChain(), BasePtr, NewMMO);
16375 Hi = DAG.getNode(ISD::BSWAP, dl, MVT::i32, Hi);
16376 SDValue Res;
16377 if (Subtarget.isLittleEndian())
16378 Res = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Hi, Lo);
16379 else
16380 Res = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
16381 SDValue TF =
16382 DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
16383 Hi.getOperand(0).getValue(1), Lo.getOperand(0).getValue(1));
16384 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), TF);
16385 return Res;
16386 }
16387 case PPCISD::VCMP:
16388 // If a VCMP_rec node already exists with exactly the same operands as this
16389 // node, use its result instead of this node (VCMP_rec computes both a CR6
16390 // and a normal output).
16391 //
16392 if (!N->getOperand(0).hasOneUse() &&
16393 !N->getOperand(1).hasOneUse() &&
16394 !N->getOperand(2).hasOneUse()) {
16395
16396 // Scan all of the users of the LHS, looking for VCMP_rec's that match.
16397 SDNode *VCMPrecNode = nullptr;
16398
16399 SDNode *LHSN = N->getOperand(0).getNode();
16400 for (SDNode::use_iterator UI = LHSN->use_begin(), E = LHSN->use_end();
16401 UI != E; ++UI)
16402 if (UI->getOpcode() == PPCISD::VCMP_rec &&
16403 UI->getOperand(1) == N->getOperand(1) &&
16404 UI->getOperand(2) == N->getOperand(2) &&
16405 UI->getOperand(0) == N->getOperand(0)) {
16406 VCMPrecNode = *UI;
16407 break;
16408 }
16409
16410 // If there is no VCMP_rec node, or if the flag value has a single use,
16411 // don't transform this.
16412 if (!VCMPrecNode || VCMPrecNode->hasNUsesOfValue(0, 1))
16413 break;
16414
16415 // Look at the (necessarily single) use of the flag value. If it has a
16416 // chain, this transformation is more complex. Note that multiple things
16417 // could use the value result, which we should ignore.
16418 SDNode *FlagUser = nullptr;
16419 for (SDNode::use_iterator UI = VCMPrecNode->use_begin();
16420 FlagUser == nullptr; ++UI) {
16421 assert(UI != VCMPrecNode->use_end() && "Didn't find user!");
16422 SDNode *User = *UI;
16423 for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) {
16424 if (User->getOperand(i) == SDValue(VCMPrecNode, 1)) {
16425 FlagUser = User;
16426 break;
16427 }
16428 }
16429 }
16430
16431 // If the user is a MFOCRF instruction, we know this is safe.
16432 // Otherwise we give up for right now.
16433 if (FlagUser->getOpcode() == PPCISD::MFOCRF)
16434 return SDValue(VCMPrecNode, 0);
16435 }
16436 break;
16437 case ISD::BR_CC: {
16438 // If this is a branch on an altivec predicate comparison, lower this so
16439 // that we don't have to do a MFOCRF: instead, branch directly on CR6. This
16440 // lowering is done pre-legalize, because the legalizer lowers the predicate
16441 // compare down to code that is difficult to reassemble.
16442 // This code also handles branches that depend on the result of a store
16443 // conditional.
16444 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
16445 SDValue LHS = N->getOperand(2), RHS = N->getOperand(3);
16446
16447 int CompareOpc;
16448 bool isDot;
16449
16450 if (!isa<ConstantSDNode>(RHS) || (CC != ISD::SETEQ && CC != ISD::SETNE))
16451 break;
16452
16453 // Since we are doing this pre-legalize, the RHS can be a constant of
16454 // arbitrary bitwidth which may cause issues when trying to get the value
16455 // from the underlying APInt.
16456 auto RHSAPInt = RHS->getAsAPIntVal();
16457 if (!RHSAPInt.isIntN(64))
16458 break;
16459
16460 unsigned Val = RHSAPInt.getZExtValue();
16461 auto isImpossibleCompare = [&]() {
16462 // If this is a comparison against something other than 0/1, then we know
16463 // that the condition is never/always true.
16464 if (Val != 0 && Val != 1) {
16465 if (CC == ISD::SETEQ) // Cond never true, remove branch.
16466 return N->getOperand(0);
16467 // Always !=, turn it into an unconditional branch.
16468 return DAG.getNode(ISD::BR, dl, MVT::Other,
16469 N->getOperand(0), N->getOperand(4));
16470 }
16471 return SDValue();
16472 };
16473 // Combine branches fed by store conditional instructions (st[bhwd]cx).
16474 unsigned StoreWidth = 0;
16475 if (LHS.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
16476 isStoreConditional(LHS, StoreWidth)) {
16477 if (SDValue Impossible = isImpossibleCompare())
16478 return Impossible;
16479 PPC::Predicate CompOpc;
16480 // eq 0 => ne
16481 // ne 0 => eq
16482 // eq 1 => eq
16483 // ne 1 => ne
16484 if (Val == 0)
16485 CompOpc = CC == ISD::SETEQ ? PPC::PRED_NE : PPC::PRED_EQ;
16486 else
16487 CompOpc = CC == ISD::SETEQ ? PPC::PRED_EQ : PPC::PRED_NE;
16488
16489 SDValue Ops[] = {LHS.getOperand(0), LHS.getOperand(2), LHS.getOperand(3),
16490 DAG.getConstant(StoreWidth, dl, MVT::i32)};
16491 auto *MemNode = cast<MemSDNode>(LHS);
16492 SDValue ConstSt = DAG.getMemIntrinsicNode(
16493 PPCISD::STORE_COND, dl,
16494 DAG.getVTList(MVT::i32, MVT::Other, MVT::Glue), Ops,
16495 MemNode->getMemoryVT(), MemNode->getMemOperand());
16496
16497 SDValue InChain;
16498 // Unchain the branch from the original store conditional.
16499 if (N->getOperand(0) == LHS.getValue(1))
16500 InChain = LHS.getOperand(0);
16501 else if (N->getOperand(0).getOpcode() == ISD::TokenFactor) {
16502 SmallVector<SDValue, 4> InChains;
16503 SDValue InTF = N->getOperand(0);
16504 for (int i = 0, e = InTF.getNumOperands(); i < e; i++)
16505 if (InTF.getOperand(i) != LHS.getValue(1))
16506 InChains.push_back(InTF.getOperand(i));
16507 InChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, InChains);
16508 }
16509
16510 return DAG.getNode(PPCISD::COND_BRANCH, dl, MVT::Other, InChain,
16511 DAG.getConstant(CompOpc, dl, MVT::i32),
16512 DAG.getRegister(PPC::CR0, MVT::i32), N->getOperand(4),
16513 ConstSt.getValue(2));
16514 }
16515
16516 if (LHS.getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
16517 getVectorCompareInfo(LHS, CompareOpc, isDot, Subtarget)) {
16518 assert(isDot && "Can't compare against a vector result!");
16519
16520 if (SDValue Impossible = isImpossibleCompare())
16521 return Impossible;
16522
16523 bool BranchOnWhenPredTrue = (CC == ISD::SETEQ) ^ (Val == 0);
16524 // Create the PPCISD altivec 'dot' comparison node.
16525 SDValue Ops[] = {
16526 LHS.getOperand(2), // LHS of compare
16527 LHS.getOperand(3), // RHS of compare
16528 DAG.getConstant(CompareOpc, dl, MVT::i32)
16529 };
16530 EVT VTs[] = { LHS.getOperand(2).getValueType(), MVT::Glue };
16531 SDValue CompNode = DAG.getNode(PPCISD::VCMP_rec, dl, VTs, Ops);
16532
16533 // Unpack the result based on how the target uses it.
16534 PPC::Predicate CompOpc;
16535 switch (LHS.getConstantOperandVal(1)) {
16536 default: // Can't happen, don't crash on invalid number though.
16537 case 0: // Branch on the value of the EQ bit of CR6.
16538 CompOpc = BranchOnWhenPredTrue ? PPC::PRED_EQ : PPC::PRED_NE;
16539 break;
16540 case 1: // Branch on the inverted value of the EQ bit of CR6.
16541 CompOpc = BranchOnWhenPredTrue ? PPC::PRED_NE : PPC::PRED_EQ;
16542 break;
16543 case 2: // Branch on the value of the LT bit of CR6.
16544 CompOpc = BranchOnWhenPredTrue ? PPC::PRED_LT : PPC::PRED_GE;
16545 break;
16546 case 3: // Branch on the inverted value of the LT bit of CR6.
16547 CompOpc = BranchOnWhenPredTrue ? PPC::PRED_GE : PPC::PRED_LT;
16548 break;
16549 }
16550
16551 return DAG.getNode(PPCISD::COND_BRANCH, dl, MVT::Other, N->getOperand(0),
16552 DAG.getConstant(CompOpc, dl, MVT::i32),
16553 DAG.getRegister(PPC::CR6, MVT::i32),
16554 N->getOperand(4), CompNode.getValue(1));
16555 }
16556 break;
16557 }
16558 case ISD::BUILD_VECTOR:
16559 return DAGCombineBuildVector(N, DCI);
16560 }
16561
16562 return SDValue();
16563}
16564
16565SDValue
16566PPCTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
16567 SelectionDAG &DAG,
16568 SmallVectorImpl<SDNode *> &Created) const {
16569 // fold (sdiv X, pow2)
16570 EVT VT = N->getValueType(0);
16571 if (VT == MVT::i64 && !Subtarget.isPPC64())
16572 return SDValue();
16573 if ((VT != MVT::i32 && VT != MVT::i64) ||
16574 !(Divisor.isPowerOf2() || Divisor.isNegatedPowerOf2()))
16575 return SDValue();
16576
16577 SDLoc DL(N);
16578 SDValue N0 = N->getOperand(0);
16579
16580 bool IsNegPow2 = Divisor.isNegatedPowerOf2();
16581 unsigned Lg2 = (IsNegPow2 ? -Divisor : Divisor).countr_zero();
16582 SDValue ShiftAmt = DAG.getConstant(Lg2, DL, VT);
16583
16584 SDValue Op = DAG.getNode(PPCISD::SRA_ADDZE, DL, VT, N0, ShiftAmt);
16585 Created.push_back(Op.getNode());
16586
16587 if (IsNegPow2) {
16588 Op = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Op);
16589 Created.push_back(Op.getNode());
16590 }
16591
16592 return Op;
16593}
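// As a rough sketch of the generated code: for i32 "sdiv %x, 4" the
// SRA_ADDZE node typically selects to "srawi rD, rX, 2" followed by
// "addze rD, rD", and for a divisor of -4 the ISD::SUB above adds a final
// "neg rD, rD".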
16594
16595//===----------------------------------------------------------------------===//
16596// Inline Assembly Support
16597//===----------------------------------------------------------------------===//
16598
16599void PPCTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
16600 KnownBits &Known,
16601 const APInt &DemandedElts,
16602 const SelectionDAG &DAG,
16603 unsigned Depth) const {
16604 Known.resetAll();
16605 switch (Op.getOpcode()) {
16606 default: break;
16607 case PPCISD::LBRX: {
16608 // lhbrx is known to have the top bits cleared out.
16609 if (cast<VTSDNode>(Op.getOperand(2))->getVT() == MVT::i16)
16610 Known.Zero = 0xFFFF0000;
16611 break;
16612 }
16613 case ISD::INTRINSIC_WO_CHAIN: {
16614 switch (Op.getConstantOperandVal(0)) {
16615 default: break;
16616 case Intrinsic::ppc_altivec_vcmpbfp_p:
16617 case Intrinsic::ppc_altivec_vcmpeqfp_p:
16618 case Intrinsic::ppc_altivec_vcmpequb_p:
16619 case Intrinsic::ppc_altivec_vcmpequh_p:
16620 case Intrinsic::ppc_altivec_vcmpequw_p:
16621 case Intrinsic::ppc_altivec_vcmpequd_p:
16622 case Intrinsic::ppc_altivec_vcmpequq_p:
16623 case Intrinsic::ppc_altivec_vcmpgefp_p:
16624 case Intrinsic::ppc_altivec_vcmpgtfp_p:
16625 case Intrinsic::ppc_altivec_vcmpgtsb_p:
16626 case Intrinsic::ppc_altivec_vcmpgtsh_p:
16627 case Intrinsic::ppc_altivec_vcmpgtsw_p:
16628 case Intrinsic::ppc_altivec_vcmpgtsd_p:
16629 case Intrinsic::ppc_altivec_vcmpgtsq_p:
16630 case Intrinsic::ppc_altivec_vcmpgtub_p:
16631 case Intrinsic::ppc_altivec_vcmpgtuh_p:
16632 case Intrinsic::ppc_altivec_vcmpgtuw_p:
16633 case Intrinsic::ppc_altivec_vcmpgtud_p:
16634 case Intrinsic::ppc_altivec_vcmpgtuq_p:
16635 Known.Zero = ~1U; // All bits but the low one are known to be zero.
16636 break;
16637 }
16638 break;
16639 }
16640 case ISD::INTRINSIC_W_CHAIN: {
16641 switch (Op.getConstantOperandVal(1)) {
16642 default:
16643 break;
16644 case Intrinsic::ppc_load2r:
16645 // Top bits are cleared for load2r (which is the same as lhbrx).
16646 Known.Zero = 0xFFFF0000;
16647 break;
16648 }
16649 break;
16650 }
16651 }
16652}
16653
16654Align PPCTargetLowering::getPrefLoopAlignment(MachineLoop *ML) const {
16655 switch (Subtarget.getCPUDirective()) {
16656 default: break;
16657 case PPC::DIR_970:
16658 case PPC::DIR_PWR4:
16659 case PPC::DIR_PWR5:
16660 case PPC::DIR_PWR5X:
16661 case PPC::DIR_PWR6:
16662 case PPC::DIR_PWR6X:
16663 case PPC::DIR_PWR7:
16664 case PPC::DIR_PWR8:
16665 case PPC::DIR_PWR9:
16666 case PPC::DIR_PWR10:
16667 case PPC::DIR_PWR_FUTURE: {
16668 if (!ML)
16669 break;
16670
16671 if (!DisableInnermostLoopAlign32) {
16672 // If the nested loop is an innermost loop, prefer a 32-byte alignment,
16673 // so that we can decrease cache misses and branch-prediction misses.
16674 // Actual alignment of the loop will depend on the hotness check and other
16675 // logic in alignBlocks.
16676 if (ML->getLoopDepth() > 1 && ML->getSubLoops().empty())
16677 return Align(32);
16678 }
16679
16680 const PPCInstrInfo *TII = Subtarget.getInstrInfo();
16681
16682 // For small loops (between 5 and 8 instructions), align to a 32-byte
16683 // boundary so that the entire loop fits in one instruction-cache line.
16684 uint64_t LoopSize = 0;
16685 for (auto I = ML->block_begin(), IE = ML->block_end(); I != IE; ++I)
16686 for (const MachineInstr &J : **I) {
16687 LoopSize += TII->getInstSizeInBytes(J);
16688 if (LoopSize > 32)
16689 break;
16690 }
16691
16692 if (LoopSize > 16 && LoopSize <= 32)
16693 return Align(32);
16694
16695 break;
16696 }
16697 }
16698
16699 return TargetLowering::getPrefLoopAlignment(ML);
16700}
16701
16702/// getConstraintType - Given a constraint, return the type of
16703/// constraint it is for this target.
16704PPCTargetLowering::ConstraintType
16705PPCTargetLowering::getConstraintType(StringRef Constraint) const {
16706 if (Constraint.size() == 1) {
16707 switch (Constraint[0]) {
16708 default: break;
16709 case 'b':
16710 case 'r':
16711 case 'f':
16712 case 'd':
16713 case 'v':
16714 case 'y':
16715 return C_RegisterClass;
16716 case 'Z':
16717 // FIXME: While Z does indicate a memory constraint, it specifically
16718 // indicates an r+r address (used in conjunction with the 'y' modifier
16719 // in the replacement string). Currently, we're forcing the base
16720 // register to be r0 in the asm printer (which is interpreted as zero)
16721 // and forming the complete address in the second register. This is
16722 // suboptimal.
16723 return C_Memory;
16724 }
16725 } else if (Constraint == "wc") { // individual CR bits.
16726 return C_RegisterClass;
16727 } else if (Constraint == "wa" || Constraint == "wd" ||
16728 Constraint == "wf" || Constraint == "ws" ||
16729 Constraint == "wi" || Constraint == "ww") {
16730 return C_RegisterClass; // VSX registers.
16731 }
16732 return TargetLowering::getConstraintType(Constraint);
16733}
16734
16735/// Examine constraint type and operand type and determine a weight value.
16736/// This object must already have been set up with the operand type
16737/// and the current alternative constraint selected.
16738TargetLowering::ConstraintWeight
16739PPCTargetLowering::getSingleConstraintMatchWeight(
16740 AsmOperandInfo &info, const char *constraint) const {
16741 ConstraintWeight weight = CW_Invalid;
16742 Value *CallOperandVal = info.CallOperandVal;
16743 // If we don't have a value, we can't do a match,
16744 // but allow it at the lowest weight.
16745 if (!CallOperandVal)
16746 return CW_Default;
16747 Type *type = CallOperandVal->getType();
16748
16749 // Look at the constraint type.
16750 if (StringRef(constraint) == "wc" && type->isIntegerTy(1))
16751 return CW_Register; // an individual CR bit.
16752 else if ((StringRef(constraint) == "wa" ||
16753 StringRef(constraint) == "wd" ||
16754 StringRef(constraint) == "wf") &&
16755 type->isVectorTy())
16756 return CW_Register;
16757 else if (StringRef(constraint) == "wi" && type->isIntegerTy(64))
16758 return CW_Register; // only holds 64-bit integer data.
16759 else if (StringRef(constraint) == "ws" && type->isDoubleTy())
16760 return CW_Register;
16761 else if (StringRef(constraint) == "ww" && type->isFloatTy())
16762 return CW_Register;
16763
16764 switch (*constraint) {
16765 default:
16766 weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
16767 break;
16768 case 'b':
16769 if (type->isIntegerTy())
16770 weight = CW_Register;
16771 break;
16772 case 'f':
16773 if (type->isFloatTy())
16774 weight = CW_Register;
16775 break;
16776 case 'd':
16777 if (type->isDoubleTy())
16778 weight = CW_Register;
16779 break;
16780 case 'v':
16781 if (type->isVectorTy())
16782 weight = CW_Register;
16783 break;
16784 case 'y':
16785 weight = CW_Register;
16786 break;
16787 case 'Z':
16788 weight = CW_Memory;
16789 break;
16790 }
16791 return weight;
16792}
16793
16794std::pair<unsigned, const TargetRegisterClass *>
16795PPCTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
16796 StringRef Constraint,
16797 MVT VT) const {
16798 if (Constraint.size() == 1) {
16799 // GCC RS6000 Constraint Letters
16800 switch (Constraint[0]) {
16801 case 'b': // R1-R31
16802 if (VT == MVT::i64 && Subtarget.isPPC64())
16803 return std::make_pair(0U, &PPC::G8RC_NOX0RegClass);
16804 return std::make_pair(0U, &PPC::GPRC_NOR0RegClass);
16805 case 'r': // R0-R31
16806 if (VT == MVT::i64 && Subtarget.isPPC64())
16807 return std::make_pair(0U, &PPC::G8RCRegClass);
16808 return std::make_pair(0U, &PPC::GPRCRegClass);
16809 // 'd' and 'f' constraints are both defined to be "the floating point
16810 // registers", where one is for 32-bit and the other for 64-bit. We don't
16811 // really care overly much here so just give them all the same reg classes.
16812 case 'd':
16813 case 'f':
16814 if (Subtarget.hasSPE()) {
16815 if (VT == MVT::f32 || VT == MVT::i32)
16816 return std::make_pair(0U, &PPC::GPRCRegClass);
16817 if (VT == MVT::f64 || VT == MVT::i64)
16818 return std::make_pair(0U, &PPC::SPERCRegClass);
16819 } else {
16820 if (VT == MVT::f32 || VT == MVT::i32)
16821 return std::make_pair(0U, &PPC::F4RCRegClass);
16822 if (VT == MVT::f64 || VT == MVT::i64)
16823 return std::make_pair(0U, &PPC::F8RCRegClass);
16824 }
16825 break;
16826 case 'v':
16827 if (Subtarget.hasAltivec() && VT.isVector())
16828 return std::make_pair(0U, &PPC::VRRCRegClass);
16829 else if (Subtarget.hasVSX())
16830 // Scalars in Altivec registers only make sense with VSX.
16831 return std::make_pair(0U, &PPC::VFRCRegClass);
16832 break;
16833 case 'y': // crrc
16834 return std::make_pair(0U, &PPC::CRRCRegClass);
16835 }
16836 } else if (Constraint == "wc" && Subtarget.useCRBits()) {
16837 // An individual CR bit.
16838 return std::make_pair(0U, &PPC::CRBITRCRegClass);
16839 } else if ((Constraint == "wa" || Constraint == "wd" ||
16840 Constraint == "wf" || Constraint == "wi") &&
16841 Subtarget.hasVSX()) {
16842 // A VSX register for either a scalar (FP) or vector. There is no
16843 // support for single precision scalars on subtargets prior to Power8.
16844 if (VT.isVector())
16845 return std::make_pair(0U, &PPC::VSRCRegClass);
16846 if (VT == MVT::f32 && Subtarget.hasP8Vector())
16847 return std::make_pair(0U, &PPC::VSSRCRegClass);
16848 return std::make_pair(0U, &PPC::VSFRCRegClass);
16849 } else if ((Constraint == "ws" || Constraint == "ww") && Subtarget.hasVSX()) {
16850 if (VT == MVT::f32 && Subtarget.hasP8Vector())
16851 return std::make_pair(0U, &PPC::VSSRCRegClass);
16852 else
16853 return std::make_pair(0U, &PPC::VSFRCRegClass);
16854 } else if (Constraint == "lr") {
16855 if (VT == MVT::i64)
16856 return std::make_pair(0U, &PPC::LR8RCRegClass);
16857 else
16858 return std::make_pair(0U, &PPC::LRRCRegClass);
16859 }
16860
16861 // Handle special cases of physical registers that are not properly handled
16862 // by the base class.
16863 if (Constraint[0] == '{' && Constraint[Constraint.size() - 1] == '}') {
16864 // If we name a VSX register, we can't defer to the base class because it
16865 // will not recognize the correct register (their names will be VSL{0-31}
16866 // and V{0-31} so they won't match). So we match them here.
16867 if (Constraint.size() > 3 && Constraint[1] == 'v' && Constraint[2] == 's') {
16868 int VSNum = atoi(Constraint.data() + 3);
16869 assert(VSNum >= 0 && VSNum <= 63 &&
16870 "Attempted to access a vsr out of range");
16871 if (VSNum < 32)
16872 return std::make_pair(PPC::VSL0 + VSNum, &PPC::VSRCRegClass);
16873 return std::make_pair(PPC::V0 + VSNum - 32, &PPC::VSRCRegClass);
16874 }
16875
16876 // For float registers, we can't defer to the base class as it will match
16877 // the SPILLTOVSRRC class.
16878 if (Constraint.size() > 3 && Constraint[1] == 'f') {
16879 int RegNum = atoi(Constraint.data() + 2);
16880 if (RegNum > 31 || RegNum < 0)
16881 report_fatal_error("Invalid floating point register number");
16882 if (VT == MVT::f32 || VT == MVT::i32)
16883 return Subtarget.hasSPE()
16884 ? std::make_pair(PPC::R0 + RegNum, &PPC::GPRCRegClass)
16885 : std::make_pair(PPC::F0 + RegNum, &PPC::F4RCRegClass);
16886 if (VT == MVT::f64 || VT == MVT::i64)
16887 return Subtarget.hasSPE()
16888 ? std::make_pair(PPC::S0 + RegNum, &PPC::SPERCRegClass)
16889 : std::make_pair(PPC::F0 + RegNum, &PPC::F8RCRegClass);
16890 }
16891 }
16892
16893 std::pair<unsigned, const TargetRegisterClass *> R =
16894 TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
16895
16896 // r[0-9]+ are used, on PPC64, to refer to the corresponding 64-bit registers
16897 // (which we call X[0-9]+). If a 64-bit value has been requested, and a
16898 // 32-bit GPR has been selected, then 'upgrade' it to the 64-bit parent
16899 // register.
16900 // FIXME: If TargetLowering::getRegForInlineAsmConstraint could somehow use
16901 // the AsmName field from *RegisterInfo.td, then this would not be necessary.
16902 if (R.first && VT == MVT::i64 && Subtarget.isPPC64() &&
16903 PPC::GPRCRegClass.contains(R.first))
16904 return std::make_pair(TRI->getMatchingSuperReg(R.first,
16905 PPC::sub_32, &PPC::G8RCRegClass),
16906 &PPC::G8RCRegClass);
16907
16908 // GCC accepts 'cc' as an alias for 'cr0', and we need to do the same.
16909 if (!R.second && StringRef("{cc}").equals_insensitive(Constraint)) {
16910 R.first = PPC::CR0;
16911 R.second = &PPC::CRRCRegClass;
16912 }
16913 // FIXME: This warning should ideally be emitted in the front end.
16914 const auto &TM = getTargetMachine();
16915 if (Subtarget.isAIXABI() && !TM.getAIXExtendedAltivecABI()) {
16916 if (((R.first >= PPC::V20 && R.first <= PPC::V31) ||
16917 (R.first >= PPC::VF20 && R.first <= PPC::VF31)) &&
16918 (R.second == &PPC::VSRCRegClass || R.second == &PPC::VSFRCRegClass))
16919 errs() << "warning: vector registers 20 to 32 are reserved in the "
16920 "default AIX AltiVec ABI and cannot be used\n";
16921 }
16922
16923 return R;
16924}
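// For example, an explicit "{vs35}" constraint resolves above to PPC::V3 in
// VSRCRegClass (VSX registers 32-63 overlap the AltiVec V registers), while
// "{vs5}" resolves to PPC::VSL5.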
16925
16926/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
16927/// vector. If it is invalid, don't add anything to Ops.
16928void PPCTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
16929 StringRef Constraint,
16930 std::vector<SDValue> &Ops,
16931 SelectionDAG &DAG) const {
16932 SDValue Result;
16933
16934 // Only support length 1 constraints.
16935 if (Constraint.size() > 1)
16936 return;
16937
16938 char Letter = Constraint[0];
16939 switch (Letter) {
16940 default: break;
16941 case 'I':
16942 case 'J':
16943 case 'K':
16944 case 'L':
16945 case 'M':
16946 case 'N':
16947 case 'O':
16948 case 'P': {
16949 ConstantSDNode *CST = dyn_cast<ConstantSDNode>(Op);
16950 if (!CST) return; // Must be an immediate to match.
16951 SDLoc dl(Op);
16952 int64_t Value = CST->getSExtValue();
16953 EVT TCVT = MVT::i64; // All constants taken to be 64 bits so that negative
16954 // numbers are printed as such.
16955 switch (Letter) {
16956 default: llvm_unreachable("Unknown constraint letter!");
16957 case 'I': // "I" is a signed 16-bit constant.
16958 if (isInt<16>(Value))
16959 Result = DAG.getTargetConstant(Value, dl, TCVT);
16960 break;
16961 case 'J': // "J" is a constant with only the high-order 16 bits nonzero.
16962 if (isShiftedUInt<16, 16>(Value))
16963 Result = DAG.getTargetConstant(Value, dl, TCVT);
16964 break;
16965 case 'L': // "L" is a signed 16-bit constant shifted left 16 bits.
16966 if (isShiftedInt<16, 16>(Value))
16967 Result = DAG.getTargetConstant(Value, dl, TCVT);
16968 break;
16969 case 'K': // "K" is a constant with only the low-order 16 bits nonzero.
16970 if (isUInt<16>(Value))
16971 Result = DAG.getTargetConstant(Value, dl, TCVT);
16972 break;
16973 case 'M': // "M" is a constant that is greater than 31.
16974 if (Value > 31)
16975 Result = DAG.getTargetConstant(Value, dl, TCVT);
16976 break;
16977 case 'N': // "N" is a positive constant that is an exact power of two.
16978 if (Value > 0 && isPowerOf2_64(Value))
16979 Result = DAG.getTargetConstant(Value, dl, TCVT);
16980 break;
16981 case 'O': // "O" is the constant zero.
16982 if (Value == 0)
16983 Result = DAG.getTargetConstant(Value, dl, TCVT);
16984 break;
16985 case 'P': // "P" is a constant whose negation is a signed 16-bit constant.
16986 if (isInt<16>(-Value))
16987 Result = DAG.getTargetConstant(Value, dl, TCVT);
16988 break;
16989 }
16990 break;
16991 }
16992 }
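  // Illustration (editorial, not from the original source): with GCC-style
  // inline asm such as
  //   __asm__("addi %0,%1,%2" : "=r"(dst) : "r"(src), "I"(64));
  // the "I" operand must be a signed 16-bit constant, so the switch above
  // emits a matching target constant; out-of-range values simply fall through
  // to the generic constraint handling below.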
16993
16994 if (Result.getNode()) {
16995 Ops.push_back(Result);
16996 return;
16997 }
16998
16999 // Handle standard constraint letters.
17000 TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
17001}
17002
17003void PPCTargetLowering::CollectTargetIntrinsicOperands(const CallInst &I,
17004 SmallVectorImpl<SDValue> &Ops,
17005 SelectionDAG &DAG) const {
17006 if (I.getNumOperands() <= 1)
17007 return;
17008 if (!isa<ConstantSDNode>(Ops[1].getNode()))
17009 return;
17010 auto IntrinsicID = Ops[1].getNode()->getAsZExtVal();
17011 if (IntrinsicID != Intrinsic::ppc_tdw && IntrinsicID != Intrinsic::ppc_tw &&
17012 IntrinsicID != Intrinsic::ppc_trapd && IntrinsicID != Intrinsic::ppc_trap)
17013 return;
17014
17015 if (MDNode *MDN = I.getMetadata(LLVMContext::MD_annotation))
17016 Ops.push_back(DAG.getMDNode(MDN));
17017}
17018
17019// isLegalAddressingMode - Return true if the addressing mode represented
17020// by AM is legal for this target, for a load/store of the specified type.
17021bool PPCTargetLowering::isLegalAddressingMode(const DataLayout &DL,
17022 const AddrMode &AM, Type *Ty,
17023 unsigned AS,
17024 Instruction *I) const {
17025 // The vector r+i form is supported since power9 as DQ form. We don't check
17026 // that the offset meets the DQ form requirement (off % 16 == 0), because on
17027 // PowerPC the imm form is preferred and the offset can be adjusted to use the
17028 // imm form later, in the PPCLoopInstrFormPrep pass. Also, LSR uses an LSRUse's
17029 // min and max offsets to check the legal addressing mode, so we should be a
17030 // little aggressive and accept other offsets for that LSRUse.
17031 if (Ty->isVectorTy() && AM.BaseOffs != 0 && !Subtarget.hasP9Vector())
17032 return false;
17033
17034 // PPC allows a sign-extended 16-bit immediate field.
17035 if (AM.BaseOffs <= -(1LL << 16) || AM.BaseOffs >= (1LL << 16)-1)
17036 return false;
17037
17038 // No global is ever allowed as a base.
17039 if (AM.BaseGV)
17040 return false;
17041
17042 // PPC only supports r+r,
17043 switch (AM.Scale) {
17044 case 0: // "r+i" or just "i", depending on HasBaseReg.
17045 break;
17046 case 1:
17047 if (AM.HasBaseReg && AM.BaseOffs) // "r+r+i" is not allowed.
17048 return false;
17049 // Otherwise we have r+r or r+i.
17050 break;
17051 case 2:
17052 if (AM.HasBaseReg || AM.BaseOffs) // 2*r+r or 2*r+i is not allowed.
17053 return false;
17054 // Allow 2*r as r+r.
17055 break;
17056 default:
17057 // No other scales are supported.
17058 return false;
17059 }
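  // Editorial summary of the checks above: plain "r+i" with a signed 16-bit
  // offset (e.g. an access like 32(r4)) and "r+r" are accepted, a bare "2*r"
  // is treated as "r+r", while "r+r+i", "2*r+i" and larger scales have all
  // been rejected by this point.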
17060
17061 return true;
17062}
17063
17064SDValue PPCTargetLowering::LowerRETURNADDR(SDValue Op,
17065 SelectionDAG &DAG) const {
17066 MachineFunction &MF = DAG.getMachineFunction();
17067 MachineFrameInfo &MFI = MF.getFrameInfo();
17068 MFI.setReturnAddressIsTaken(true);
17069
17071 return SDValue();
17072
17073 SDLoc dl(Op);
17074 unsigned Depth = Op.getConstantOperandVal(0);
17075
17076 // Make sure the function does not optimize away the store of the RA to
17077 // the stack.
17078 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
17079 FuncInfo->setLRStoreRequired();
17080 bool isPPC64 = Subtarget.isPPC64();
17081 auto PtrVT = getPointerTy(MF.getDataLayout());
17082
17083 if (Depth > 0) {
17084 // The link register (return address) is saved in the caller's frame
17085 // not the callee's stack frame. So we must get the caller's frame
17086 // address and load the return address at the LR offset from there.
17087 SDValue FrameAddr =
17088 DAG.getLoad(Op.getValueType(), dl, DAG.getEntryNode(),
17089 LowerFRAMEADDR(Op, DAG), MachinePointerInfo());
17090 SDValue Offset =
17091 DAG.getConstant(Subtarget.getFrameLowering()->getReturnSaveOffset(), dl,
17092 isPPC64 ? MVT::i64 : MVT::i32);
17093 return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(),
17094 DAG.getNode(ISD::ADD, dl, PtrVT, FrameAddr, Offset),
17095 MachinePointerInfo());
17096 }
17097
17098 // Just load the return address off the stack.
17099 SDValue RetAddrFI = getReturnAddrFrameIndex(DAG);
17100 return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), RetAddrFI,
17101 MachinePointerInfo());
17102}
17103
17104SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op,
17105 SelectionDAG &DAG) const {
17106 SDLoc dl(Op);
17107 unsigned Depth = Op.getConstantOperandVal(0);
17108
17109 MachineFunction &MF = DAG.getMachineFunction();
17110 MachineFrameInfo &MFI = MF.getFrameInfo();
17111 MFI.setFrameAddressIsTaken(true);
17112
17113 EVT PtrVT = getPointerTy(MF.getDataLayout());
17114 bool isPPC64 = PtrVT == MVT::i64;
17115
17116 // Naked functions never have a frame pointer, and so we use r1. For all
17117 // other functions, this decision must be delayed until during PEI.
17118 unsigned FrameReg;
17119 if (MF.getFunction().hasFnAttribute(Attribute::Naked))
17120 FrameReg = isPPC64 ? PPC::X1 : PPC::R1;
17121 else
17122 FrameReg = isPPC64 ? PPC::FP8 : PPC::FP;
17123
17124 SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg,
17125 PtrVT);
17126 while (Depth--)
17127 FrameAddr = DAG.getLoad(Op.getValueType(), dl, DAG.getEntryNode(),
17128 FrameAddr, MachinePointerInfo());
17129 return FrameAddr;
17130}
17131
17132// FIXME? Maybe this could be a TableGen attribute on some registers and
17133// this table could be generated automatically from RegInfo.
17134Register PPCTargetLowering::getRegisterByName(const char* RegName, LLT VT,
17135 const MachineFunction &MF) const {
17136 bool isPPC64 = Subtarget.isPPC64();
17137
17138 bool is64Bit = isPPC64 && VT == LLT::scalar(64);
17139 if (!is64Bit && VT != LLT::scalar(32))
17140 report_fatal_error("Invalid register global variable type");
17141
17142 Register Reg = StringSwitch<Register>(RegName)
17143 .Case("r1", is64Bit ? PPC::X1 : PPC::R1)
17144 .Case("r2", isPPC64 ? Register() : PPC::R2)
17145 .Case("r13", (is64Bit ? PPC::X13 : PPC::R13))
17146 .Default(Register());
17147
17148 if (Reg)
17149 return Reg;
17150 report_fatal_error("Invalid register name global variable");
17151}
17152
17153bool PPCTargetLowering::isAccessedAsGotIndirect(SDValue GA) const {
17154 // The 32-bit SVR4 ABI accesses everything as got-indirect.
17155 if (Subtarget.is32BitELFABI())
17156 return true;
17157
17158 // AIX accesses everything indirectly through the TOC, which is similar to
17159 // the GOT.
17160 if (Subtarget.isAIXABI())
17161 return true;
17162
17163 CodeModel::Model CModel = getTargetMachine().getCodeModel();
17164 // If it is small or large code model, module locals are accessed
17165 // indirectly by loading their address from .toc/.got.
17166 if (CModel == CodeModel::Small || CModel == CodeModel::Large)
17167 return true;
17168
17169 // JumpTable and BlockAddress are accessed as got-indirect.
17170 if (isa<JumpTableSDNode>(GA) || isa<BlockAddressSDNode>(GA))
17171 return true;
17172
17173 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(GA))
17174 return Subtarget.isGVIndirectSymbol(G->getGlobal());
17175
17176 return false;
17177}
17178
17179bool
17180PPCTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
17181 // The PowerPC target isn't yet aware of offsets.
17182 return false;
17183}
17184
17185bool PPCTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
17186 const CallInst &I,
17187 MachineFunction &MF,
17188 unsigned Intrinsic) const {
17189 switch (Intrinsic) {
17190 case Intrinsic::ppc_atomicrmw_xchg_i128:
17191 case Intrinsic::ppc_atomicrmw_add_i128:
17192 case Intrinsic::ppc_atomicrmw_sub_i128:
17193 case Intrinsic::ppc_atomicrmw_nand_i128:
17194 case Intrinsic::ppc_atomicrmw_and_i128:
17195 case Intrinsic::ppc_atomicrmw_or_i128:
17196 case Intrinsic::ppc_atomicrmw_xor_i128:
17197 case Intrinsic::ppc_cmpxchg_i128:
17199 Info.memVT = MVT::i128;
17200 Info.ptrVal = I.getArgOperand(0);
17201 Info.offset = 0;
17202 Info.align = Align(16);
17205 return true;
17206 case Intrinsic::ppc_atomic_load_i128:
17208 Info.memVT = MVT::i128;
17209 Info.ptrVal = I.getArgOperand(0);
17210 Info.offset = 0;
17211 Info.align = Align(16);
17213 return true;
17214 case Intrinsic::ppc_atomic_store_i128:
17216 Info.memVT = MVT::i128;
17217 Info.ptrVal = I.getArgOperand(2);
17218 Info.offset = 0;
17219 Info.align = Align(16);
17221 return true;
17222 case Intrinsic::ppc_altivec_lvx:
17223 case Intrinsic::ppc_altivec_lvxl:
17224 case Intrinsic::ppc_altivec_lvebx:
17225 case Intrinsic::ppc_altivec_lvehx:
17226 case Intrinsic::ppc_altivec_lvewx:
17227 case Intrinsic::ppc_vsx_lxvd2x:
17228 case Intrinsic::ppc_vsx_lxvw4x:
17229 case Intrinsic::ppc_vsx_lxvd2x_be:
17230 case Intrinsic::ppc_vsx_lxvw4x_be:
17231 case Intrinsic::ppc_vsx_lxvl:
17232 case Intrinsic::ppc_vsx_lxvll: {
17233 EVT VT;
17234 switch (Intrinsic) {
17235 case Intrinsic::ppc_altivec_lvebx:
17236 VT = MVT::i8;
17237 break;
17238 case Intrinsic::ppc_altivec_lvehx:
17239 VT = MVT::i16;
17240 break;
17241 case Intrinsic::ppc_altivec_lvewx:
17242 VT = MVT::i32;
17243 break;
17244 case Intrinsic::ppc_vsx_lxvd2x:
17245 case Intrinsic::ppc_vsx_lxvd2x_be:
17246 VT = MVT::v2f64;
17247 break;
17248 default:
17249 VT = MVT::v4i32;
17250 break;
17251 }
17252
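    // Editorial note: the Altivec forms (lvx/lvebx/...) ignore the low-order
    // address bits, so the 16 accessed bytes start at the containing 16-byte
    // boundary at or below the pointer; the conservative offset/size window
    // below covers every byte such an access could touch.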
17254 Info.memVT = VT;
17255 Info.ptrVal = I.getArgOperand(0);
17256 Info.offset = -VT.getStoreSize()+1;
17257 Info.size = 2*VT.getStoreSize()-1;
17258 Info.align = Align(1);
17260 return true;
17261 }
17262 case Intrinsic::ppc_altivec_stvx:
17263 case Intrinsic::ppc_altivec_stvxl:
17264 case Intrinsic::ppc_altivec_stvebx:
17265 case Intrinsic::ppc_altivec_stvehx:
17266 case Intrinsic::ppc_altivec_stvewx:
17267 case Intrinsic::ppc_vsx_stxvd2x:
17268 case Intrinsic::ppc_vsx_stxvw4x:
17269 case Intrinsic::ppc_vsx_stxvd2x_be:
17270 case Intrinsic::ppc_vsx_stxvw4x_be:
17271 case Intrinsic::ppc_vsx_stxvl:
17272 case Intrinsic::ppc_vsx_stxvll: {
17273 EVT VT;
17274 switch (Intrinsic) {
17275 case Intrinsic::ppc_altivec_stvebx:
17276 VT = MVT::i8;
17277 break;
17278 case Intrinsic::ppc_altivec_stvehx:
17279 VT = MVT::i16;
17280 break;
17281 case Intrinsic::ppc_altivec_stvewx:
17282 VT = MVT::i32;
17283 break;
17284 case Intrinsic::ppc_vsx_stxvd2x:
17285 case Intrinsic::ppc_vsx_stxvd2x_be:
17286 VT = MVT::v2f64;
17287 break;
17288 default:
17289 VT = MVT::v4i32;
17290 break;
17291 }
17292
17294 Info.memVT = VT;
17295 Info.ptrVal = I.getArgOperand(1);
17296 Info.offset = -VT.getStoreSize()+1;
17297 Info.size = 2*VT.getStoreSize()-1;
17298 Info.align = Align(1);
17300 return true;
17301 }
17302 case Intrinsic::ppc_stdcx:
17303 case Intrinsic::ppc_stwcx:
17304 case Intrinsic::ppc_sthcx:
17305 case Intrinsic::ppc_stbcx: {
17306 EVT VT;
17307 auto Alignment = Align(8);
17308 switch (Intrinsic) {
17309 case Intrinsic::ppc_stdcx:
17310 VT = MVT::i64;
17311 break;
17312 case Intrinsic::ppc_stwcx:
17313 VT = MVT::i32;
17314 Alignment = Align(4);
17315 break;
17316 case Intrinsic::ppc_sthcx:
17317 VT = MVT::i16;
17318 Alignment = Align(2);
17319 break;
17320 case Intrinsic::ppc_stbcx:
17321 VT = MVT::i8;
17322 Alignment = Align(1);
17323 break;
17324 }
17326 Info.memVT = VT;
17327 Info.ptrVal = I.getArgOperand(0);
17328 Info.offset = 0;
17329 Info.align = Alignment;
17331 return true;
17332 }
17333 default:
17334 break;
17335 }
17336
17337 return false;
17338}
17339
17340/// It returns EVT::Other if the type should be determined using generic
17341/// target-independent logic.
17342EVT PPCTargetLowering::getOptimalMemOpType(
17343 const MemOp &Op, const AttributeList &FuncAttributes) const {
17344 if (getTargetMachine().getOptLevel() != CodeGenOptLevel::None) {
17345 // We should use Altivec/VSX loads and stores when available. For unaligned
17346 // addresses, unaligned VSX loads are only fast starting with the P8.
17347 if (Subtarget.hasAltivec() && Op.size() >= 16) {
17348 if (Op.isMemset() && Subtarget.hasVSX()) {
17349 uint64_t TailSize = Op.size() % 16;
17350 // For memset lowering, EXTRACT_VECTOR_ELT tries to return constant
17351 // element if vector element type matches tail store. For tail size
17352 // 3/4, the tail store is i32, v4i32 cannot be used, need a legal one.
17353 if (TailSize > 2 && TailSize <= 4) {
17354 return MVT::v8i16;
17355 }
17356 return MVT::v4i32;
17357 }
17358 if (Op.isAligned(Align(16)) || Subtarget.hasP8Vector())
17359 return MVT::v4i32;
17360 }
17361 }
17362
17363 if (Subtarget.isPPC64()) {
17364 return MVT::i64;
17365 }
17366
17367 return MVT::i32;
17368}
17369
17370/// Returns true if it is beneficial to convert a load of a constant
17371/// to just the constant itself.
17372bool PPCTargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
17373 Type *Ty) const {
17374 assert(Ty->isIntegerTy());
17375
17376 unsigned BitSize = Ty->getPrimitiveSizeInBits();
17377 return !(BitSize == 0 || BitSize > 64);
17378}
17379
17380bool PPCTargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const {
17381 if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
17382 return false;
17383 unsigned NumBits1 = Ty1->getPrimitiveSizeInBits();
17384 unsigned NumBits2 = Ty2->getPrimitiveSizeInBits();
17385 return NumBits1 == 64 && NumBits2 == 32;
17386}
17387
17388bool PPCTargetLowering::isTruncateFree(EVT VT1, EVT VT2) const {
17389 if (!VT1.isInteger() || !VT2.isInteger())
17390 return false;
17391 unsigned NumBits1 = VT1.getSizeInBits();
17392 unsigned NumBits2 = VT2.getSizeInBits();
17393 return NumBits1 == 64 && NumBits2 == 32;
17394}
17395
17396bool PPCTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
17397 // Generally speaking, zexts are not free, but they are free when they can be
17398 // folded with other operations.
17399 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Val)) {
17400 EVT MemVT = LD->getMemoryVT();
17401 if ((MemVT == MVT::i1 || MemVT == MVT::i8 || MemVT == MVT::i16 ||
17402 (Subtarget.isPPC64() && MemVT == MVT::i32)) &&
17403 (LD->getExtensionType() == ISD::NON_EXTLOAD ||
17404 LD->getExtensionType() == ISD::ZEXTLOAD))
17405 return true;
17406 }
17407
17408 // FIXME: Add other cases...
17409 // - 32-bit shifts with a zext to i64
17410 // - zext after ctlz, bswap, etc.
17411 // - zext after and by a constant mask
17412
17413 return TargetLowering::isZExtFree(Val, VT2);
17414}
17415
17416bool PPCTargetLowering::isFPExtFree(EVT DestVT, EVT SrcVT) const {
17417 assert(DestVT.isFloatingPoint() && SrcVT.isFloatingPoint() &&
17418 "invalid fpext types");
17419 // Extending to float128 is not free.
17420 if (DestVT == MVT::f128)
17421 return false;
17422 return true;
17423}
17424
17426 return isInt<16>(Imm) || isUInt<16>(Imm);
17427}
17428
17430 return isInt<16>(Imm) || isUInt<16>(Imm);
17431}
17432
17435 unsigned *Fast) const {
17436 if (DisablePPCUnaligned)
17437 return false;
17438
17439 // PowerPC supports unaligned memory access for simple non-vector types.
17440 // Although accessing unaligned addresses is not as efficient as accessing
17441 // aligned addresses, it is generally more efficient than manual expansion,
17442 // and generally only traps for software emulation when crossing page
17443 // boundaries.
17444
17445 if (!VT.isSimple())
17446 return false;
17447
17448 if (VT.isFloatingPoint() && !VT.isVector() &&
17449 !Subtarget.allowsUnalignedFPAccess())
17450 return false;
17451
17452 if (VT.getSimpleVT().isVector()) {
17453 if (Subtarget.hasVSX()) {
17454 if (VT != MVT::v2f64 && VT != MVT::v2i64 &&
17455 VT != MVT::v4f32 && VT != MVT::v4i32)
17456 return false;
17457 } else {
17458 return false;
17459 }
17460 }
17461
17462 if (VT == MVT::ppcf128)
17463 return false;
17464
17465 if (Fast)
17466 *Fast = 1;
17467
17468 return true;
17469}
17470
17471bool PPCTargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT,
17472 SDValue C) const {
17473 // Check integral scalar types.
17474 if (!VT.isScalarInteger())
17475 return false;
17476 if (auto *ConstNode = dyn_cast<ConstantSDNode>(C.getNode())) {
17477 if (!ConstNode->getAPIntValue().isSignedIntN(64))
17478 return false;
17479 // This transformation will generate >= 2 operations. But the following
17480 // cases will generate <= 2 instructions during ISEL. So exclude them.
17481 // 1. If the constant multiplier fits 16 bits, it can be handled by one
17482 // HW instruction, ie. MULLI
17483 // 2. If the multiplier after shifted fits 16 bits, an extra shift
17484 // instruction is needed than case 1, ie. MULLI and RLDICR
17485 int64_t Imm = ConstNode->getSExtValue();
17486 unsigned Shift = llvm::countr_zero<uint64_t>(Imm);
17487 Imm >>= Shift;
17488 if (isInt<16>(Imm))
17489 return false;
17490 uint64_t UImm = static_cast<uint64_t>(Imm);
17491 if (isPowerOf2_64(UImm + 1) || isPowerOf2_64(UImm - 1) ||
17492 isPowerOf2_64(1 - UImm) || isPowerOf2_64(-1 - UImm))
17493 return true;
17494 }
17495 return false;
17496}
17497
17499 EVT VT) const {
17502}
17503
17504bool PPCTargetLowering::isFMAFasterThanFMulAndFAdd(const Function &F,
17505 Type *Ty) const {
17506 if (Subtarget.hasSPE() || Subtarget.useSoftFloat())
17507 return false;
17508 switch (Ty->getScalarType()->getTypeID()) {
17509 case Type::FloatTyID:
17510 case Type::DoubleTyID:
17511 return true;
17512 case Type::FP128TyID:
17513 return Subtarget.hasP9Vector();
17514 default:
17515 return false;
17516 }
17517}
17518
17519// FIXME: add more patterns which are not profitable to hoist.
17520bool PPCTargetLowering::isProfitableToHoist(Instruction *I) const {
17521 if (!I->hasOneUse())
17522 return true;
17523
17524 Instruction *User = I->user_back();
17525 assert(User && "A single use instruction with no uses.");
17526
17527 switch (I->getOpcode()) {
17528 case Instruction::FMul: {
17529 // Don't break FMA, PowerPC prefers FMA.
17530 if (User->getOpcode() != Instruction::FSub &&
17531 User->getOpcode() != Instruction::FAdd)
17532 return true;
17533
17534 const TargetOptions &Options = getTargetMachine().Options;
17535 const Function *F = I->getFunction();
17536 const DataLayout &DL = F->getDataLayout();
17537 Type *Ty = User->getOperand(0)->getType();
17538
17539 return !(
17540 isFMAFasterThanFMulAndFAdd(*F, Ty) &&
17541 isOperationLegalOrCustom(ISD::FMA, getValueType(DL, Ty)) &&
17542 (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath));
17543 }
17544 case Instruction::Load: {
17545 // Don't break "store (load float*)" pattern, this pattern will be combined
17546 // to "store (load int32)" in later InstCombine pass. See function
17547 // combineLoadToOperationType. On PowerPC, loading a floating-point value
17548 // takes more cycles than loading a 32-bit integer.
17549 LoadInst *LI = cast<LoadInst>(I);
17550 // For the loads that combineLoadToOperationType does nothing, like
17551 // ordered load, it should be profitable to hoist them.
17552 // For swifterror load, it can only be used for pointer to pointer type, so
17553 // later type check should get rid of this case.
17554 if (!LI->isUnordered())
17555 return true;
17556
17557 if (User->getOpcode() != Instruction::Store)
17558 return true;
17559
17560 if (I->getType()->getTypeID() != Type::FloatTyID)
17561 return true;
17562
17563 return false;
17564 }
17565 default:
17566 return true;
17567 }
17568 return true;
17569}
17570
17571const MCPhysReg *
17572PPCTargetLowering::getScratchRegisters(CallingConv::ID) const {
17573 // LR is a callee-save register, but we must treat it as clobbered by any call
17574 // site. Hence we include LR in the scratch registers, which are in turn added
17575 // as implicit-defs for stackmaps and patchpoints. The same reasoning applies
17576 // to CTR, which is used by any indirect call.
17577 static const MCPhysReg ScratchRegs[] = {
17578 PPC::X12, PPC::LR8, PPC::CTR8, 0
17579 };
17580
17581 return ScratchRegs;
17582}
17583
17584Register PPCTargetLowering::getExceptionPointerRegister(
17585 const Constant *PersonalityFn) const {
17586 return Subtarget.isPPC64() ? PPC::X3 : PPC::R3;
17587}
17588
17589Register PPCTargetLowering::getExceptionSelectorRegister(
17590 const Constant *PersonalityFn) const {
17591 return Subtarget.isPPC64() ? PPC::X4 : PPC::R4;
17592}
17593
17594bool
17595PPCTargetLowering::shouldExpandBuildVectorWithShuffles(
17596 EVT VT , unsigned DefinedValues) const {
17597 if (VT == MVT::v2i64)
17598 return Subtarget.hasDirectMove(); // Don't need stack ops with direct moves
17599
17600 if (Subtarget.hasVSX())
17601 return true;
17602
17603 return TargetLowering::shouldExpandBuildVectorWithShuffles(VT, DefinedValues);
17604}
17605
17606Sched::Preference PPCTargetLowering::getSchedulingPreference(SDNode *N) const {
17607 if (DisableILPPref || Subtarget.enableMachineScheduler())
17608 return TargetLowering::getSchedulingPreference(N);
17609
17610 return Sched::ILP;
17611}
17612
17613// Create a fast isel object.
17614FastISel *
17615PPCTargetLowering::createFastISel(FunctionLoweringInfo &FuncInfo,
17616 const TargetLibraryInfo *LibInfo) const {
17617 return PPC::createFastISel(FuncInfo, LibInfo);
17618}
17619
17620// 'Inverted' means the FMA opcode after negating one multiplicand.
17621// For example, (fma -a b c) = (fnmsub a b c)
17622static unsigned invertFMAOpcode(unsigned Opc) {
17623 switch (Opc) {
17624 default:
17625 llvm_unreachable("Invalid FMA opcode for PowerPC!");
17626 case ISD::FMA:
17627 return PPCISD::FNMSUB;
17628 case PPCISD::FNMSUB:
17629 return ISD::FMA;
17630 }
17631}
17632
17633SDValue PPCTargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG,
17634 bool LegalOps, bool OptForSize,
17635 NegatibleCost &Cost,
17636 unsigned Depth) const {
17638 return SDValue();
17639
17640 unsigned Opc = Op.getOpcode();
17641 EVT VT = Op.getValueType();
17642 SDNodeFlags Flags = Op.getNode()->getFlags();
17643
17644 switch (Opc) {
17645 case PPCISD::FNMSUB:
17646 if (!Op.hasOneUse() || !isTypeLegal(VT))
17647 break;
17648
17649 const TargetOptions &Options = getTargetMachine().Options;
17650 SDValue N0 = Op.getOperand(0);
17651 SDValue N1 = Op.getOperand(1);
17652 SDValue N2 = Op.getOperand(2);
17653 SDLoc Loc(Op);
17654
17655 NegatibleCost N2Cost = NegatibleCost::Expensive;
17656 SDValue NegN2 =
17657 getNegatedExpression(N2, DAG, LegalOps, OptForSize, N2Cost, Depth + 1);
17658
17659 if (!NegN2)
17660 return SDValue();
17661
17662 // (fneg (fnmsub a b c)) => (fnmsub (fneg a) b (fneg c))
17663 // (fneg (fnmsub a b c)) => (fnmsub a (fneg b) (fneg c))
17664 // These transformations may change sign of zeroes. For example,
17665 // -(-ab-(-c))=-0 while -(-(ab-c))=+0 when a=b=c=1.
17666 if (Flags.hasNoSignedZeros() || Options.NoSignedZerosFPMath) {
17667 // Try and choose the cheaper one to negate.
17668 NegatibleCost N0Cost = NegatibleCost::Expensive;
17669 SDValue NegN0 = getNegatedExpression(N0, DAG, LegalOps, OptForSize,
17670 N0Cost, Depth + 1);
17671
17672 NegatibleCost N1Cost = NegatibleCost::Expensive;
17673 SDValue NegN1 = getNegatedExpression(N1, DAG, LegalOps, OptForSize,
17674 N1Cost, Depth + 1);
17675
17676 if (NegN0 && N0Cost <= N1Cost) {
17677 Cost = std::min(N0Cost, N2Cost);
17678 return DAG.getNode(Opc, Loc, VT, NegN0, N1, NegN2, Flags);
17679 } else if (NegN1) {
17680 Cost = std::min(N1Cost, N2Cost);
17681 return DAG.getNode(Opc, Loc, VT, N0, NegN1, NegN2, Flags);
17682 }
17683 }
17684
17685 // (fneg (fnmsub a b c)) => (fma a b (fneg c))
17686 if (isOperationLegal(ISD::FMA, VT)) {
17687 Cost = N2Cost;
17688 return DAG.getNode(ISD::FMA, Loc, VT, N0, N1, NegN2, Flags);
17689 }
17690
17691 break;
17692 }
17693
17694 return TargetLowering::getNegatedExpression(Op, DAG, LegalOps, OptForSize,
17695 Cost, Depth);
17696}
17697
17698// Override to enable LOAD_STACK_GUARD lowering on Linux.
17699bool PPCTargetLowering::useLoadStackGuardNode() const {
17700 if (!Subtarget.isTargetLinux())
17701 return TargetLowering::useLoadStackGuardNode();
17702 return true;
17703}
17704
17705// Override to disable global variable loading on Linux and insert AIX canary
17706// word declaration.
17707void PPCTargetLowering::insertSSPDeclarations(Module &M) const {
17708 if (Subtarget.isAIXABI()) {
17709 M.getOrInsertGlobal(AIXSSPCanaryWordName,
17710 PointerType::getUnqual(M.getContext()));
17711 return;
17712 }
17713 if (!Subtarget.isTargetLinux())
17714 return TargetLowering::insertSSPDeclarations(M);
17715}
17716
17717Value *PPCTargetLowering::getSDagStackGuard(const Module &M) const {
17718 if (Subtarget.isAIXABI())
17719 return M.getGlobalVariable(AIXSSPCanaryWordName);
17720 return TargetLowering::getSDagStackGuard(M);
17721}
17722
17723bool PPCTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
17724 bool ForCodeSize) const {
17725 if (!VT.isSimple() || !Subtarget.hasVSX())
17726 return false;
17727
17728 switch(VT.getSimpleVT().SimpleTy) {
17729 default:
17730 // For FP types that are currently not supported by PPC backend, return
17731 // false. Examples: f16, f80.
17732 return false;
17733 case MVT::f32:
17734 case MVT::f64: {
17735 if (Subtarget.hasPrefixInstrs() && Subtarget.hasP10Vector()) {
17736 // We can materialize all immediates via XXSPLTI32DX and XXSPLTIDP.
17737 return true;
17738 }
17739 bool IsExact;
17740 APSInt IntResult(16, false);
17741 // The rounding mode doesn't really matter because we only care about floats
17742 // that can be converted to integers exactly.
17743 Imm.convertToInteger(IntResult, APFloat::rmTowardZero, &IsExact);
17744 // For exact values in the range [-16, 15] we can materialize the float.
17745 if (IsExact && IntResult <= 15 && IntResult >= -16)
17746 return true;
17747 return Imm.isZero();
17748 }
17749 case MVT::ppcf128:
17750 return Imm.isPosZero();
17751 }
17752}
17753
17754// For vector shift operation op, fold
17755// (op x, (and y, ((1 << numbits(x)) - 1))) -> (target op x, y)
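// Editorial note: PPC vector shifts (e.g. vslw for v4i32) already use the
// shift amount modulo the element width, so a mask such as (and y, 31) on a
// v4i32 shift amount is redundant; e.g. (shl v4i32:x, (and y, 31)) can become
// (PPCISD::SHL x, y) directly.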
17756static SDValue stripModuloOnShift(const TargetLowering &TLI, SDNode *N,
17757 SelectionDAG &DAG) {
17758 SDValue N0 = N->getOperand(0);
17759 SDValue N1 = N->getOperand(1);
17760 EVT VT = N0.getValueType();
17761 unsigned OpSizeInBits = VT.getScalarSizeInBits();
17762 unsigned Opcode = N->getOpcode();
17763 unsigned TargetOpcode;
17764
17765 switch (Opcode) {
17766 default:
17767 llvm_unreachable("Unexpected shift operation");
17768 case ISD::SHL:
17769 TargetOpcode = PPCISD::SHL;
17770 break;
17771 case ISD::SRL:
17772 TargetOpcode = PPCISD::SRL;
17773 break;
17774 case ISD::SRA:
17775 TargetOpcode = PPCISD::SRA;
17776 break;
17777 }
17778
17779 if (VT.isVector() && TLI.isOperationLegal(Opcode, VT) &&
17780 N1->getOpcode() == ISD::AND)
17781 if (ConstantSDNode *Mask = isConstOrConstSplat(N1->getOperand(1)))
17782 if (Mask->getZExtValue() == OpSizeInBits - 1)
17783 return DAG.getNode(TargetOpcode, SDLoc(N), VT, N0, N1->getOperand(0));
17784
17785 return SDValue();
17786}
17787
17788SDValue PPCTargetLowering::combineSHL(SDNode *N, DAGCombinerInfo &DCI) const {
17789 if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
17790 return Value;
17791
17792 SDValue N0 = N->getOperand(0);
17793 ConstantSDNode *CN1 = dyn_cast<ConstantSDNode>(N->getOperand(1));
17794 if (!Subtarget.isISA3_0() || !Subtarget.isPPC64() ||
17795 N0.getOpcode() != ISD::SIGN_EXTEND ||
17796 N0.getOperand(0).getValueType() != MVT::i32 || CN1 == nullptr ||
17797 N->getValueType(0) != MVT::i64)
17798 return SDValue();
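  // Editorial note: what survives the checks above is
  // (shl (sign_extend i32 -> i64 x), c) on a 64-bit ISA 3.0 target, which the
  // PPCISD::EXTSWSLI node built below matches as a single extswsli
  // (extend sign word and shift left immediate).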
17799
17800 // We can't save an operation here if the value is already extended, and
17801 // the existing shift is easier to combine.
17802 SDValue ExtsSrc = N0.getOperand(0);
17803 if (ExtsSrc.getOpcode() == ISD::TRUNCATE &&
17804 ExtsSrc.getOperand(0).getOpcode() == ISD::AssertSext)
17805 return SDValue();
17806
17807 SDLoc DL(N0);
17808 SDValue ShiftBy = SDValue(CN1, 0);
17809 // We want the shift amount to be i32 on the extswli, but the shift could
17810 // have an i64.
17811 if (ShiftBy.getValueType() == MVT::i64)
17812 ShiftBy = DCI.DAG.getConstant(CN1->getZExtValue(), DL, MVT::i32);
17813
17814 return DCI.DAG.getNode(PPCISD::EXTSWSLI, DL, MVT::i64, N0->getOperand(0),
17815 ShiftBy);
17816}
17817
17818SDValue PPCTargetLowering::combineSRA(SDNode *N, DAGCombinerInfo &DCI) const {
17819 if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
17820 return Value;
17821
17822 return SDValue();
17823}
17824
17825SDValue PPCTargetLowering::combineSRL(SDNode *N, DAGCombinerInfo &DCI) const {
17826 if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
17827 return Value;
17828
17829 return SDValue();
17830}
17831
17832// Transform (add X, (zext(setne Z, C))) -> (addze X, (addic (addi Z, -C), -1))
17833// Transform (add X, (zext(sete Z, C))) -> (addze X, (subfic (addi Z, -C), 0))
17834// When C is zero, the equation (addi Z, -C) can be simplified to Z
17835// Requirement: -C in [-32768, 32767], X and Z are MVT::i64 types
17836static SDValue combineADDToADDZE(SDNode *N, SelectionDAG &DAG,
17837 const PPCSubtarget &Subtarget) {
17838 if (!Subtarget.isPPC64())
17839 return SDValue();
17840
17841 SDValue LHS = N->getOperand(0);
17842 SDValue RHS = N->getOperand(1);
17843
17844 auto isZextOfCompareWithConstant = [](SDValue Op) {
17845 if (Op.getOpcode() != ISD::ZERO_EXTEND || !Op.hasOneUse() ||
17846 Op.getValueType() != MVT::i64)
17847 return false;
17848
17849 SDValue Cmp = Op.getOperand(0);
17850 if (Cmp.getOpcode() != ISD::SETCC || !Cmp.hasOneUse() ||
17851 Cmp.getOperand(0).getValueType() != MVT::i64)
17852 return false;
17853
17854 if (auto *Constant = dyn_cast<ConstantSDNode>(Cmp.getOperand(1))) {
17855 int64_t NegConstant = 0 - Constant->getSExtValue();
17856 // Due to the limitations of the addi instruction,
17857 // -C is required to be [-32768, 32767].
17858 return isInt<16>(NegConstant);
17859 }
17860
17861 return false;
17862 };
17863
17864 bool LHSHasPattern = isZextOfCompareWithConstant(LHS);
17865 bool RHSHasPattern = isZextOfCompareWithConstant(RHS);
17866
17867 // If there is a pattern, canonicalize a zext operand to the RHS.
17868 if (LHSHasPattern && !RHSHasPattern)
17869 std::swap(LHS, RHS);
17870 else if (!LHSHasPattern && !RHSHasPattern)
17871 return SDValue();
17872
17873 SDLoc DL(N);
17874 SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Glue);
17875 SDValue Cmp = RHS.getOperand(0);
17876 SDValue Z = Cmp.getOperand(0);
17877 auto *Constant = cast<ConstantSDNode>(Cmp.getOperand(1));
17878 int64_t NegConstant = 0 - Constant->getSExtValue();
17879
17880 switch(cast<CondCodeSDNode>(Cmp.getOperand(2))->get()) {
17881 default: break;
17882 case ISD::SETNE: {
17883 // when C == 0
17884 // --> addze X, (addic Z, -1).carry
17885 // /
17886 // add X, (zext(setne Z, C))--
17887 // \ when -32768 <= -C <= 32767 && C != 0
17888 // --> addze X, (addic (addi Z, -C), -1).carry
17889 SDValue Add = DAG.getNode(ISD::ADD, DL, MVT::i64, Z,
17890 DAG.getConstant(NegConstant, DL, MVT::i64));
17891 SDValue AddOrZ = NegConstant != 0 ? Add : Z;
17892 SDValue Addc = DAG.getNode(ISD::ADDC, DL, DAG.getVTList(MVT::i64, MVT::Glue),
17893 AddOrZ, DAG.getConstant(-1ULL, DL, MVT::i64));
17894 return DAG.getNode(ISD::ADDE, DL, VTs, LHS, DAG.getConstant(0, DL, MVT::i64),
17895 SDValue(Addc.getNode(), 1));
17896 }
17897 case ISD::SETEQ: {
17898 // when C == 0
17899 // --> addze X, (subfic Z, 0).carry
17900 // /
17901 // add X, (zext(sete Z, C))--
17902 // \ when -32768 <= -C <= 32767 && C != 0
17903 // --> addze X, (subfic (addi Z, -C), 0).carry
17904 SDValue Add = DAG.getNode(ISD::ADD, DL, MVT::i64, Z,
17905 DAG.getConstant(NegConstant, DL, MVT::i64));
17906 SDValue AddOrZ = NegConstant != 0 ? Add : Z;
17907 SDValue Subc = DAG.getNode(ISD::SUBC, DL, DAG.getVTList(MVT::i64, MVT::Glue),
17908 DAG.getConstant(0, DL, MVT::i64), AddOrZ);
17909 return DAG.getNode(ISD::ADDE, DL, VTs, LHS, DAG.getConstant(0, DL, MVT::i64),
17910 SDValue(Subc.getNode(), 1));
17911 }
17912 }
17913
17914 return SDValue();
17915}
17916
17917// Transform
17918// (add C1, (MAT_PCREL_ADDR GlobalAddr+C2)) to
17919// (MAT_PCREL_ADDR GlobalAddr+(C1+C2))
17920// In this case both C1 and C2 must be known constants.
17921// C1+C2 must fit into a 34 bit signed integer.
17922static SDValue combineADDToMAT_PCREL_ADDR(SDNode *N, SelectionDAG &DAG,
17923 const PPCSubtarget &Subtarget) {
17924 if (!Subtarget.isUsingPCRelativeCalls())
17925 return SDValue();
17926
17927 // Check both Operand 0 and Operand 1 of the ADD node for the PCRel node.
17928 // If we find that node try to cast the Global Address and the Constant.
17929 SDValue LHS = N->getOperand(0);
17930 SDValue RHS = N->getOperand(1);
17931
17932 if (LHS.getOpcode() != PPCISD::MAT_PCREL_ADDR)
17933 std::swap(LHS, RHS);
17934
17935 if (LHS.getOpcode() != PPCISD::MAT_PCREL_ADDR)
17936 return SDValue();
17937
17938 // Operand zero of PPCISD::MAT_PCREL_ADDR is the GA node.
17939 GlobalAddressSDNode *GSDN = dyn_cast<GlobalAddressSDNode>(LHS.getOperand(0));
17940 ConstantSDNode* ConstNode = dyn_cast<ConstantSDNode>(RHS);
17941
17942 // Check that both casts succeeded.
17943 if (!GSDN || !ConstNode)
17944 return SDValue();
17945
17946 int64_t NewOffset = GSDN->getOffset() + ConstNode->getSExtValue();
17947 SDLoc DL(GSDN);
17948
17949 // The signed int offset needs to fit in 34 bits.
17950 if (!isInt<34>(NewOffset))
17951 return SDValue();
17952
17953 // The new global address is a copy of the old global address except
17954 // that it has the updated Offset.
17955 SDValue GA =
17956 DAG.getTargetGlobalAddress(GSDN->getGlobal(), DL, GSDN->getValueType(0),
17957 NewOffset, GSDN->getTargetFlags());
17958 SDValue MatPCRel =
17959 DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, GSDN->getValueType(0), GA);
17960 return MatPCRel;
17961}
17962
17963SDValue PPCTargetLowering::combineADD(SDNode *N, DAGCombinerInfo &DCI) const {
17964 if (auto Value = combineADDToADDZE(N, DCI.DAG, Subtarget))
17965 return Value;
17966
17967 if (auto Value = combineADDToMAT_PCREL_ADDR(N, DCI.DAG, Subtarget))
17968 return Value;
17969
17970 return SDValue();
17971}
17972
17973// Detect TRUNCATE operations on bitcasts of float128 values.
17974// What we are looking for here is the situtation where we extract a subset
17975// of bits from a 128 bit float.
17976// This can be of two forms:
17977// 1) BITCAST of f128 feeding TRUNCATE
17978// 2) BITCAST of f128 feeding SRL (a shift) feeding TRUNCATE
17979// The reason this is required is because we do not have a legal i128 type
17980// and so we want to prevent having to store the f128 and then reload part
17981// of it.
17982SDValue PPCTargetLowering::combineTRUNCATE(SDNode *N,
17983 DAGCombinerInfo &DCI) const {
17984 // If we are using CRBits then try that first.
17985 if (Subtarget.useCRBits()) {
17986 // Check if CRBits did anything and return that if it did.
17987 if (SDValue CRTruncValue = DAGCombineTruncBoolExt(N, DCI))
17988 return CRTruncValue;
17989 }
17990
17991 SDLoc dl(N);
17992 SDValue Op0 = N->getOperand(0);
17993
17994 // Looking for a truncate of i128 to i64.
17995 if (Op0.getValueType() != MVT::i128 || N->getValueType(0) != MVT::i64)
17996 return SDValue();
17997
17998 int EltToExtract = DCI.DAG.getDataLayout().isBigEndian() ? 1 : 0;
17999
18000 // SRL feeding TRUNCATE.
18001 if (Op0.getOpcode() == ISD::SRL) {
18002 ConstantSDNode *ConstNode = dyn_cast<ConstantSDNode>(Op0.getOperand(1));
18003 // The right shift has to be by 64 bits.
18004 if (!ConstNode || ConstNode->getZExtValue() != 64)
18005 return SDValue();
18006
18007 // Switch the element number to extract.
18008 EltToExtract = EltToExtract ? 0 : 1;
18009 // Update Op0 past the SRL.
18010 Op0 = Op0.getOperand(0);
18011 }
18012
18013 // BITCAST feeding a TRUNCATE possibly via SRL.
18014 if (Op0.getOpcode() == ISD::BITCAST &&
18015 Op0.getValueType() == MVT::i128 &&
18016 Op0.getOperand(0).getValueType() == MVT::f128) {
18017 SDValue Bitcast = DCI.DAG.getBitcast(MVT::v2i64, Op0.getOperand(0));
18018 return DCI.DAG.getNode(
18019 ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, Bitcast,
18020 DCI.DAG.getTargetConstant(EltToExtract, dl, MVT::i32));
18021 }
18022 return SDValue();
18023}
18024
18025SDValue PPCTargetLowering::combineMUL(SDNode *N, DAGCombinerInfo &DCI) const {
18026 SelectionDAG &DAG = DCI.DAG;
18027
18028 ConstantSDNode *ConstOpOrElement = isConstOrConstSplat(N->getOperand(1));
18029 if (!ConstOpOrElement)
18030 return SDValue();
18031
18032 // An imul is usually smaller than the alternative sequence for legal type.
18033 if (DAG.getMachineFunction().getFunction().hasMinSize() &&
18034 isOperationLegal(ISD::MUL, N->getValueType(0)))
18035 return SDValue();
18036
18037 auto IsProfitable = [this](bool IsNeg, bool IsAddOne, EVT VT) -> bool {
18038 switch (this->Subtarget.getCPUDirective()) {
18039 default:
18040 // TODO: enhance the condition for subtarget before pwr8
18041 return false;
18042 case PPC::DIR_PWR8:
18043 // type mul add shl
18044 // scalar 4 1 1
18045 // vector 7 2 2
18046 return true;
18047 case PPC::DIR_PWR9:
18048 case PPC::DIR_PWR10:
18050 // type mul add shl
18051 // scalar 5 2 2
18052 // vector 7 2 2
18053
18054 // The cycle ratios of the related operations are shown in the table above.
18055 // Because mul costs 5 (scalar) / 7 (vector) while add/sub/shl each cost 2
18056 // for both scalar and vector types, the 2-instruction patterns (add/sub +
18057 // shl, total 4) are always profitable; but the 3-instruction pattern
18058 // (mul x, -(2^N + 1)) => -(add (shl x, N), x) costs 6 (sub + add + shl),
18059 // so we should only do it for vector types.
18060 return IsAddOne && IsNeg ? VT.isVector() : true;
18061 }
18062 };
18063
18064 EVT VT = N->getValueType(0);
18065 SDLoc DL(N);
18066
18067 const APInt &MulAmt = ConstOpOrElement->getAPIntValue();
18068 bool IsNeg = MulAmt.isNegative();
18069 APInt MulAmtAbs = MulAmt.abs();
18070
18071 if ((MulAmtAbs - 1).isPowerOf2()) {
18072 // (mul x, 2^N + 1) => (add (shl x, N), x)
18073 // (mul x, -(2^N + 1)) => -(add (shl x, N), x)
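    // Editorial example: (mul x, 9) => (add (shl x, 3), x).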
18074
18075 if (!IsProfitable(IsNeg, true, VT))
18076 return SDValue();
18077
18078 SDValue Op0 = N->getOperand(0);
18079 SDValue Op1 =
18080 DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
18081 DAG.getConstant((MulAmtAbs - 1).logBase2(), DL, VT));
18082 SDValue Res = DAG.getNode(ISD::ADD, DL, VT, Op0, Op1);
18083
18084 if (!IsNeg)
18085 return Res;
18086
18087 return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Res);
18088 } else if ((MulAmtAbs + 1).isPowerOf2()) {
18089 // (mul x, 2^N - 1) => (sub (shl x, N), x)
18090 // (mul x, -(2^N - 1)) => (sub x, (shl x, N))
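    // Editorial example: (mul x, 7) => (sub (shl x, 3), x).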
18091
18092 if (!IsProfitable(IsNeg, false, VT))
18093 return SDValue();
18094
18095 SDValue Op0 = N->getOperand(0);
18096 SDValue Op1 =
18097 DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
18098 DAG.getConstant((MulAmtAbs + 1).logBase2(), DL, VT));
18099
18100 if (!IsNeg)
18101 return DAG.getNode(ISD::SUB, DL, VT, Op1, Op0);
18102 else
18103 return DAG.getNode(ISD::SUB, DL, VT, Op0, Op1);
18104
18105 } else {
18106 return SDValue();
18107 }
18108}
18109
18110// Combine fma-like op (like fnmsub) with fnegs to appropriate op. Do this
18111// in combiner since we need to check SD flags and other subtarget features.
18112SDValue PPCTargetLowering::combineFMALike(SDNode *N,
18113 DAGCombinerInfo &DCI) const {
18114 SDValue N0 = N->getOperand(0);
18115 SDValue N1 = N->getOperand(1);
18116 SDValue N2 = N->getOperand(2);
18117 SDNodeFlags Flags = N->getFlags();
18118 EVT VT = N->getValueType(0);
18119 SelectionDAG &DAG = DCI.DAG;
18120 const TargetOptions &Options = getTargetMachine().Options;
18121 unsigned Opc = N->getOpcode();
18122 bool CodeSize = DAG.getMachineFunction().getFunction().hasOptSize();
18123 bool LegalOps = !DCI.isBeforeLegalizeOps();
18124 SDLoc Loc(N);
18125
18126 if (!isOperationLegal(ISD::FMA, VT))
18127 return SDValue();
18128
18129 // Allowing transformation to FNMSUB may change sign of zeroes when ab-c=0
18130 // since (fnmsub a b c)=-0 while c-ab=+0.
18131 if (!Flags.hasNoSignedZeros() && !Options.NoSignedZerosFPMath)
18132 return SDValue();
18133
18134 // (fma (fneg a) b c) => (fnmsub a b c)
18135 // (fnmsub (fneg a) b c) => (fma a b c)
18136 if (SDValue NegN0 = getCheaperNegatedExpression(N0, DAG, LegalOps, CodeSize))
18137 return DAG.getNode(invertFMAOpcode(Opc), Loc, VT, NegN0, N1, N2, Flags);
18138
18139 // (fma a (fneg b) c) => (fnmsub a b c)
18140 // (fnmsub a (fneg b) c) => (fma a b c)
18141 if (SDValue NegN1 = getCheaperNegatedExpression(N1, DAG, LegalOps, CodeSize))
18142 return DAG.getNode(invertFMAOpcode(Opc), Loc, VT, N0, NegN1, N2, Flags);
18143
18144 return SDValue();
18145}
18146
18147bool PPCTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
18148 // Only duplicate to increase tail-calls for the 64-bit SysV ABIs.
18149 if (!Subtarget.is64BitELFABI())
18150 return false;
18151
18152 // If not a tail call then no need to proceed.
18153 if (!CI->isTailCall())
18154 return false;
18155
18156 // If sibling calls have been disabled and tail-calls aren't guaranteed
18157 // there is no reason to duplicate.
18158 auto &TM = getTargetMachine();
18159 if (!TM.Options.GuaranteedTailCallOpt && DisableSCO)
18160 return false;
18161
18162 // Can't tail call a function called indirectly, or if it has variadic args.
18163 const Function *Callee = CI->getCalledFunction();
18164 if (!Callee || Callee->isVarArg())
18165 return false;
18166
18167 // Make sure the callee and caller calling conventions are eligible for tco.
18168 const Function *Caller = CI->getParent()->getParent();
18169 if (!areCallingConvEligibleForTCO_64SVR4(Caller->getCallingConv(),
18170 CI->getCallingConv()))
18171 return false;
18172
18173 // If the function is local then we have a good chance at tail-calling it
18174 return getTargetMachine().shouldAssumeDSOLocal(Callee);
18175}
18176
18177bool PPCTargetLowering::
18178isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const {
18179 const Value *Mask = AndI.getOperand(1);
18180 // If the mask is suitable for andi. or andis. we should sink the and.
18181 if (const ConstantInt *CI = dyn_cast<ConstantInt>(Mask)) {
18182 // Can't handle constants wider than 64-bits.
18183 if (CI->getBitWidth() > 64)
18184 return false;
18185 int64_t ConstVal = CI->getZExtValue();
18186 return isUInt<16>(ConstVal) ||
18187 (isUInt<16>(ConstVal >> 16) && !(ConstVal & 0xFFFF));
18188 }
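  // Editorial examples: 0x0000FFFF (fits andi.) and 0x00FF0000 (fits andis.)
  // are reported as worth sinking above, while 0x00012345 fits neither
  // record-form immediate and so returns false.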
18189
18190 // For non-constant masks, we can always use the record-form and.
18191 return true;
18192}
18193
18194/// getAddrModeForFlags - Based on the set of address flags, select the most
18195/// optimal instruction format to match by.
18196PPC::AddrMode PPCTargetLowering::getAddrModeForFlags(unsigned Flags) const {
18197 // This is not a node we should be handling here.
18198 if (Flags == PPC::MOF_None)
18199 return PPC::AM_None;
18200 // Unaligned D-Forms are tried first, followed by the aligned D-Forms.
18201 for (auto FlagSet : AddrModesMap.at(PPC::AM_DForm))
18202 if ((Flags & FlagSet) == FlagSet)
18203 return PPC::AM_DForm;
18204 for (auto FlagSet : AddrModesMap.at(PPC::AM_DSForm))
18205 if ((Flags & FlagSet) == FlagSet)
18206 return PPC::AM_DSForm;
18207 for (auto FlagSet : AddrModesMap.at(PPC::AM_DQForm))
18208 if ((Flags & FlagSet) == FlagSet)
18209 return PPC::AM_DQForm;
18210 for (auto FlagSet : AddrModesMap.at(PPC::AM_PrefixDForm))
18211 if ((Flags & FlagSet) == FlagSet)
18212 return PPC::AM_PrefixDForm;
18213 // If no other forms are selected, return an X-Form as it is the most
18214 // general addressing mode.
18215 return PPC::AM_XForm;
18216}
18217
18218/// Set alignment flags based on whether or not the Frame Index is aligned.
18219/// Utilized when computing flags for address computation when selecting
18220/// load and store instructions.
18221static void setAlignFlagsForFI(SDValue N, unsigned &FlagSet,
18222 SelectionDAG &DAG) {
18223 bool IsAdd = ((N.getOpcode() == ISD::ADD) || (N.getOpcode() == ISD::OR));
18224 FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(IsAdd ? N.getOperand(0) : N);
18225 if (!FI)
18226 return;
18227 MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
18228 unsigned FrameIndexAlign = MFI.getObjectAlign(FI->getIndex()).value();
18229 // If this is (add $FI, $S16Imm), the alignment flags are already set
18230 // based on the immediate. We just need to clear the alignment flags
18231 // if the FI alignment is weaker.
18232 if ((FrameIndexAlign % 4) != 0)
18233 FlagSet &= ~PPC::MOF_RPlusSImm16Mult4;
18234 if ((FrameIndexAlign % 16) != 0)
18235 FlagSet &= ~PPC::MOF_RPlusSImm16Mult16;
18236 // If the address is a plain FrameIndex, set alignment flags based on
18237 // FI alignment.
18238 if (!IsAdd) {
18239 if ((FrameIndexAlign % 4) == 0)
18240 FlagSet |= PPC::MOF_RPlusSImm16Mult4;
18241 if ((FrameIndexAlign % 16) == 0)
18242 FlagSet |= PPC::MOF_RPlusSImm16Mult16;
18243 }
18244}
18245
18246/// Given a node, compute flags that are used for address computation when
18247/// selecting load and store instructions. The flags computed are stored in
18248/// FlagSet. This function takes into account whether the node is a constant,
18249/// an ADD, OR, or a constant, and computes the address flags accordingly.
18250static void computeFlagsForAddressComputation(SDValue N, unsigned &FlagSet,
18251 SelectionDAG &DAG) {
18252 // Set the alignment flags for the node depending on if the node is
18253 // 4-byte or 16-byte aligned.
18254 auto SetAlignFlagsForImm = [&](uint64_t Imm) {
18255 if ((Imm & 0x3) == 0)
18256 FlagSet |= PPC::MOF_RPlusSImm16Mult4;
18257 if ((Imm & 0xf) == 0)
18258 FlagSet |= PPC::MOF_RPlusSImm16Mult16;
18259 };
18260
18261 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) {
18262 // All 32-bit constants can be computed as LIS + Disp.
18263 const APInt &ConstImm = CN->getAPIntValue();
18264 if (ConstImm.isSignedIntN(32)) { // Flag to handle 32-bit constants.
18265 FlagSet |= PPC::MOF_AddrIsSImm32;
18266 SetAlignFlagsForImm(ConstImm.getZExtValue());
18267 setAlignFlagsForFI(N, FlagSet, DAG);
18268 }
18269 if (ConstImm.isSignedIntN(34)) // Flag to handle 34-bit constants.
18270 FlagSet |= PPC::MOF_RPlusSImm34;
18271 else // Let constant materialization handle large constants.
18272 FlagSet |= PPC::MOF_NotAddNorCst;
18273 } else if (N.getOpcode() == ISD::ADD || provablyDisjointOr(DAG, N)) {
18274 // This address can be represented as an addition of:
18275 // - Register + Imm16 (possibly a multiple of 4/16)
18276 // - Register + Imm34
18277 // - Register + PPCISD::Lo
18278 // - Register + Register
18279 // In any case, we won't have to match this as Base + Zero.
18280 SDValue RHS = N.getOperand(1);
18281 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(RHS)) {
18282 const APInt &ConstImm = CN->getAPIntValue();
18283 if (ConstImm.isSignedIntN(16)) {
18284 FlagSet |= PPC::MOF_RPlusSImm16; // Signed 16-bit immediates.
18285 SetAlignFlagsForImm(ConstImm.getZExtValue());
18286 setAlignFlagsForFI(N, FlagSet, DAG);
18287 }
18288 if (ConstImm.isSignedIntN(34))
18289 FlagSet |= PPC::MOF_RPlusSImm34; // Signed 34-bit immediates.
18290 else
18291 FlagSet |= PPC::MOF_RPlusR; // Register.
18292 } else if (RHS.getOpcode() == PPCISD::Lo && !RHS.getConstantOperandVal(1))
18293 FlagSet |= PPC::MOF_RPlusLo; // PPCISD::Lo.
18294 else
18295 FlagSet |= PPC::MOF_RPlusR;
18296 } else { // The address computation is not a constant or an addition.
18297 setAlignFlagsForFI(N, FlagSet, DAG);
18298 FlagSet |= PPC::MOF_NotAddNorCst;
18299 }
18300}
18301
18302static bool isPCRelNode(SDValue N) {
18303 return (N.getOpcode() == PPCISD::MAT_PCREL_ADDR ||
18304 isValidPCRelNode<ConstantPoolSDNode>(N) ||
18305 isValidPCRelNode<GlobalAddressSDNode>(N) ||
18306 isValidPCRelNode<JumpTableSDNode>(N) ||
18307 isValidPCRelNode<BlockAddressSDNode>(N));
18308}
18309
18310/// computeMOFlags - Given a node N and its Parent (a MemSDNode), compute
18311/// the address flags of the load/store instruction that is to be matched.
18312unsigned PPCTargetLowering::computeMOFlags(const SDNode *Parent, SDValue N,
18313 SelectionDAG &DAG) const {
18314 unsigned FlagSet = PPC::MOF_None;
18315
18316 // Compute subtarget flags.
18317 if (!Subtarget.hasP9Vector())
18318 FlagSet |= PPC::MOF_SubtargetBeforeP9;
18319 else
18320 FlagSet |= PPC::MOF_SubtargetP9;
18321
18322 if (Subtarget.hasPrefixInstrs())
18323 FlagSet |= PPC::MOF_SubtargetP10;
18324
18325 if (Subtarget.hasSPE())
18326 FlagSet |= PPC::MOF_SubtargetSPE;
18327
18328 // Check if we have a PCRel node and return early.
18329 if ((FlagSet & PPC::MOF_SubtargetP10) && isPCRelNode(N))
18330 return FlagSet;
18331
18332 // If the node is the paired load/store intrinsics, compute flags for
18333 // address computation and return early.
18334 unsigned ParentOp = Parent->getOpcode();
18335 if (Subtarget.isISA3_1() && ((ParentOp == ISD::INTRINSIC_W_CHAIN) ||
18336 (ParentOp == ISD::INTRINSIC_VOID))) {
18337 unsigned ID = Parent->getConstantOperandVal(1);
18338 if ((ID == Intrinsic::ppc_vsx_lxvp) || (ID == Intrinsic::ppc_vsx_stxvp)) {
18339 SDValue IntrinOp = (ID == Intrinsic::ppc_vsx_lxvp)
18340 ? Parent->getOperand(2)
18341 : Parent->getOperand(3);
18342 computeFlagsForAddressComputation(IntrinOp, FlagSet, DAG);
18343 FlagSet |= PPC::MOF_Vector;
18344 return FlagSet;
18345 }
18346 }
18347
18348 // Mark this as something we don't want to handle here if it is atomic
18349 // or pre-increment instruction.
18350 if (const LSBaseSDNode *LSB = dyn_cast<LSBaseSDNode>(Parent))
18351 if (LSB->isIndexed())
18352 return PPC::MOF_None;
18353
18354 // Compute in-memory type flags. This is based on if there are scalars,
18355 // floats or vectors.
18356 const MemSDNode *MN = dyn_cast<MemSDNode>(Parent);
18357 assert(MN && "Parent should be a MemSDNode!");
18358 EVT MemVT = MN->getMemoryVT();
18359 unsigned Size = MemVT.getSizeInBits();
18360 if (MemVT.isScalarInteger()) {
18361 assert(Size <= 128 &&
18362 "Not expecting scalar integers larger than 16 bytes!");
18363 if (Size < 32)
18364 FlagSet |= PPC::MOF_SubWordInt;
18365 else if (Size == 32)
18366 FlagSet |= PPC::MOF_WordInt;
18367 else
18368 FlagSet |= PPC::MOF_DoubleWordInt;
18369 } else if (MemVT.isVector() && !MemVT.isFloatingPoint()) { // Integer vectors.
18370 if (Size == 128)
18371 FlagSet |= PPC::MOF_Vector;
18372 else if (Size == 256) {
18373 assert(Subtarget.pairedVectorMemops() &&
18374 "256-bit vectors are only available when paired vector memops is "
18375 "enabled!");
18376 FlagSet |= PPC::MOF_Vector;
18377 } else
18378 llvm_unreachable("Not expecting illegal vectors!");
18379 } else { // Floating point type: can be scalar, f128 or vector types.
18380 if (Size == 32 || Size == 64)
18381 FlagSet |= PPC::MOF_ScalarFloat;
18382 else if (MemVT == MVT::f128 || MemVT.isVector())
18383 FlagSet |= PPC::MOF_Vector;
18384 else
18385 llvm_unreachable("Not expecting illegal scalar floats!");
18386 }
18387
18388 // Compute flags for address computation.
18389 computeFlagsForAddressComputation(N, FlagSet, DAG);
18390
18391 // Compute type extension flags.
18392 if (const LoadSDNode *LN = dyn_cast<LoadSDNode>(Parent)) {
18393 switch (LN->getExtensionType()) {
18394 case ISD::SEXTLOAD:
18395 FlagSet |= PPC::MOF_SExt;
18396 break;
18397 case ISD::EXTLOAD:
18398 case ISD::ZEXTLOAD:
18399 FlagSet |= PPC::MOF_ZExt;
18400 break;
18401 case ISD::NON_EXTLOAD:
18402 FlagSet |= PPC::MOF_NoExt;
18403 break;
18404 }
18405 } else
18406 FlagSet |= PPC::MOF_NoExt;
18407
18408 // For integers, no extension is the same as zero extension.
18409 // We set the extension mode to zero extension so we don't have
18410 // to add separate entries in AddrModesMap for loads and stores.
18411 if (MemVT.isScalarInteger() && (FlagSet & PPC::MOF_NoExt)) {
18412 FlagSet |= PPC::MOF_ZExt;
18413 FlagSet &= ~PPC::MOF_NoExt;
18414 }
18415
18416 // If we don't have prefixed instructions, 34-bit constants should be
18417 // treated as PPC::MOF_NotAddNorCst so they can match D-Forms.
18418 bool IsNonP1034BitConst =
18419 ((PPC::MOF_RPlusSImm34 | PPC::MOF_AddrIsSImm32 | PPC::MOF_SubtargetP10) &
18420 FlagSet) == PPC::MOF_RPlusSImm34;
18421 if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::OR &&
18422 IsNonP1034BitConst)
18423 FlagSet |= PPC::MOF_NotAddNorCst;
18424
18425 return FlagSet;
18426}
18427
18428/// SelectForceXFormMode - Given the specified address, force it to be
18429/// represented as an indexed [r+r] operation (an XForm instruction).
18430PPC::AddrMode PPCTargetLowering::SelectForceXFormMode(SDValue N, SDValue &Disp,
18431 SDValue &Base,
18432 SelectionDAG &DAG) const {
18433
18434 PPC::AddrMode Mode = PPC::AM_XForm;
18435 int16_t ForceXFormImm = 0;
18436 if (provablyDisjointOr(DAG, N) &&
18437 !isIntS16Immediate(N.getOperand(1), ForceXFormImm)) {
18438 Disp = N.getOperand(0);
18439 Base = N.getOperand(1);
18440 return Mode;
18441 }
18442
18443 // If the address is the result of an add, we will utilize the fact that the
18444 // address calculation includes an implicit add. However, we can reduce
18445 // register pressure if we do not materialize a constant just for use as the
18446 // index register. We only get rid of the add if it is not an add of a
18447 // value and a 16-bit signed constant and both have a single use.
18448 if (N.getOpcode() == ISD::ADD &&
18449 (!isIntS16Immediate(N.getOperand(1), ForceXFormImm) ||
18450 !N.getOperand(1).hasOneUse() || !N.getOperand(0).hasOneUse())) {
18451 Disp = N.getOperand(0);
18452 Base = N.getOperand(1);
18453 return Mode;
18454 }
18455
18456 // Otherwise, use R0 as the base register.
18457 Disp = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
18458 N.getValueType());
18459 Base = N;
18460
18461 return Mode;
18462}
18463
18464bool PPCTargetLowering::splitValueIntoRegisterParts(
18465 SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
18466 unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const {
18467 EVT ValVT = Val.getValueType();
18468 // If we are splitting a scalar integer into f64 parts (i.e. so they
18469 // can be placed into VFRC registers), we need to zero extend and
18470 // bitcast the values. This will ensure the value is placed into a
18471 // VSR using direct moves or stack operations as needed.
18472 if (PartVT == MVT::f64 &&
18473 (ValVT == MVT::i32 || ValVT == MVT::i16 || ValVT == MVT::i8)) {
18474 Val = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, Val);
18475 Val = DAG.getNode(ISD::BITCAST, DL, MVT::f64, Val);
18476 Parts[0] = Val;
18477 return true;
18478 }
18479 return false;
18480}
18481
18482SDValue PPCTargetLowering::lowerToLibCall(const char *LibCallName, SDValue Op,
18483 SelectionDAG &DAG) const {
18484 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
18486 EVT RetVT = Op.getValueType();
18487 Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
18488 SDValue Callee =
18489 DAG.getExternalSymbol(LibCallName, TLI.getPointerTy(DAG.getDataLayout()));
18490 bool SignExtend = TLI.shouldSignExtendTypeInLibCall(RetVT, false);
18493 for (const SDValue &N : Op->op_values()) {
18494 EVT ArgVT = N.getValueType();
18495 Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
18496 Entry.Node = N;
18497 Entry.Ty = ArgTy;
18498 Entry.IsSExt = TLI.shouldSignExtendTypeInLibCall(ArgVT, SignExtend);
18499 Entry.IsZExt = !Entry.IsSExt;
18500 Args.push_back(Entry);
18501 }
18502
18503 SDValue InChain = DAG.getEntryNode();
18504 SDValue TCChain = InChain;
18505 const Function &F = DAG.getMachineFunction().getFunction();
18506 bool isTailCall =
18507 TLI.isInTailCallPosition(DAG, Op.getNode(), TCChain) &&
18508 (RetTy == F.getReturnType() || F.getReturnType()->isVoidTy());
18509 if (isTailCall)
18510 InChain = TCChain;
18511 CLI.setDebugLoc(SDLoc(Op))
18512 .setChain(InChain)
18513 .setLibCallee(CallingConv::C, RetTy, Callee, std::move(Args))
18514 .setTailCall(isTailCall)
18515 .setSExtResult(SignExtend)
18516 .setZExtResult(!SignExtend)
18518 return TLI.LowerCallTo(CLI).first;
18519}
18520
18521SDValue PPCTargetLowering::lowerLibCallBasedOnType(
18522 const char *LibCallFloatName, const char *LibCallDoubleName, SDValue Op,
18523 SelectionDAG &DAG) const {
18524 if (Op.getValueType() == MVT::f32)
18525 return lowerToLibCall(LibCallFloatName, Op, DAG);
18526
18527 if (Op.getValueType() == MVT::f64)
18528 return lowerToLibCall(LibCallDoubleName, Op, DAG);
18529
18530 return SDValue();
18531}
18532
18533bool PPCTargetLowering::isLowringToMASSFiniteSafe(SDValue Op) const {
18534 SDNodeFlags Flags = Op.getNode()->getFlags();
18535 return isLowringToMASSSafe(Op) && Flags.hasNoSignedZeros() &&
18536 Flags.hasNoNaNs() && Flags.hasNoInfs();
18537}
18538
18539bool PPCTargetLowering::isLowringToMASSSafe(SDValue Op) const {
18540 return Op.getNode()->getFlags().hasApproximateFuncs();
18541}
18542
18543bool PPCTargetLowering::isScalarMASSConversionEnabled() const {
18544 return getTargetMachine().Options.PPCGenScalarMASSEntries;
18545}
18546
18547SDValue PPCTargetLowering::lowerLibCallBase(const char *LibCallDoubleName,
18548 const char *LibCallFloatName,
18549 const char *LibCallDoubleNameFinite,
18550 const char *LibCallFloatNameFinite,
18551 SDValue Op,
18552 SelectionDAG &DAG) const {
18553 if (!isScalarMASSConversionEnabled() || !isLowringToMASSSafe(Op))
18554 return SDValue();
18555
18556 if (!isLowringToMASSFiniteSafe(Op))
18557 return lowerLibCallBasedOnType(LibCallFloatName, LibCallDoubleName, Op,
18558 DAG);
18559
18560 return lowerLibCallBasedOnType(LibCallFloatNameFinite,
18561 LibCallDoubleNameFinite, Op, DAG);
18562}
18563
18564SDValue PPCTargetLowering::lowerPow(SDValue Op, SelectionDAG &DAG) const {
18565 return lowerLibCallBase("__xl_pow", "__xl_powf", "__xl_pow_finite",
18566 "__xl_powf_finite", Op, DAG);
18567}
18568
18569SDValue PPCTargetLowering::lowerSin(SDValue Op, SelectionDAG &DAG) const {
18570 return lowerLibCallBase("__xl_sin", "__xl_sinf", "__xl_sin_finite",
18571 "__xl_sinf_finite", Op, DAG);
18572}
18573
18574SDValue PPCTargetLowering::lowerCos(SDValue Op, SelectionDAG &DAG) const {
18575 return lowerLibCallBase("__xl_cos", "__xl_cosf", "__xl_cos_finite",
18576 "__xl_cosf_finite", Op, DAG);
18577}
18578
18579SDValue PPCTargetLowering::lowerLog(SDValue Op, SelectionDAG &DAG) const {
18580 return lowerLibCallBase("__xl_log", "__xl_logf", "__xl_log_finite",
18581 "__xl_logf_finite", Op, DAG);
18582}
18583
18584SDValue PPCTargetLowering::lowerLog10(SDValue Op, SelectionDAG &DAG) const {
18585 return lowerLibCallBase("__xl_log10", "__xl_log10f", "__xl_log10_finite",
18586 "__xl_log10f_finite", Op, DAG);
18587}
18588
18589SDValue PPCTargetLowering::lowerExp(SDValue Op, SelectionDAG &DAG) const {
18590 return lowerLibCallBase("__xl_exp", "__xl_expf", "__xl_exp_finite",
18591 "__xl_expf_finite", Op, DAG);
18592}
18593
18594// If we happen to match to an aligned D-Form, check if the Frame Index is
18595// adequately aligned. If it is not, reset the mode to match to X-Form.
18596static void setXFormForUnalignedFI(SDValue N, unsigned Flags,
18597 PPC::AddrMode &Mode) {
18598 if (!isa<FrameIndexSDNode>(N))
18599 return;
18600 if ((Mode == PPC::AM_DSForm && !(Flags & PPC::MOF_RPlusSImm16Mult4)) ||
18601 (Mode == PPC::AM_DQForm && !(Flags & PPC::MOF_RPlusSImm16Mult16)))
18602 Mode = PPC::AM_XForm;
18603}
18604
18605/// SelectOptimalAddrMode - Based on a node N and its Parent (a MemSDNode),
18606/// compute the address flags of the node, get the optimal address mode based
18607/// on the flags, and set the Base and Disp based on the address mode.
18608PPC::AddrMode PPCTargetLowering::SelectOptimalAddrMode(const SDNode *Parent,
18609 SDValue N, SDValue &Disp,
18610 SDValue &Base,
18611 SelectionDAG &DAG,
18612 MaybeAlign Align) const {
18613 SDLoc DL(Parent);
18614
18615 // Compute the address flags.
18616 unsigned Flags = computeMOFlags(Parent, N, DAG);
18617
18618 // Get the optimal address mode based on the Flags.
18619 PPC::AddrMode Mode = getAddrModeForFlags(Flags);
18620
18621 // If the address mode is DS-Form or DQ-Form, check if the FI is aligned.
18622 // Select an X-Form load if it is not.
18623 setXFormForUnalignedFI(N, Flags, Mode);
18624
18625 // Set the mode to PC-Relative addressing mode if we have a valid PC-Rel node.
18626 if ((Mode == PPC::AM_XForm) && isPCRelNode(N)) {
18627 assert(Subtarget.isUsingPCRelativeCalls() &&
18628 "Must be using PC-Relative calls when a valid PC-Relative node is "
18629 "present!");
18630 Mode = PPC::AM_PCRel;
18631 }
18632
18633 // Set Base and Disp accordingly depending on the address mode.
18634 switch (Mode) {
18635 case PPC::AM_DForm:
18636 case PPC::AM_DSForm:
18637 case PPC::AM_DQForm: {
18638 // This is a register plus a 16-bit immediate. The base will be the
18639 // register and the displacement will be the immediate unless it
18640 // isn't sufficiently aligned.
18641 if (Flags & PPC::MOF_RPlusSImm16) {
18642 SDValue Op0 = N.getOperand(0);
18643 SDValue Op1 = N.getOperand(1);
18644 int16_t Imm = Op1->getAsZExtVal();
18645 if (!Align || isAligned(*Align, Imm)) {
18646 Disp = DAG.getTargetConstant(Imm, DL, N.getValueType());
18647 Base = Op0;
18648 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Op0)) {
18649 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
18650 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
18651 }
18652 break;
18653 }
18654 }
18655 // This is a register plus the @lo relocation. The base is the register
18656 // and the displacement is the global address.
18657 else if (Flags & PPC::MOF_RPlusLo) {
18658 Disp = N.getOperand(1).getOperand(0); // The global address.
18659 assert(Disp.getOpcode() == ISD::TargetGlobalAddress ||
18660 Disp.getOpcode() == ISD::TargetGlobalTLSAddress ||
18661 Disp.getOpcode() == ISD::TargetConstantPool ||
18662 Disp.getOpcode() == ISD::TargetJumpTable);
18663 Base = N.getOperand(0);
18664 break;
18665 }
18666 // This is a constant address at most 32 bits. The base will be
18667 // zero or load-immediate-shifted and the displacement will be
18668 // the low 16 bits of the address.
18669 else if (Flags & PPC::MOF_AddrIsSImm32) {
18670 auto *CN = cast<ConstantSDNode>(N);
18671 EVT CNType = CN->getValueType(0);
18672 uint64_t CNImm = CN->getZExtValue();
18673 // If this address fits entirely in a 16-bit sext immediate field, codegen
18674 // this as "d, 0".
18675 int16_t Imm;
18676 if (isIntS16Immediate(CN, Imm) && (!Align || isAligned(*Align, Imm))) {
18677 Disp = DAG.getTargetConstant(Imm, DL, CNType);
18678 Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
18679 CNType);
18680 break;
18681 }
18682 // Handle 32-bit sext immediate with LIS + Addr mode.
18683 if ((CNType == MVT::i32 || isInt<32>(CNImm)) &&
18684 (!Align || isAligned(*Align, CNImm))) {
18685 int32_t Addr = (int32_t)CNImm;
18686 // Otherwise, break this down into LIS + Disp.
18687 Disp = DAG.getTargetConstant((int16_t)Addr, DL, MVT::i32);
18688 Base =
18689 DAG.getTargetConstant((Addr - (int16_t)Addr) >> 16, DL, MVT::i32);
18690 uint32_t LIS = CNType == MVT::i32 ? PPC::LIS : PPC::LIS8;
18691 Base = SDValue(DAG.getMachineNode(LIS, DL, CNType, Base), 0);
18692 break;
18693 }
18694 }
18695 // Otherwise, the PPC::MOF_NotAdd flag is set. Load/Store is Non-foldable.
18696 Disp = DAG.getTargetConstant(0, DL, getPointerTy(DAG.getDataLayout()));
18697 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N)) {
18698 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
18699 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
18700 } else
18701 Base = N;
18702 break;
18703 }
18704 case PPC::AM_PrefixDForm: {
18705 int64_t Imm34 = 0;
18706 unsigned Opcode = N.getOpcode();
18707 if (((Opcode == ISD::ADD) || (Opcode == ISD::OR)) &&
18708 (isIntS34Immediate(N.getOperand(1), Imm34))) {
18709 // N is an Add/OR Node, and its operand is a 34-bit signed immediate.
18710 Disp = DAG.getTargetConstant(Imm34, DL, N.getValueType());
18711 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0)))
18712 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
18713 else
18714 Base = N.getOperand(0);
18715 } else if (isIntS34Immediate(N, Imm34)) {
18716 // The address is a 34-bit signed immediate.
18717 Disp = DAG.getTargetConstant(Imm34, DL, N.getValueType());
18718 Base = DAG.getRegister(PPC::ZERO8, N.getValueType());
18719 }
18720 break;
18721 }
18722 case PPC::AM_PCRel: {
18723 // When selecting PC-Relative instructions, "Base" is not utilized as
18724 // we select the address as [PC+imm].
18725 Disp = N;
18726 break;
18727 }
18728 case PPC::AM_None:
18729 break;
18730 default: { // By default, X-Form is always available to be selected.
18731 // When a frame index is not aligned, we also match by XForm.
18732 FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N);
18733 Base = FI ? N : N.getOperand(1);
18734 Disp = FI ? DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
18735 N.getValueType())
18736 : N.getOperand(0);
18737 break;
18738 }
18739 }
18740 return Mode;
18741}
18742
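A worked example of the LIS + displacement split performed in the MOF_AddrIsSImm32 branch above: the displacement is the sign-extended low 16 bits of the address, and the LIS immediate is computed so that the sign extension is compensated for. A standalone sketch (hypothetical names, not part of this file):

#include <cassert>
#include <cstdint>
#include <cstdio>

int main() {
  int32_t Addr = 0x12348000;
  int16_t Disp = (int16_t)Addr;          // sign-extended low half: -32768 here
  int32_t LISImm = (Addr - Disp) >> 16;  // 0x1235 here
  // (LISImm << 16) + Disp reproduces the original address.
  assert(((int32_t)(LISImm << 16) + Disp) == Addr);
  std::printf("lis %#x ; disp %d\n", (unsigned)LISImm, (int)Disp);
  return 0;
}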
18743CCAssignFn *PPCTargetLowering::ccAssignFnForCall(CallingConv::ID CC,
18744 bool Return,
18745 bool IsVarArg) const {
18746 switch (CC) {
18747 case CallingConv::Cold:
18748 return (Return ? RetCC_PPC_Cold : CC_PPC64_ELF);
18749 default:
18750 return CC_PPC64_ELF;
18751 }
18752}
18753
18754bool PPCTargetLowering::shouldInlineQuadwordAtomics() const {
18755 return Subtarget.isPPC64() && Subtarget.hasQuadwordAtomics();
18756}
18757
18758TargetLowering::AtomicExpansionKind
18759PPCTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
18760 unsigned Size = AI->getType()->getPrimitiveSizeInBits();
18761 if (shouldInlineQuadwordAtomics() && Size == 128)
18762 return AtomicExpansionKind::MaskedIntrinsic;
18763
18764 switch (AI->getOperation()) {
18765 case AtomicRMWInst::UIncWrap:
18766 case AtomicRMWInst::UDecWrap:
18767 return AtomicExpansionKind::CmpXChg;
18768 default:
18769 return TargetLoweringBase::shouldExpandAtomicRMWInIR(AI);
18770 }
18771
18772 llvm_unreachable("unreachable atomicrmw operation");
18773}
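The UIncWrap/UDecWrap cases above are expanded with a compare-and-swap loop because the wrapped update is not a single PowerPC atomic operation. A standalone sketch of the uinc_wrap semantics (*p = old >= bound ? 0 : old + 1) expressed as such a loop, using hypothetical helper names:

#include <atomic>
#include <cassert>
#include <cstdint>

// Hypothetical illustration (not part of this file): retry the non-primitive
// update with compare_exchange until it lands atomically.
static uint32_t atomicUIncWrap(std::atomic<uint32_t> &P, uint32_t Bound) {
  uint32_t Old = P.load();
  uint32_t New;
  do {
    New = (Old >= Bound) ? 0 : Old + 1;
  } while (!P.compare_exchange_weak(Old, New));
  return Old; // atomicrmw yields the previous value
}

int main() {
  std::atomic<uint32_t> V{3};
  assert(atomicUIncWrap(V, 3) == 3 && V.load() == 0);
  assert(atomicUIncWrap(V, 3) == 0 && V.load() == 1);
  return 0;
}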
18774
18775TargetLowering::AtomicExpansionKind
18776PPCTargetLowering::shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const {
18777 unsigned Size = AI->getNewValOperand()->getType()->getPrimitiveSizeInBits();
18778 if (shouldInlineQuadwordAtomics() && Size == 128)
18779 return AtomicExpansionKind::MaskedIntrinsic;
18780 return TargetLoweringBase::shouldExpandAtomicCmpXchgInIR(AI);
18781}
18782
18783static Intrinsic::ID
18784getIntrinsicForAtomicRMWBinOp128(AtomicRMWInst::BinOp BinOp) {
18785 switch (BinOp) {
18786 default:
18787 llvm_unreachable("Unexpected AtomicRMW BinOp");
18788 case AtomicRMWInst::Xchg:
18789 return Intrinsic::ppc_atomicrmw_xchg_i128;
18790 case AtomicRMWInst::Add:
18791 return Intrinsic::ppc_atomicrmw_add_i128;
18792 case AtomicRMWInst::Sub:
18793 return Intrinsic::ppc_atomicrmw_sub_i128;
18794 case AtomicRMWInst::And:
18795 return Intrinsic::ppc_atomicrmw_and_i128;
18796 case AtomicRMWInst::Or:
18797 return Intrinsic::ppc_atomicrmw_or_i128;
18798 case AtomicRMWInst::Xor:
18799 return Intrinsic::ppc_atomicrmw_xor_i128;
18800 case AtomicRMWInst::Nand:
18801 return Intrinsic::ppc_atomicrmw_nand_i128;
18802 }
18803}
18804
18805Value *PPCTargetLowering::emitMaskedAtomicRMWIntrinsic(
18806 IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
18807 Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
18808 assert(shouldInlineQuadwordAtomics() && "Only support quadword now");
18809 Module *M = Builder.GetInsertBlock()->getParent()->getParent();
18810 Type *ValTy = Incr->getType();
18811 assert(ValTy->getPrimitiveSizeInBits() == 128);
18812 Function *RMW = Intrinsic::getDeclaration(
18813 M, getIntrinsicForAtomicRMWBinOp128(AI->getOperation()));
18814 Type *Int64Ty = Type::getInt64Ty(M->getContext());
18815 Value *IncrLo = Builder.CreateTrunc(Incr, Int64Ty, "incr_lo");
18816 Value *IncrHi =
18817 Builder.CreateTrunc(Builder.CreateLShr(Incr, 64), Int64Ty, "incr_hi");
18818 Value *LoHi = Builder.CreateCall(RMW, {AlignedAddr, IncrLo, IncrHi});
18819 Value *Lo = Builder.CreateExtractValue(LoHi, 0, "lo");
18820 Value *Hi = Builder.CreateExtractValue(LoHi, 1, "hi");
18821 Lo = Builder.CreateZExt(Lo, ValTy, "lo64");
18822 Hi = Builder.CreateZExt(Hi, ValTy, "hi64");
18823 return Builder.CreateOr(
18824 Lo, Builder.CreateShl(Hi, ConstantInt::get(ValTy, 64)), "val64");
18825}
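The builder calls above split the 128-bit operand into 64-bit halves for the ppc_atomicrmw_*_i128 intrinsic and rebuild the returned {lo, hi} pair with zext/shl/or. The same arithmetic in a standalone sketch (assumes a host compiler with unsigned __int128 support; names are hypothetical):

#include <cassert>
#include <cstdint>

int main() {
  unsigned __int128 Incr =
      ((unsigned __int128)0x0123456789abcdefULL << 64) | 0xfedcba9876543210ULL;
  uint64_t IncrLo = (uint64_t)Incr;          // trunc i128 -> i64
  uint64_t IncrHi = (uint64_t)(Incr >> 64);  // lshr 64, then trunc

  // Rebuild the value the way the lowering does: zero-extend both halves,
  // shift the high half left by 64, and OR them together.
  unsigned __int128 Rebuilt =
      ((unsigned __int128)IncrHi << 64) | (unsigned __int128)IncrLo;
  assert(Rebuilt == Incr);
  return 0;
}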
18826
18827Value *PPCTargetLowering::emitMaskedAtomicCmpXchgIntrinsic(
18828 IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
18829 Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
18830 assert(shouldInlineQuadwordAtomics() && "Only support quadword now");
18831 Module *M = Builder.GetInsertBlock()->getParent()->getParent();
18832 Type *ValTy = CmpVal->getType();
18833 assert(ValTy->getPrimitiveSizeInBits() == 128);
18834 Function *IntCmpXchg =
18835 Intrinsic::getDeclaration(M, Intrinsic::ppc_cmpxchg_i128);
18836 Type *Int64Ty = Type::getInt64Ty(M->getContext());
18837 Value *CmpLo = Builder.CreateTrunc(CmpVal, Int64Ty, "cmp_lo");
18838 Value *CmpHi =
18839 Builder.CreateTrunc(Builder.CreateLShr(CmpVal, 64), Int64Ty, "cmp_hi");
18840 Value *NewLo = Builder.CreateTrunc(NewVal, Int64Ty, "new_lo");
18841 Value *NewHi =
18842 Builder.CreateTrunc(Builder.CreateLShr(NewVal, 64), Int64Ty, "new_hi");
18843 emitLeadingFence(Builder, CI, Ord);
18844 Value *LoHi =
18845 Builder.CreateCall(IntCmpXchg, {AlignedAddr, CmpLo, CmpHi, NewLo, NewHi});
18846 emitTrailingFence(Builder, CI, Ord);
18847 Value *Lo = Builder.CreateExtractValue(LoHi, 0, "lo");
18848 Value *Hi = Builder.CreateExtractValue(LoHi, 1, "hi");
18849 Lo = Builder.CreateZExt(Lo, ValTy, "lo64");
18850 Hi = Builder.CreateZExt(Hi, ValTy, "hi64");
18851 return Builder.CreateOr(
18852 Lo, Builder.CreateShl(Hi, ConstantInt::get(ValTy, 64)), "val64");
18853}
unsigned const MachineRegisterInfo * MRI
static unsigned getCallOpcode(const MachineFunction &CallerF, bool IsIndirect, bool IsTailCall, std::optional< CallLowering::PtrAuthInfo > &PAI, MachineRegisterInfo &MRI)
#define Success
static SDValue GeneratePerfectShuffle(unsigned ID, SDValue V1, SDValue V2, unsigned PFEntry, SDValue LHS, SDValue RHS, SelectionDAG &DAG, const SDLoc &dl)
GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit the specified operations t...
static bool isSignExtended(SDValue N, SelectionDAG &DAG)
unsigned Intr
static msgpack::DocNode getNode(msgpack::DocNode DN, msgpack::Type Type, MCValue Val)
static std::pair< Register, unsigned > getBaseWithConstantOffset(MachineRegisterInfo &MRI, Register Reg)
This file declares a class to represent arbitrary precision floating point values and provide a varie...
This file implements a class to represent arbitrary precision integral constant values and operations...
This file implements the APSInt class, which is a simple class that represents an arbitrary sized int...
static bool isLoad(int Opcode)
@ OP_COPY
static bool isFloatingPointZero(SDValue Op)
isFloatingPointZero - Return true if this is +0.0.
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Function Alias Analysis Results
Atomic ordering constants.
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
This file contains the declarations for the subclasses of Constant, which represent the different fla...
return RetTy
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
Given that RA is a live propagate it s liveness to any other values it uses(according to Uses). void DeadArgumentEliminationPass
#define LLVM_DEBUG(X)
Definition: Debug.h:101
static RegisterPass< DebugifyModulePass > DM("debugify", "Attach debug info to everything")
This file defines the DenseMap class.
uint64_t Addr
uint64_t Size
bool End
Definition: ELF_riscv.cpp:480
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
const HexagonInstrInfo * TII
static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain, ISD::ArgFlagsTy Flags, SelectionDAG &DAG, const SDLoc &dl)
CreateCopyOfByValArgument - Make a copy of an aggregate at address specified by "Src" to address "Dst...
IRTranslator LLVM IR MI
This defines the Use class.
#define RegName(no)
static LVOptions Options
Definition: LVOptions.cpp:25
lazy value info
static bool isConstantOrUndef(const SDValue Op)
static bool isSplat(Value *V)
Return true if V is a splat of a value (which is used when multiplying a matrix with a scalar).
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
#define G(x, y, z)
Definition: MD5.cpp:56
unsigned const TargetRegisterInfo * TRI
Module.h This file contains the declarations for the Module class.
uint64_t IntrinsicInst * II
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
static CodeModel::Model getCodeModel(const PPCSubtarget &S, const TargetMachine &TM, const MachineOperand &MO)
cl::opt< bool > ANDIGlueBug("expose-ppc-andi-glue-bug", cl::desc("expose the ANDI glue bug on PPC"), cl::Hidden)
static SDValue getCanonicalConstSplat(uint64_t Val, unsigned SplatSize, EVT VT, SelectionDAG &DAG, const SDLoc &dl)
getCanonicalConstSplat - Build a canonical splat immediate of Val with an element size of SplatSize.
static bool IsSelectCC(MachineInstr &MI)
static const TargetRegisterClass * getRegClassForSVT(MVT::SimpleValueType SVT, bool IsPPC64, bool HasP8Vector, bool HasVSX)
static bool isGPRShadowAligned(MCPhysReg Reg, Align RequiredAlign)
static bool needStackSlotPassParameters(const PPCSubtarget &Subtarget, const SmallVectorImpl< ISD::OutputArg > &Outs)
static bool isAlternatingShuffMask(const ArrayRef< int > &Mask, int NumElts)
static SDValue addShuffleForVecExtend(SDNode *N, SelectionDAG &DAG, SDValue Input, uint64_t Elems, uint64_t CorrectElems)
static cl::opt< bool > DisablePPCUnaligned("disable-ppc-unaligned", cl::desc("disable unaligned load/store generation on PPC"), cl::Hidden)
static SDValue combineADDToADDZE(SDNode *N, SelectionDAG &DAG, const PPCSubtarget &Subtarget)
static bool findConsecutiveLoad(LoadSDNode *LD, SelectionDAG &DAG)
static SDValue generateEquivalentSub(SDNode *N, int Size, bool Complement, bool Swap, SDLoc &DL, SelectionDAG &DAG)
This function is called when we have proved that a SETCC node can be replaced by subtraction (and oth...
static unsigned mapArgRegToOffsetAIX(unsigned Reg, const PPCFrameLowering *FL)
static SDValue combineADDToMAT_PCREL_ADDR(SDNode *N, SelectionDAG &DAG, const PPCSubtarget &Subtarget)
static void setAlignFlagsForFI(SDValue N, unsigned &FlagSet, SelectionDAG &DAG)
Set alignment flags based on whether or not the Frame Index is aligned.
static bool isTOCSaveRestoreRequired(const PPCSubtarget &Subtarget)
static void updateForAIXShLibTLSModelOpt(TLSModel::Model &Model, SelectionDAG &DAG, const TargetMachine &TM)
updateForAIXShLibTLSModelOpt - Helper to initialize TLS model opt settings, and then apply the update...
static bool provablyDisjointOr(SelectionDAG &DAG, const SDValue &N)
Used when computing address flags for selecting loads and stores.
static void CalculateTailCallArgDest(SelectionDAG &DAG, MachineFunction &MF, bool isPPC64, SDValue Arg, int SPDiff, unsigned ArgOffset, SmallVectorImpl< TailCallArgumentInfo > &TailCallArguments)
CalculateTailCallArgDest - Remember Argument for later processing.
static bool callsShareTOCBase(const Function *Caller, const GlobalValue *CalleeGV, const TargetMachine &TM)
constexpr uint64_t AIXSmallTlsPolicySizeLimit
static bool isPCRelNode(SDValue N)
static void LowerMemOpCallTo(SelectionDAG &DAG, MachineFunction &MF, SDValue Chain, SDValue Arg, SDValue PtrOff, int SPDiff, unsigned ArgOffset, bool isPPC64, bool isTailCall, bool isVector, SmallVectorImpl< SDValue > &MemOpChains, SmallVectorImpl< TailCallArgumentInfo > &TailCallArguments, const SDLoc &dl)
LowerMemOpCallTo - Store the argument to the stack or remember it in case of tail calls.
static cl::opt< unsigned > PPCGatherAllAliasesMaxDepth("ppc-gather-alias-max-depth", cl::init(18), cl::Hidden, cl::desc("max depth when checking alias info in GatherAllAliases()"))
static bool areCallingConvEligibleForTCO_64SVR4(CallingConv::ID CallerCC, CallingConv::ID CalleeCC)
static const MCPhysReg FPR[]
FPR - The set of FP registers that should be allocated for arguments on Darwin and AIX.
static SDNode * isBLACompatibleAddress(SDValue Op, SelectionDAG &DAG)
isCallCompatibleAddress - Return the immediate to use if the specified 32-bit value is representable ...
static Align CalculateStackSlotAlignment(EVT ArgVT, EVT OrigVT, ISD::ArgFlagsTy Flags, unsigned PtrByteSize)
CalculateStackSlotAlignment - Calculates the alignment of this argument on the stack.
static bool IsSelect(MachineInstr &MI)
static bool haveEfficientBuildVectorPattern(BuildVectorSDNode *V, bool HasDirectMove, bool HasP8Vector)
Do we have an efficient pattern in a .td file for this node?
static SDValue getSToVPermuted(SDValue OrigSToV, SelectionDAG &DAG, const PPCSubtarget &Subtarget)
static bool CC_AIX(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &S)
static void setUsesTOCBasePtr(MachineFunction &MF)
static SDValue transformCallee(const SDValue &Callee, SelectionDAG &DAG, const SDLoc &dl, const PPCSubtarget &Subtarget)
static unsigned EnsureStackAlignment(const PPCFrameLowering *Lowering, unsigned NumBytes)
EnsureStackAlignment - Round stack frame size up from NumBytes to ensure minimum alignment required f...
static SDValue stripModuloOnShift(const TargetLowering &TLI, SDNode *N, SelectionDAG &DAG)
static bool isStoreConditional(SDValue Intrin, unsigned &StoreWidth)
static bool hasSameArgumentList(const Function *CallerFn, const CallBase &CB)
static bool isFPExtLoad(SDValue Op)
static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op, SelectionDAG &DAG, const SDLoc &dl, EVT DestVT=MVT::Other)
BuildIntrinsicOp - Return a unary operator intrinsic node with the specified intrinsic ID.
static bool isConsecutiveLSLoc(SDValue Loc, EVT VT, LSBaseSDNode *Base, unsigned Bytes, int Dist, SelectionDAG &DAG)
static void StoreTailCallArgumentsToStackSlot(SelectionDAG &DAG, SDValue Chain, const SmallVectorImpl< TailCallArgumentInfo > &TailCallArgs, SmallVectorImpl< SDValue > &MemOpChains, const SDLoc &dl)
StoreTailCallArgumentsToStackSlot - Stores arguments to their stack slot.
static const char AIXSSPCanaryWordName[]
static cl::opt< bool > UseAbsoluteJumpTables("ppc-use-absolute-jumptables", cl::desc("use absolute jump tables on ppc"), cl::Hidden)
static void setXFormForUnalignedFI(SDValue N, unsigned Flags, PPC::AddrMode &Mode)
static void getMaxByValAlign(Type *Ty, Align &MaxAlign, Align MaxMaxAlign)
getMaxByValAlign - Helper for getByValTypeAlignment to determine the desired ByVal argument alignment...
static bool isConsecutiveLS(SDNode *N, LSBaseSDNode *Base, unsigned Bytes, int Dist, SelectionDAG &DAG)
static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize, unsigned LHSStart, unsigned RHSStart)
isVMerge - Common function, used to match vmrg* shuffles.
static void getLabelAccessInfo(bool IsPIC, const PPCSubtarget &Subtarget, unsigned &HiOpFlags, unsigned &LoOpFlags, const GlobalValue *GV=nullptr)
Return true if we should reference labels using a PICBase, set the HiOpFlags and LoOpFlags to the tar...
cl::opt< bool > DisableAutoPairedVecSt("disable-auto-paired-vec-st", cl::desc("disable automatically generated 32byte paired vector stores"), cl::init(true), cl::Hidden)
static void buildCallOperands(SmallVectorImpl< SDValue > &Ops, PPCTargetLowering::CallFlags CFlags, const SDLoc &dl, SelectionDAG &DAG, SmallVector< std::pair< unsigned, SDValue >, 8 > &RegsToPass, SDValue Glue, SDValue Chain, SDValue &Callee, int SPDiff, const PPCSubtarget &Subtarget)
static cl::opt< bool > DisableInnermostLoopAlign32("disable-ppc-innermost-loop-align32", cl::desc("don't always align innermost loop to 32 bytes on ppc"), cl::Hidden)
static bool usePartialVectorLoads(SDNode *N, const PPCSubtarget &ST)
Returns true if we should use a direct load into vector instruction (such as lxsd or lfd),...
static SDValue getDataClassTest(SDValue Op, FPClassTest Mask, const SDLoc &Dl, SelectionDAG &DAG, const PPCSubtarget &Subtarget)
static cl::opt< bool > DisableSCO("disable-ppc-sco", cl::desc("disable sibling call optimization on ppc"), cl::Hidden)
static void fixupFuncForFI(SelectionDAG &DAG, int FrameIdx, EVT VT)
static cl::opt< bool > DisablePPCPreinc("disable-ppc-preinc", cl::desc("disable preincrement load/store generation on PPC"), cl::Hidden)
static Intrinsic::ID getIntrinsicForAtomicRMWBinOp128(AtomicRMWInst::BinOp BinOp)
static SDValue convertFPToInt(SDValue Op, SelectionDAG &DAG, const PPCSubtarget &Subtarget)
static unsigned CalculateStackSlotSize(EVT ArgVT, ISD::ArgFlagsTy Flags, unsigned PtrByteSize)
CalculateStackSlotSize - Calculates the size reserved for this argument on the stack.
static int CalculateTailCallSPDiff(SelectionDAG &DAG, bool isTailCall, unsigned ParamSize)
CalculateTailCallSPDiff - Get the amount the stack pointer has to be adjusted to accommodate the argu...
static Instruction * callIntrinsic(IRBuilderBase &Builder, Intrinsic::ID Id)
static void fixupShuffleMaskForPermutedSToV(SmallVectorImpl< int > &ShuffV, int LHSMaxIdx, int RHSMinIdx, int RHSMaxIdx, int HalfVec, unsigned ValidLaneWidth, const PPCSubtarget &Subtarget)
static void prepareIndirectCall(SelectionDAG &DAG, SDValue &Callee, SDValue &Glue, SDValue &Chain, const SDLoc &dl)
static SDValue LowerLabelRef(SDValue HiPart, SDValue LoPart, bool isPIC, SelectionDAG &DAG)
static SDValue isScalarToVec(SDValue Op)
static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl)
static cl::opt< bool > DisablePerfectShuffle("ppc-disable-perfect-shuffle", cl::desc("disable vector permute decomposition"), cl::init(true), cl::Hidden)
static bool getVectorCompareInfo(SDValue Intrin, int &CompareOpc, bool &isDot, const PPCSubtarget &Subtarget)
getVectorCompareInfo - Given an intrinsic, return false if it is not a vector comparison.
static unsigned invertFMAOpcode(unsigned Opc)
static const SDValue * getNormalLoadInput(const SDValue &Op, bool &IsPermuted)
static cl::opt< unsigned > PPCMinimumJumpTableEntries("ppc-min-jump-table-entries", cl::init(64), cl::Hidden, cl::desc("Set minimum number of entries to use a jump table on PPC"))
static bool isValidSplatLoad(const PPCSubtarget &Subtarget, const SDValue &Op, unsigned &Opcode)
static SDValue convertIntToFP(SDValue Op, SDValue Src, SelectionDAG &DAG, const PPCSubtarget &Subtarget, SDValue Chain=SDValue())
static int getEstimateRefinementSteps(EVT VT, const PPCSubtarget &Subtarget)
static void PrepareTailCall(SelectionDAG &DAG, SDValue &InGlue, SDValue &Chain, const SDLoc &dl, int SPDiff, unsigned NumBytes, SDValue LROp, SDValue FPOp, SmallVectorImpl< TailCallArgumentInfo > &TailCallArguments)
static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG, SDValue Chain, SDValue OldRetAddr, SDValue OldFP, int SPDiff, const SDLoc &dl)
EmitTailCallStoreFPAndRetAddr - Move the frame pointer and return address to the appropriate stack sl...
static SDValue BuildVSLDOI(SDValue LHS, SDValue RHS, unsigned Amt, EVT VT, SelectionDAG &DAG, const SDLoc &dl)
BuildVSLDOI - Return a VECTOR_SHUFFLE that is a vsldoi of the specified amount.
static SDValue combineBVZEXTLOAD(SDNode *N, SelectionDAG &DAG)
static SDValue truncateScalarIntegerArg(ISD::ArgFlagsTy Flags, EVT ValVT, SelectionDAG &DAG, SDValue ArgValue, MVT LocVT, const SDLoc &dl)
static void computeFlagsForAddressComputation(SDValue N, unsigned &FlagSet, SelectionDAG &DAG)
Given a node, compute flags that are used for address computation when selecting load and store instr...
cl::opt< bool > ANDIGlueBug
static SDValue getOutputChainFromCallSeq(SDValue CallSeqStart)
static bool CalculateStackSlotUsed(EVT ArgVT, EVT OrigVT, ISD::ArgFlagsTy Flags, unsigned PtrByteSize, unsigned LinkageSize, unsigned ParamAreaSize, unsigned &ArgOffset, unsigned &AvailableFPRs, unsigned &AvailableVRs)
CalculateStackSlotUsed - Return whether this argument will use its stack slot (instead of being passe...
static cl::opt< unsigned > PPCAIXTLSModelOptUseIEForLDLimit("ppc-aix-shared-lib-tls-model-opt-limit", cl::init(1), cl::Hidden, cl::desc("Set inclusive limit count of TLS local-dynamic access(es) in a " "function to use initial-exec"))
static unsigned getPPCStrictOpcode(unsigned Opc)
static void prepareDescriptorIndirectCall(SelectionDAG &DAG, SDValue &Callee, SDValue &Glue, SDValue &Chain, SDValue CallSeqStart, const CallBase *CB, const SDLoc &dl, bool hasNest, const PPCSubtarget &Subtarget)
static bool isXXBRShuffleMaskHelper(ShuffleVectorSDNode *N, int Width)
static bool isFunctionGlobalAddress(const GlobalValue *CalleeGV)
static bool isSplatBV(SDValue Op)
static SDValue combineBVOfVecSExt(SDNode *N, SelectionDAG &DAG)
static cl::opt< bool > DisableILPPref("disable-ppc-ilp-pref", cl::desc("disable setting the node scheduling preference to ILP on PPC"), cl::Hidden)
static bool isNByteElemShuffleMask(ShuffleVectorSDNode *, unsigned, int)
Check that the mask is shuffling N byte elements.
static SDValue combineBVOfConsecutiveLoads(SDNode *N, SelectionDAG &DAG)
Reduce the number of loads when building a vector.
static bool isValidPCRelNode(SDValue N)
const char LLVMTargetMachineRef TM
const SmallVectorImpl< MachineOperand > & Cond
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
SI optimize exec mask operations pre RA
static const MCExpr * MaskShift(const MCExpr *Val, uint32_t Mask, uint32_t Shift, MCContext &Ctx)
This file contains some templates that are useful if you are working with the STL at all.
This file defines the SmallPtrSet class.
This file defines the SmallSet class.
This file defines the SmallVector class.
static bool Enabled
Definition: Statistic.cpp:46
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:167
This file implements the StringSwitch template, which mimics a switch() statement whose cases are str...
This file describes how to lower LLVM code to machine code.
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
Definition: Value.cpp:469
static bool is64Bit(const char *name)
Value * RHS
Value * LHS
bool isFixed(unsigned ValNo) const
Definition: PPCCCState.h:68
opStatus convert(const fltSemantics &ToSemantics, roundingMode RM, bool *losesInfo)
Definition: APFloat.cpp:5317
bool isDenormal() const
Definition: APFloat.h:1355
APInt bitcastToAPInt() const
Definition: APFloat.h:1260
Class for arbitrary precision integers.
Definition: APInt.h:78
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition: APInt.h:214
void clearBit(unsigned BitPosition)
Set a given bit to 0.
Definition: APInt.h:1387
bool isNegatedPowerOf2() const
Check if this APInt's negated value is a power of two greater than zero.
Definition: APInt.h:429
APInt zext(unsigned width) const
Zero extend to a new width.
Definition: APInt.cpp:981
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1500
void setBit(unsigned BitPosition)
Set the given bit to 1 whose position is given as "bitPosition".
Definition: APInt.h:1310
APInt abs() const
Get the absolute value.
Definition: APInt.h:1753
bool isNegative() const
Determine sign of this APInt.
Definition: APInt.h:309
bool isSignedIntN(unsigned N) const
Check if this APInt has an N-bits signed integer value.
Definition: APInt.h:415
bool getBoolValue() const
Convert APInt to a boolean value.
Definition: APInt.h:451
double bitsToDouble() const
Converts APInt bits to a double.
Definition: APInt.h:1680
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
Definition: APInt.h:420
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
Definition: APInt.h:286
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
Definition: APInt.h:276
An arbitrary precision integer that knows its signedness.
Definition: APSInt.h:23
This class represents an incoming formal argument to a Function.
Definition: Argument.h:31
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:165
An instruction that atomically checks whether a specified value is in a memory location,...
Definition: Instructions.h:495
an instruction that atomically reads a memory location, combines it with another value,...
Definition: Instructions.h:696
BinOp
This enumeration lists the possible modifications atomicrmw can make.
Definition: Instructions.h:708
@ Add
*p = old + v
Definition: Instructions.h:712
@ Or
*p = old | v
Definition: Instructions.h:720
@ Sub
*p = old - v
Definition: Instructions.h:714
@ And
*p = old & v
Definition: Instructions.h:716
@ Xor
*p = old ^ v
Definition: Instructions.h:722
@ UIncWrap
Increment one up to a maximum value.
Definition: Instructions.h:748
@ UDecWrap
Decrement one until a minimum value or zero.
Definition: Instructions.h:752
@ Nand
*p = ~(old & v)
Definition: Instructions.h:718
BinOp getOperation() const
Definition: Instructions.h:787
This is an SDNode representing atomic operations.
StringRef getValueAsString() const
Return the attribute's value as a string.
Definition: Attributes.cpp:391
LLVM Basic Block Representation.
Definition: BasicBlock.h:61
InstListType::const_iterator const_iterator
Definition: BasicBlock.h:168
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:209
const BlockAddress * getBlockAddress() const
The address of a basic block.
Definition: Constants.h:890
static BranchProbability getOne()
static BranchProbability getZero()
A "pseudo-class" with methods for operating on BUILD_VECTORs.
bool isConstantSplat(APInt &SplatValue, APInt &SplatUndef, unsigned &SplatBitSize, bool &HasAnyUndefs, unsigned MinSplatBits=0, bool isBigEndian=false) const
Check if this is a constant splat, and if so, find the smallest element size that splats the vector.
CCState - This class holds information needed while lowering arguments and return values.
MachineFunction & getMachineFunction() const
unsigned getFirstUnallocated(ArrayRef< MCPhysReg > Regs) const
getFirstUnallocated - Return the index of the first unallocated register in the set,...
MCRegister AllocateReg(MCPhysReg Reg)
AllocateReg - Attempt to allocate one register.
int64_t AllocateStack(unsigned Size, Align Alignment)
AllocateStack - Allocate a chunk of stack space with the specified size and alignment.
uint64_t getStackSize() const
Returns the size of the currently allocated portion of the stack.
bool isVarArg() const
void addLoc(const CCValAssign &V)
CCValAssign - Represent assignment of one arg/retval to a location.
bool isRegLoc() const
Register getLocReg() const
LocInfo getLocInfo() const
static CCValAssign getMem(unsigned ValNo, MVT ValVT, int64_t Offset, MVT LocVT, LocInfo HTP, bool IsCustom=false)
static CCValAssign getReg(unsigned ValNo, MVT ValVT, unsigned RegNo, MVT LocVT, LocInfo HTP, bool IsCustom=false)
bool needsCustom() const
bool isMemLoc() const
static CCValAssign getCustomReg(unsigned ValNo, MVT ValVT, unsigned RegNo, MVT LocVT, LocInfo HTP)
int64_t getLocMemOffset() const
unsigned getValNo() const
static CCValAssign getCustomMem(unsigned ValNo, MVT ValVT, int64_t Offset, MVT LocVT, LocInfo HTP)
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Definition: InstrTypes.h:1236
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
Definition: InstrTypes.h:1465
bool isStrictFP() const
Determine if the call requires strict floating point semantics.
Definition: InstrTypes.h:1971
CallingConv::ID getCallingConv() const
Definition: InstrTypes.h:1523
User::op_iterator arg_begin()
Return the iterator pointing to the beginning of the argument list.
Definition: InstrTypes.h:1385
bool isMustTailCall() const
Tests if this call site must be tail call optimized.
Value * getCalledOperand() const
Definition: InstrTypes.h:1458
User::op_iterator arg_end()
Return the iterator pointing to the end of the argument list.
Definition: InstrTypes.h:1391
unsigned arg_size() const
Definition: InstrTypes.h:1408
Function * getCaller()
Helper to get the caller (the parent function).
This class represents a function call, abstracting a target machine's calling convention.
bool isTailCall() const
ConstantFP - Floating Point Values [float, double].
Definition: Constants.h:269
This is the shared class of boolean and integer constants.
Definition: Constants.h:81
uint64_t getZExtValue() const
const APInt & getAPIntValue() const
int64_t getSExtValue() const
This is an important base class in LLVM.
Definition: Constant.h:42
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:110
bool isLittleEndian() const
Layout endianness...
Definition: DataLayout.h:238
unsigned getLargestLegalIntTypeSizeInBits() const
Returns the size of largest legal integer type size, or 0 if none are set.
Definition: DataLayout.cpp:900
IntegerType * getIntPtrType(LLVMContext &C, unsigned AddressSpace=0) const
Returns an integer type with size at least as big as that of a pointer in the given address space.
Definition: DataLayout.cpp:878
Align getABITypeAlign(Type *Ty) const
Returns the minimum ABI-required alignment for the specified type.
Definition: DataLayout.cpp:865
TypeSize getTypeAllocSize(Type *Ty) const
Returns the offset in bytes between successive objects of the specified type, including alignment pad...
Definition: DataLayout.h:504
A debug info location.
Definition: DebugLoc.h:33
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:155
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:220
This is a fast-path instruction selection class that generates poor code and doesn't support illegal ...
Definition: FastISel.h:66
FunctionLoweringInfo - This contains information that is global to a function that is used when lower...
bool hasOptSize() const
Optimize this function for size (-Os) or minimum size (-Oz).
Definition: Function.h:698
const DataLayout & getDataLayout() const
Get the data layout of the module this function belongs to.
Definition: Function.cpp:362
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
Definition: Function.cpp:745
uint64_t getFnAttributeAsParsedInteger(StringRef Kind, uint64_t Default=0) const
For a string attribute Kind, parse attribute as an integer.
Definition: Function.cpp:757
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition: Function.h:695
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition: Function.h:274
AttributeList getAttributes() const
Return the attribute list for this Function.
Definition: Function.h:350
BasicBlockListType::const_iterator const_iterator
Definition: Function.h:70
arg_iterator arg_begin()
Definition: Function.h:831
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition: Function.cpp:358
size_t arg_size() const
Definition: Function.h:864
Type * getReturnType() const
Returns the type of the ret val.
Definition: Function.h:212
bool isVarArg() const
isVarArg - Return true if this function takes a variable number of arguments.
Definition: Function.h:225
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition: Function.cpp:719
const GlobalValue * getGlobal() const
const GlobalObject * getAliaseeObject() const
Definition: Globals.cpp:582
bool isThreadLocal() const
If the value is "Thread Local", its value isn't shared by the threads.
Definition: GlobalValue.h:263
void setThreadLocalMode(ThreadLocalMode Val)
Definition: GlobalValue.h:267
bool hasHiddenVisibility() const
Definition: GlobalValue.h:250
StringRef getSection() const
Definition: Globals.cpp:183
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:656
bool isStrongDefinitionForLinker() const
Returns true if this global's definition will be the one chosen by the linker.
Definition: GlobalValue.h:631
const DataLayout & getDataLayout() const
Get the data layout of the module this global belongs to.
Definition: Globals.cpp:124
bool hasComdat() const
Definition: GlobalValue.h:241
Type * getValueType() const
Definition: GlobalValue.h:296
bool hasProtectedVisibility() const
Definition: GlobalValue.h:251
Common base class shared among various IRBuilders.
Definition: IRBuilder.h:91
Value * CreateExtractValue(Value *Agg, ArrayRef< unsigned > Idxs, const Twine &Name="")
Definition: IRBuilder.h:2521
Value * CreateLShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
Definition: IRBuilder.h:1442
BasicBlock * GetInsertBlock() const
Definition: IRBuilder.h:171
Value * CreateShl(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1421
Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="", bool IsNonNeg=false)
Definition: IRBuilder.h:2026
Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="", bool IsNUW=false, bool IsNSW=false)
Definition: IRBuilder.h:2012
Value * CreateOr(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1502
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value * > Args=std::nullopt, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:2417
bool hasAtomicLoad() const LLVM_READONLY
Return true if this atomic instruction loads from memory.
static constexpr LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
Definition: LowLevelType.h:42
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
Base class for LoadSDNode and StoreSDNode.
An instruction for reading from memory.
Definition: Instructions.h:174
bool isUnordered() const
Definition: Instructions.h:247
This class is used to represent ISD::LOAD nodes.
const SDValue & getBasePtr() const
ISD::LoadExtType getExtensionType() const
Return whether this is a plain node, or one of the varieties of value-extending loads.
bool hasValue() const
TypeSize getValue() const
Context object for machine code objects.
Definition: MCContext.h:83
Base class for the full range of assembler expressions which are needed for parsing.
Definition: MCExpr.h:34
Wrapper class representing physical registers. Should be passed by value.
Definition: MCRegister.h:33
MCSymbolXCOFF * getQualNameSymbol() const
static const MCSymbolRefExpr * create(const MCSymbol *Symbol, MCContext &Ctx)
Definition: MCExpr.h:393
Metadata node.
Definition: Metadata.h:1067
Machine Value Type.
SimpleValueType SimpleTy
@ INVALID_SIMPLE_VALUE_TYPE
unsigned getVectorNumElements() const
bool isVector() const
Return true if this is a vector value type.
bool isInteger() const
Return true if this is an integer or a vector integer type.
static auto integer_valuetypes()
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
static auto fixedlen_vector_valuetypes()
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
bool isScalarInteger() const
Return true if this is an integer, not including vectors.
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
static MVT getIntegerVT(unsigned BitWidth)
static auto fp_valuetypes()
void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
void setCallFrameSize(unsigned N)
Set the call frame size on entry to this basic block.
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool IsImmutable, bool isAliased=false)
Create a new object at a fixed location on the stack.
int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
void setFrameAddressIsTaken(bool T)
void setHasTailCall(bool V=true)
void setReturnAddressIsTaken(bool s)
Align getObjectAlign(int ObjectIdx) const
Return the alignment of the specified stack object.
int64_t getObjectSize(int ObjectIdx) const
Return the size of the specified object.
bool hasVAStart() const
Returns true if the function calls the llvm.va_start intrinsic.
int64_t getObjectOffset(int ObjectIdx) const
Return the assigned stack offset of the specified object from the incoming stack pointer.
MCSymbol * getPICBaseSymbol() const
getPICBaseSymbol - Return a function-local symbol to represent the PIC base.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MCContext & getContext() const
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Register addLiveIn(MCRegister PReg, const TargetRegisterClass *RC)
addLiveIn - Add the specified physical register as a live-in value and create a corresponding virtual...
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
void insert(iterator MBBI, MachineBasicBlock *MBB)
const MachineInstrBuilder & setMIFlag(MachineInstr::MIFlag Flag) const
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addRegMask(const uint32_t *Mask) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & cloneMemRefs(const MachineInstr &OtherMI) const
const MachineInstrBuilder & addUse(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register use operand.
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
const MachineInstrBuilder & addDef(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register definition operand.
Representation of each machine instruction.
Definition: MachineInstr.h:69
@ EK_LabelDifference32
EK_LabelDifference32 - Each entry is the address of the block minus the address of the jump table.
A description of a memory reference used in the backend.
LocationSize getSize() const
Return the size in bytes of the memory reference.
Flags
Flags values. These may be or'd together.
@ MOVolatile
The memory access is volatile.
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
@ MOLoad
The memory access reads data.
@ MOInvariant
The memory access always returns the same value (or traps).
@ MOStore
The memory access writes data.
Flags getFlags() const
Return the raw flags of the source value,.
MachineOperand class - Representation of each machine instruction operand.
static MachineOperand CreateImm(int64_t Val)
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
const TargetRegisterClass * getRegClass(Register Reg) const
Return the register class of the specified virtual register.
MachineInstr * getVRegDef(Register Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is ...
Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
Register getLiveInVirtReg(MCRegister PReg) const
getLiveInVirtReg - If PReg is a live-in physical register, return the corresponding live-in virtual r...
An SDNode that represents everything that will be needed to construct a MachineInstr.
This SDNode is used for target intrinsics that touch memory and need an associated MachineMemOperand.
This is an abstract virtual class for memory operations.
Align getAlign() const
AAMDNodes getAAInfo() const
Returns the AA info that describes the dereference.
MachineMemOperand * getMemOperand() const
Return a MachineMemOperand object describing the memory reference performed by operation.
const SDValue & getBasePtr() const
const MachinePointerInfo & getPointerInfo() const
const SDValue & getChain() const
EVT getMemoryVT() const
Return the type of the in-memory value.
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
uint64_t getReturnSaveOffset() const
getReturnSaveOffset - Return the previous frame offset to save the return address.
uint64_t getFramePointerSaveOffset() const
getFramePointerSaveOffset - Return the previous frame offset to save the frame pointer.
unsigned getLinkageSize() const
getLinkageSize - Return the size of the PowerPC ABI linkage area.
uint64_t getTOCSaveOffset() const
getTOCSaveOffset - Return the previous frame offset to save the TOC register – 64-bit SVR4 ABI only.
PPCFunctionInfo - This class is derived from MachineFunction private PowerPC target-specific informat...
void setVarArgsNumFPR(unsigned Num)
unsigned getVarArgsNumFPR() const
void setVarArgsNumGPR(unsigned Num)
void appendParameterType(ParamType Type)
bool isAIXFuncTLSModelOptInitDone() const
void setMinReservedArea(unsigned size)
unsigned getVarArgsNumGPR() const
unsigned getMinReservedArea() const
void setVarArgsStackOffset(int Offset)
void setVarArgsFrameIndex(int Index)
void addLiveInAttr(Register VReg, ISD::ArgFlagsTy Flags)
This function associates attributes for each live-in virtual register.
void setFramePointerSaveIndex(int Idx)
static bool hasPCRelFlag(unsigned TF)
Definition: PPCInstrInfo.h:300
bool is32BitELFABI() const
Definition: PPCSubtarget.h:219
unsigned descriptorTOCAnchorOffset() const
Definition: PPCSubtarget.h:259
bool isAIXABI() const
Definition: PPCSubtarget.h:214
bool useSoftFloat() const
Definition: PPCSubtarget.h:174
const PPCFrameLowering * getFrameLowering() const override
Definition: PPCSubtarget.h:142
bool needsSwapsForVSXMemOps() const
Definition: PPCSubtarget.h:202
bool isPPC64() const
isPPC64 - Return true if we are generating code for 64-bit pointer mode.
bool isUsingPCRelativeCalls() const
bool usesFunctionDescriptors() const
True if the ABI is descriptor based.
Definition: PPCSubtarget.h:253
MCRegister getEnvironmentPointerRegister() const
Definition: PPCSubtarget.h:271
const PPCInstrInfo * getInstrInfo() const override
Definition: PPCSubtarget.h:145
bool isSVR4ABI() const
Definition: PPCSubtarget.h:215
unsigned getCPUDirective() const
getCPUDirective - Returns the -m directive specified for the cpu.
Definition: PPCSubtarget.h:134
POPCNTDKind hasPOPCNTD() const
Definition: PPCSubtarget.h:206
bool isLittleEndian() const
Definition: PPCSubtarget.h:181
bool isTargetLinux() const
Definition: PPCSubtarget.h:212
MCRegister getTOCPointerRegister() const
Definition: PPCSubtarget.h:277
MCRegister getStackPointerRegister() const
Definition: PPCSubtarget.h:289
bool is64BitELFABI() const
Definition: PPCSubtarget.h:218
bool isELFv2ABI() const
const PPCTargetMachine & getTargetMachine() const
Definition: PPCSubtarget.h:155
bool isPredictableSelectIsExpensive() const
Definition: PPCSubtarget.h:295
bool enableMachineScheduler() const override
Scheduling customization.
const PPCRegisterInfo * getRegisterInfo() const override
Definition: PPCSubtarget.h:152
bool isGVIndirectSymbol(const GlobalValue *GV) const
True if the GV will be accessed via an indirect symbol.
unsigned descriptorEnvironmentPointerOffset() const
Definition: PPCSubtarget.h:265
MachineBasicBlock * emitEHSjLjLongJmp(MachineInstr &MI, MachineBasicBlock *MBB) const
CCAssignFn * ccAssignFnForCall(CallingConv::ID CC, bool Return, bool IsVarArg) const
bool isTruncateFree(Type *Ty1, Type *Ty2) const override
isTruncateFree - Return true if it's free to truncate a value of type Ty1 to type Ty2.
Value * emitMaskedAtomicRMWIntrinsic(IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr, Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const override
Perform a masked atomicrmw using a target-specific intrinsic.
MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *MBB) const override
This method should be implemented by targets that mark instructions with the 'usesCustomInserter' fla...
bool isFPExtFree(EVT DestVT, EVT SrcVT) const override
Return true if an fpext operation is free (for instance, because single-precision floating-point numb...
PPC::AddrMode SelectForceXFormMode(SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG) const
SelectForceXFormMode - Given the specified address, force it to be represented as an indexed [r+r] op...
Instruction * emitTrailingFence(IRBuilderBase &Builder, Instruction *Inst, AtomicOrdering Ord) const override
bool hasInlineStackProbe(const MachineFunction &MF) const override
MachineBasicBlock * emitEHSjLjSetJmp(MachineInstr &MI, MachineBasicBlock *MBB) const
const char * getTargetNodeName(unsigned Opcode) const override
getTargetNodeName() - This method returns the name of a target specific DAG node.
bool supportsTailCallFor(const CallBase *CB) const
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override
Return true if folding a constant offset with the given GlobalAddress is legal.
MachineBasicBlock * emitProbedAlloca(MachineInstr &MI, MachineBasicBlock *MBB) const
bool isZExtFree(SDValue Val, EVT VT2) const override
Return true if zero-extending the specific node Val to type VT2 is free (either because it's implicit...
MachineBasicBlock * EmitPartwordAtomicBinary(MachineInstr &MI, MachineBasicBlock *MBB, bool is8bit, unsigned Opcode, unsigned CmpOpcode=0, unsigned CmpPred=0) const
SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, NegatibleCost &Cost, unsigned Depth=0) const override
Return the newly negated expression if the cost is not expensive and set the cost in Cost to indicate...
bool SelectAddressRegImm(SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG, MaybeAlign EncodingAlignment) const
SelectAddressRegImm - Returns true if the address N can be represented by a base register plus a sign...
bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I, MachineFunction &MF, unsigned Intrinsic) const override
Given an intrinsic, checks if on the target the intrinsic will need to map to a MemIntrinsicNode (tou...
SDValue expandVSXLoadForLE(SDNode *N, DAGCombinerInfo &DCI) const
bool splitValueIntoRegisterParts(SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts, unsigned NumParts, MVT PartVT, std::optional< CallingConv::ID > CC) const override
Target-specific splitting of values into parts that fit a register storing a legal type.
void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const override
LowerAsmOperandForConstraint - Lower the specified operand into the Ops vector.
void ReplaceNodeResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG) const override
ReplaceNodeResults - Replace the results of node with an illegal result type with new values built ou...
TargetLowering::AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override
Returns how the IR-level AtomicExpand pass should expand the given AtomicRMW, if at all.
bool SelectAddressRegReg(SDValue N, SDValue &Base, SDValue &Index, SelectionDAG &DAG, MaybeAlign EncodingAlignment=std::nullopt) const
SelectAddressRegReg - Given the specified addressed, check to see if it can be more efficiently repre...
MachineBasicBlock * EmitAtomicBinary(MachineInstr &MI, MachineBasicBlock *MBB, unsigned AtomicSize, unsigned BinOpcode, unsigned CmpOpcode=0, unsigned CmpPred=0) const
SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created) const override
Targets may override this function to provide custom SDIV lowering for power-of-2 denominators.
void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth=0) const override
Determine which of the bits specified in Mask are known to be either zero or one and return them in t...
bool SelectAddressRegRegOnly(SDValue N, SDValue &Base, SDValue &Index, SelectionDAG &DAG) const
SelectAddressRegRegOnly - Given the specified addressed, force it to be represented as an indexed [r+...
bool useSoftFloat() const override
SDValue getPICJumpTableRelocBase(SDValue Table, SelectionDAG &DAG) const override
Returns relocation base for the given PIC jumptable.
void insertSSPDeclarations(Module &M) const override
Inserts necessary declarations for SSP (stack protection) purpose.
Value * emitMaskedAtomicCmpXchgIntrinsic(IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr, Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const override
Perform a masked cmpxchg using a target-specific intrinsic.
ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &info, const char *constraint) const override
Examine constraint string and operand type and determine a weight value.
uint64_t getByValTypeAlignment(Type *Ty, const DataLayout &DL) const override
getByValTypeAlignment - Return the desired alignment for ByVal aggregate function arguments in the ca...
bool enableAggressiveFMAFusion(EVT VT) const override
Return true if target always benefits from combining into FMA for a given value type.
Register getRegisterByName(const char *RegName, LLT VT, const MachineFunction &MF) const override
Return the register ID of the name passed in.
bool decomposeMulByConstant(LLVMContext &Context, EVT VT, SDValue C) const override
Return true if it is profitable to transform an integer multiplication-by-constant into simpler opera...
unsigned getJumpTableEncoding() const override
Return the entry encoding for a jump table in the current function.
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I=nullptr) const override
isLegalAddressingMode - Return true if the addressing mode represented by AM is legal for this target...
bool preferIncOfAddToSubOfNot(EVT VT) const override
These two forms are equivalent: sub y, (xor x, -1) add (add x, 1), y The variant with two add's is IR...
bool shouldConvertConstantLoadToIntImm(const APInt &Imm, Type *Ty) const override
Returns true if it is beneficial to convert a load of a constant to just the constant itself.
const MCPhysReg * getScratchRegisters(CallingConv::ID CC) const override
Returns a 0 terminated array of registers that can be safely used as scratch registers.
bool getPreIndexedAddressParts(SDNode *N, SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG) const override
getPreIndexedAddressParts - returns true by value, base pointer and offset pointer and addressing mod...
bool isProfitableToHoist(Instruction *I) const override
isProfitableToHoist - Check if it is profitable to hoist instruction I to its dominator block.
bool isFPImmLegal(const APFloat &Imm, EVT VT, bool ForCodeSize) const override
Returns true if the target can instruction select the specified FP immediate natively.
ConstraintType getConstraintType(StringRef Constraint) const override
getConstraintType - Given a constraint, return the type of constraint it is for this target.
const MCExpr * getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI, MCContext &Ctx) const override
This returns the relocation base for the given PIC jumptable, the same as getPICJumpTableRelocBase,...
bool shallExtractConstSplatVectorElementToStore(Type *VectorTy, unsigned ElemSizeInBits, unsigned &Index) const override
Return true if the target shall perform extract vector element and store given that the vector is kno...
EVT getOptimalMemOpType(const MemOp &Op, const AttributeList &FuncAttributes) const override
It returns EVT::Other if the type should be determined using generic target-independent logic.
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override
This method will be invoked for all target nodes and for any target-independent nodes that the target...
SDValue expandVSXStoreForLE(SDNode *N, DAGCombinerInfo &DCI) const
void CollectTargetIntrinsicOperands(const CallInst &I, SmallVectorImpl< SDValue > &Ops, SelectionDAG &DAG) const override
bool useLoadStackGuardNode() const override
Override to support customized stack guard loading.
unsigned getStackProbeSize(const MachineFunction &MF) const
PPCTargetLowering(const PPCTargetMachine &TM, const PPCSubtarget &STI)
TargetLowering::AtomicExpansionKind shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const override
Returns how the given atomic cmpxchg should be expanded by the IR-level AtomicExpand pass.
bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, EVT VT) const override
isFMAFasterThanFMulAndFAdd - Return true if an FMA operation is faster than a pair of fmul and fadd i...
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const override
Is unaligned memory access allowed for the given type, and is it fast relative to software emulation.
bool shouldExpandBuildVectorWithShuffles(EVT VT, unsigned DefinedValues) const override
bool SelectAddressRegImm34(SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG) const
Similar to the 16-bit case but for instructions that take a 34-bit displacement field (prefixed loads...
std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const override
Given a physical register constraint (e.g.
Register getExceptionSelectorRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception typeid on entry to a la...
bool isJumpTableRelative() const override
Register getExceptionPointerRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception address on entry to an ...
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
LowerOperation - Provide custom lowering hooks for some operations.
PPC::AddrMode SelectOptimalAddrMode(const SDNode *Parent, SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG, MaybeAlign Align) const
SelectOptimalAddrMode - Based on a node N and its Parent (a MemSDNode), compute the address flags of...
Value * getSDagStackGuard(const Module &M) const override
Return the variable that's previously inserted by insertSSPDeclarations, if any, otherwise return nul...
bool SelectAddressPCRel(SDValue N, SDValue &Base) const
SelectAddressPCRel - Represent the specified address as pc relative to be represented as [pc+imm].
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const override
getSetCCResultType - Return the ISD::SETCC ValueType
bool SelectAddressEVXRegReg(SDValue N, SDValue &Base, SDValue &Index, SelectionDAG &DAG) const
SelectAddressEVXRegReg - Given the specified address, check to see if it can be more efficiently re...
bool isLegalICmpImmediate(int64_t Imm) const override
isLegalICmpImmediate - Return true if the specified immediate is legal icmp immediate,...
bool isAccessedAsGotIndirect(SDValue N) const
Align getPrefLoopAlignment(MachineLoop *ML) const override
Return the preferred loop alignment.
FastISel * createFastISel(FunctionLoweringInfo &FuncInfo, const TargetLibraryInfo *LibInfo) const override
createFastISel - This method returns a target-specific FastISel object, or null if the target does no...
bool shouldInlineQuadwordAtomics() const
Instruction * emitLeadingFence(IRBuilderBase &Builder, Instruction *Inst, AtomicOrdering Ord) const override
Inserts in the IR a target-specific intrinsic specifying a fence.
bool isLegalAddImmediate(int64_t Imm) const override
isLegalAddImmediate - Return true if the specified immediate is legal add immediate,...
Common code between 32-bit and 64-bit PowerPC targets.
static PointerType * getUnqual(Type *ElementType)
This constructs a pointer to an object of the specified type in the default address space (address sp...
Definition: DerivedTypes.h:662
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:91
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
This class provides iterator support for SDUse operands that use a specific SDNode.
Represents one node in the SelectionDAG.
ArrayRef< SDUse > ops() const
void dump() const
Dump this node, for debugging.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
bool hasOneUse() const
Return true if there is exactly one use of this node.
iterator_range< use_iterator > uses()
SDNodeFlags getFlags() const
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
unsigned getNumValues() const
Return the number of values defined/returned by this operator.
const SDValue & getOperand(unsigned Num) const
uint64_t getConstantOperandVal(unsigned Num) const
Helper method returns the integer value of a ConstantSDNode operand.
use_iterator use_begin() const
Provide iteration support to walk over all uses of an SDNode.
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
bool hasNUsesOfValue(unsigned NUses, unsigned Value) const
Return true if there are exactly NUSES uses of the indicated value.
op_iterator op_end() const
op_iterator op_begin() const
static use_iterator use_end()
Represents a use of a SDNode.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isUndef() const
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
SDValue getValue(unsigned R) const
void dump() const
EVT getValueType() const
Return the ValueType of the referenced return value.
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
const SDValue & getOperand(unsigned i) const
uint64_t getConstantOperandVal(unsigned i) const
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
unsigned getOpcode() const
unsigned getNumOperands() const
static SectionKind getMetadata()
Definition: SectionKind.h:188
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
Definition: SelectionDAG.h:227
SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:734
SDValue getStackArgumentTokenFactor(SDValue Chain)
Compute a TokenFactor to force all the incoming stack arguments to be loaded from the stack.
const TargetSubtargetInfo & getSubtarget() const
Definition: SelectionDAG.h:488
SDValue getMergeValues(ArrayRef< SDValue > Ops, const SDLoc &dl)
Create a MERGE_VALUES node from the given operands.
SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
SDValue getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src, SDValue Size, Align Alignment, bool isVol, bool AlwaysInline, const CallInst *CI, std::optional< bool > OverrideTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo, const AAMDNodes &AAInfo=AAMDNodes(), AAResults *AA=nullptr)
SDValue makeEquivalentMemoryOrdering(SDValue OldChain, SDValue NewMemOpChain)
If an existing load has uses of its chain, create a token factor node with that chain and the new mem...
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Chain=SDValue(), bool IsSignaling=false)
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...
SDValue getConstantFP(double Val, const SDLoc &DL, EVT VT, bool isTarget=false)
Create a ConstantFPSDNode wrapping a constant value.
SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
Align getEVTAlign(EVT MemoryVT) const
Compute the default alignment value for the given type.
void addNoMergeSiteInfo(const SDNode *Node, bool NoMerge)
Set NoMergeSiteInfo to be associated with Node if NoMerge is true.
SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
const TargetLowering & getTargetLoweringInfo() const
Definition: SelectionDAG.h:492
static constexpr unsigned MaxRecursionDepth
Definition: SelectionDAG.h:451
SDValue getTargetJumpTable(int JTI, EVT VT, unsigned TargetFlags=0)
Definition: SelectionDAG.h:744
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getCALLSEQ_END(SDValue Chain, SDValue Op1, SDValue Op2, SDValue InGlue, const SDLoc &DL)
Return a new CALLSEQ_END node, which always must have a glue result (to ensure it's not CSE'd).
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
Definition: SelectionDAG.h:840
bool isSplatValue(SDValue V, const APInt &DemandedElts, APInt &UndefElts, unsigned Depth=0) const
Test whether V has a splatted value for all the demanded elements.
SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
SDValue getSelect(const SDLoc &DL, EVT VT, SDValue Cond, SDValue LHS, SDValue RHS, SDNodeFlags Flags=SDNodeFlags())
Helper function to make it easier to build Select's if you just have operands and don't want to check...
const DataLayout & getDataLayout() const
Definition: SelectionDAG.h:486
SDValue getTargetFrameIndex(int FI, EVT VT)
Definition: SelectionDAG.h:739
SDValue getTokenFactor(const SDLoc &DL, SmallVectorImpl< SDValue > &Vals)
Creates a new TokenFactor containing Vals.
bool areNonVolatileConsecutiveLoads(LoadSDNode *LD, LoadSDNode *Base, unsigned Bytes, int Dist) const
Return true if loads are next to each other and can be merged.
SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
SDValue getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, EVT SVT, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getMDNode(const MDNode *MD)
Return an MDNodeSDNode which holds an MDNode.
void ReplaceAllUsesWith(SDValue From, SDValue To)
Modify anything using 'From' to use 'To' instead.
SDValue getCommutedVectorShuffle(const ShuffleVectorSDNode &SV)
Returns an ISD::VECTOR_SHUFFLE node semantically equivalent to the shuffle node in input but with swa...
SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
SDValue getCALLSEQ_START(SDValue Chain, uint64_t InSize, uint64_t OutSize, const SDLoc &DL)
Return a new CALLSEQ_START node, that starts new call frame, in which InSize bytes are set up inside ...
SDValue getRegister(unsigned Reg, EVT VT)
SDValue getSExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either sign-extending or trunca...
SDValue getBoolExtOrTrunc(SDValue Op, const SDLoc &SL, EVT VT, EVT OpVT)
Convert Op, which must be of integer type, to the integer type VT, by using an extension appropriate ...
SDValue getExternalSymbol(const char *Sym, EVT VT)
const TargetMachine & getTarget() const
Definition: SelectionDAG.h:487
SDValue getAnyExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either any-extending or truncat...
SDValue getCopyToReg(SDValue Chain, const SDLoc &dl, unsigned Reg, SDValue N)
Definition: SelectionDAG.h:785
SDValue getSelectCC(const SDLoc &DL, SDValue LHS, SDValue RHS, SDValue True, SDValue False, ISD::CondCode Cond)
Helper function to make it easier to build SelectCC's if you just have an ISD::CondCode instead of an...
SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
SDValue getValueType(EVT)
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
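The node-creation helpers in this list (getNode, getConstant, getSetCC, getSelect, and friends) are the building blocks of custom lowering code. A small hedged sketch, assuming it runs inside a TargetLowering member function with an integer-typed SDValue Op and a SelectionDAG DAG in scope; the expression built is purely illustrative:

// Build (select (setcc Op, 0, seteq), 1, (add Op, 1)) as an example of node
// construction; the values and operation are illustrative only.
EVT VT = Op.getValueType();
SDLoc dl(Op);
SDValue One  = DAG.getConstant(1, dl, VT);
SDValue Zero = DAG.getConstant(0, dl, VT);
// Inc = (add Op, 1)
SDValue Inc = DAG.getNode(ISD::ADD, dl, VT, Op, One);
// Cmp = (setcc Op, 0, seteq), using the target's SETCC result type.
EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
SDValue Cmp = DAG.getSetCC(dl, CCVT, Op, Zero, ISD::SETEQ);
// Res = (select Cmp, 1, Inc)
SDValue Res = DAG.getSelect(dl, VT, Cmp, One, Inc);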
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
Definition: SelectionDAG.h:688
unsigned ComputeNumSignBits(SDValue Op, unsigned Depth=0) const
Return the number of times the sign bit of the register is replicated into the other bits.
SDValue getBoolConstant(bool V, const SDLoc &DL, EVT VT, EVT OpVT)
Create a true or false constant of type VT using the target's BooleanContent for type OpVT.
SDValue getTargetBlockAddress(const BlockAddress *BA, EVT VT, int64_t Offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:780
bool isBaseWithConstantOffset(SDValue Op) const
Return true if the specified operand is an ISD::ADD with a ConstantSDNode on the right-hand side,...
SDValue getVectorIdxConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone.
MachineFunction & getMachineFunction() const
Definition: SelectionDAG.h:481
SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, unsigned Reg, EVT VT)
Definition: SelectionDAG.h:811
SDValue getSplatBuildVector(EVT VT, const SDLoc &DL, SDValue Op)
Return a splat ISD::BUILD_VECTOR node, consisting of Op splatted to all elements.
Definition: SelectionDAG.h:857
SDValue getFrameIndex(int FI, EVT VT, bool isTarget=false)
KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
SDValue getRegisterMask(const uint32_t *RegMask)
SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
SDValue getCondCode(ISD::CondCode Cond)
bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
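computeKnownBits and MaskedValueIsZero are the usual way for lowering and combine code to reason about individual value bits. A brief sketch under the assumption that N is an integer-valued SDNode and DAG is the current SelectionDAG:

// Query known bits of the first result of N.
SDValue Val(N, 0);
KnownBits Known = DAG.computeKnownBits(Val);
if (Known.isNonNegative()) {
  // The sign bit is known to be zero.
}
// Check specific bits directly: are the low two bits known zero, i.e. is the
// value a multiple of 4?
APInt LowTwo = APInt::getLowBitsSet(Val.getScalarValueSizeInBits(), 2);
if (DAG.MaskedValueIsZero(Val, LowTwo)) {
  // Val is known to be a multiple of 4.
}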
SDValue getObjectPtrOffset(const SDLoc &SL, SDValue Ptr, TypeSize Offset)
Create an add instruction with appropriate flags when used for addressing some offset of an object.
LLVMContext * getContext() const
Definition: SelectionDAG.h:499
SDValue getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef< SDValue > Ops, EVT MemVT, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags Flags=MachineMemOperand::MOLoad|MachineMemOperand::MOStore, LocationSize Size=0, const AAMDNodes &AAInfo=AAMDNodes())
Creates a MemIntrinsicNode that may produce a result and takes a list of operands.
SDValue getTargetExternalSymbol(const char *Sym, EVT VT, unsigned TargetFlags=0)
SDValue getMCSymbol(MCSymbol *Sym, EVT VT)
SDValue CreateStackTemporary(TypeSize Bytes, Align Alignment)
Create a stack temporary based on the size in bytes and the alignment.
SDNode * UpdateNodeOperands(SDNode *N, SDValue Op)
Mutate the specified node in-place to have the specified operands.
SDValue getTargetConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:751
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
Definition: SelectionDAG.h:568
std::pair< SDValue, SDValue > SplitScalar(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the scalar node with EXTRACT_ELEMENT using the provided VTs and return the low/high part.
SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
This SDNode is used to implement the code generator support for the llvm IR shufflevector instruction...
int getMaskElt(unsigned Idx) const
ArrayRef< int > getMask() const
size_type size() const
Definition: SmallPtrSet.h:94
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:412
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:344
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:479
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:135
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
Definition: SmallSet.h:166
void clear()
Definition: SmallSet.h:218
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:179
bool empty() const
Definition: SmallVector.h:94
size_t size() const
Definition: SmallVector.h:91
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:586
void push_back(const T &Elt)
Definition: SmallVector.h:426
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
StackOffset holds a fixed and a scalable offset in bytes.
Definition: TypeSize.h:33
This class is used to represent ISD::STORE nodes.
const SDValue & getBasePtr() const
const SDValue & getValue() const
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
constexpr size_t size() const
size - Get the string size.
Definition: StringRef.h:137
constexpr const char * data() const
data - Get a pointer to the start of the string (which may not be null terminated).
Definition: StringRef.h:131
A switch()-like statement whose cases are string literals.
Definition: StringSwitch.h:44
StringSwitch & Case(StringLiteral S, T Value)
Definition: StringSwitch.h:69
R Default(T Value)
Definition: StringSwitch.h:182
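StringSwitch, shown in the entries above, maps string literals to values and is commonly used for parsing register names or inline-asm constraints. A generic usage sketch; the function name, register names, and numbers below are illustrative and not taken from this file:

#include "llvm/ADT/StringSwitch.h"

unsigned parseRegName(llvm::StringRef Name) {
  // The first matching Case wins; Default supplies the fallback value.
  return llvm::StringSwitch<unsigned>(Name)
      .Case("r0", 0)
      .Case("r1", 1)
      .Case("r2", 2)
      .Default(~0u); // Not a recognized register name.
}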
Class to represent struct types.
Definition: DerivedTypes.h:216
Information about stack frame layout on the target.
unsigned getStackAlignment() const
getStackAlignment - This method returns the number of bytes to which the stack pointer must be aligne...
TargetInstrInfo - Interface to description of machine instruction set.
Provides information about what library functions are available for the current target.
void setBooleanVectorContents(BooleanContent Ty)
Specify how the target extends the result of a vector boolean value from a vector of i1 to a wider ty...
void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action)
Indicate that the specified operation does not work with the specified type and indicate what to do a...
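setOperationAction and the related configuration hooks listed further down (addRegisterClass, setLoadExtAction, setTruncStoreAction, computeRegisterProperties) are typically called from a target's TargetLowering constructor. A hedged sketch, where MyTargetLowering, MyTargetSubtarget, and MyTarget::GPRRegClass are placeholder names and the specific legality choices are illustrative rather than PPC's actual settings:

MyTargetLowering::MyTargetLowering(const TargetMachine &TM,
                                   const MyTargetSubtarget &STI)
    : TargetLowering(TM) {
  // Register classes must be registered before computeRegisterProperties().
  addRegisterClass(MVT::i32, &MyTarget::GPRRegClass);

  // Operations the hardware lacks are Expanded; Custom operations are routed
  // to LowerOperation().
  setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
  setOperationAction(ISD::BR_JT, MVT::Other, Custom);

  // Extending loads and truncating stores that need explicit handling.
  setLoadExtAction(ISD::SEXTLOAD, MVT::i32, MVT::i1, Promote);
  setTruncStoreAction(MVT::i64, MVT::i8, Expand);

  computeRegisterProperties(STI.getRegisterInfo());
}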
virtual bool shouldSignExtendTypeInLibCall(EVT Type, bool IsSigned) const
Returns true if arguments should be sign-extended in lib calls.
bool PredictableSelectIsExpensive
Tells the code generator that select is more expensive than a branch if the branch is usually predict...
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
virtual bool shouldExpandBuildVectorWithShuffles(EVT, unsigned DefinedValues) const
unsigned MaxStoresPerMemcpyOptSize
Likewise for functions with the OptSize attribute.
MachineBasicBlock * emitPatchPoint(MachineInstr &MI, MachineBasicBlock *MBB) const
Replace/modify any TargetFrameIndex operands with a target-dependent sequence of memory operands that...
virtual const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const
Return the register class that should be used for the specified value type.
void setMinStackArgumentAlignment(Align Alignment)
Set the minimum stack alignment of an argument.
virtual MVT getVectorIdxTy(const DataLayout &DL) const
Returns the type to be used for the index operand of: ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT...
const TargetMachine & getTargetMachine() const
unsigned MaxLoadsPerMemcmp
Specify maximum number of load instructions per memcmp call.
virtual bool isZExtFree(Type *FromTy, Type *ToTy) const
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
virtual Value * getSDagStackGuard(const Module &M) const
Return the variable that's previously inserted by insertSSPDeclarations, if any, otherwise return nul...
void setIndexedLoadAction(ArrayRef< unsigned > IdxModes, MVT VT, LegalizeAction Action)
Indicate that the specified indexed load does or does not work with the specified type and indicate w...
void setPrefLoopAlignment(Align Alignment)
Set the target's preferred loop alignment.
void setMaxAtomicSizeInBitsSupported(unsigned SizeInBits)
Set the maximum atomic operation size supported by the backend.
Sched::Preference getSchedulingPreference() const
Return target scheduling preference.
void setMinFunctionAlignment(Align Alignment)
Set the target's minimum function alignment.
bool isOperationCustom(unsigned Op, EVT VT) const
Return true if the operation uses custom lowering, regardless of whether the type is legal or not.
unsigned MaxStoresPerMemsetOptSize
Likewise for functions with the OptSize attribute.
bool hasBigEndianPartOrdering(EVT VT, const DataLayout &DL) const
When splitting a value of the specified type into parts, does the Lo or Hi part come first?...
EVT getShiftAmountTy(EVT LHSTy, const DataLayout &DL) const
Returns the type for the shift amount of a shift opcode.
void setBooleanContents(BooleanContent Ty)
Specify how the target extends the result of integer and floating point boolean values from i1 to a w...
unsigned MaxStoresPerMemmove
Specify maximum number of store instructions per memmove call.
virtual Align getPrefLoopAlignment(MachineLoop *ML=nullptr) const
Return the preferred loop alignment.
void computeRegisterProperties(const TargetRegisterInfo *TRI)
Once all of the register classes are added, this allows us to compute derived properties we expose.
unsigned MaxStoresPerMemmoveOptSize
Likewise for functions with the OptSize attribute.
void addRegisterClass(MVT VT, const TargetRegisterClass *RC)
Add the specified register class as an available regclass for the specified value type.
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
void setIndexedStoreAction(ArrayRef< unsigned > IdxModes, MVT VT, LegalizeAction Action)
Indicate that the specified indexed store does or does not work with the specified type and indicate ...
virtual bool isJumpTableRelative() const
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
void setLibcallName(RTLIB::Libcall Call, const char *Name)
Rename the default libcall routine name for the specified libcall.
void setPrefFunctionAlignment(Align Alignment)
Set the target's preferred function alignment.
bool isOperationLegal(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target.
virtual AtomicExpansionKind shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const
Returns how the given atomic cmpxchg should be expanded by the IR-level AtomicExpand pass.
unsigned MaxStoresPerMemset
Specify maximum number of store instructions per memset call.
void setMinimumJumpTableEntries(unsigned Val)
Indicate the minimum number of blocks to generate jump tables.
void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified truncating store does not work with the specified type and indicate what ...
bool isOperationLegalOrCustom(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
unsigned MaxLoadsPerMemcmpOptSize
Likewise for functions with the OptSize attribute.
void setStackPointerRegisterToSaveRestore(Register R)
If set to a physical register, this specifies the register that llvm.savestack/llvm....
void AddPromotedToType(unsigned Opc, MVT OrigVT, MVT DestVT)
If Opc/OrigVT is specified as being promoted, the promotion code defaults to trying a larger integer/...
AtomicExpansionKind
Enum that specifies what an atomic load/AtomicRMWInst is expanded to, if at all.
void setCondCodeAction(ArrayRef< ISD::CondCode > CCs, MVT VT, LegalizeAction Action)
Indicate that the specified condition code is or isn't supported on the target and indicate what to d...
void setTargetDAGCombine(ArrayRef< ISD::NodeType > NTs)
Targets should invoke this method for each target independent node that they want to provide a custom...
virtual AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const
Returns how the IR-level AtomicExpand pass should expand the given AtomicRMW, if at all.
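Targets override shouldExpandAtomicRMWInIR (and the cmpxchg counterpart above) to tell the AtomicExpand pass which atomic operations need IR-level expansion. A hedged sketch with MyTargetLowering as a placeholder class and the 64-bit cutoff chosen only for illustration:

TargetLowering::AtomicExpansionKind
MyTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
  // Operations wider than the largest natively supported width are expanded
  // (typically into a libcall) by the AtomicExpand pass.
  unsigned Size = AI->getType()->getPrimitiveSizeInBits();
  if (Size > 64)
    return AtomicExpansionKind::Expand;
  return AtomicExpansionKind::None; // Handled natively during selection.
}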
void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified load with extension does not work with the specified type and indicate wh...
unsigned GatherAllAliasesMaxDepth
Depth that GatherAllAliases should continue looking for chain dependencies when trying to find a more...
NegatibleCost
Enum that specifies when a float negation is beneficial.
std::vector< ArgListEntry > ArgListTy
void setHasMultipleConditionRegisters(bool hasManyRegs=true)
Tells the code generator that the target has multiple (allocatable) condition registers that can be u...
unsigned MaxStoresPerMemcpy
Specify maximum number of store instructions per memcpy call.
void setSchedulingPreference(Sched::Preference Pref)
Specify the target scheduling preference.
virtual void insertSSPDeclarations(Module &M) const
Inserts necessary declarations for SSP (stack protection) purpose.
void setJumpIsExpensive(bool isExpensive=true)
Tells the code generator not to expand logic operations on comparison predicates into separate sequen...
virtual MCSymbol * getFunctionEntryPointSymbol(const GlobalValue *Func, const TargetMachine &TM) const
If supported, return the function entry point symbol.
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
virtual const MCExpr * getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI, MCContext &Ctx) const
This returns the relocation base for the given PIC jumptable, the same as getPICJumpTableRelocBase,...
SDValue lowerCmpEqZeroToCtlzSrl(SDValue Op, SelectionDAG &DAG) const
virtual bool useLoadStackGuardNode() const
If this function returns true, SelectionDAGBuilder emits a LOAD_STACK_GUARD node when it is lowering ...
void softenSetCCOperands(SelectionDAG &DAG, EVT VT, SDValue &NewLHS, SDValue &NewRHS, ISD::CondCode &CCCode, const SDLoc &DL, const SDValue OldLHS, const SDValue OldRHS) const
Soften the operands of a comparison.
std::pair< SDValue, SDValue > makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT, ArrayRef< SDValue > Ops, MakeLibCallOptions CallOptions, const SDLoc &dl, SDValue Chain=SDValue()) const
Returns a pair of (return value, chain).
SDValue getCheaperNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, unsigned Depth=0) const
This is the helper function to return the newly negated expression only when the cost is cheaper.
virtual ConstraintType getConstraintType(StringRef Constraint) const
Given a constraint, return the type of constraint it is for this target.
virtual SDValue LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA, SelectionDAG &DAG) const
Lower TLS global address SDNode for target independent emulated TLS model.
std::pair< SDValue, SDValue > LowerCallTo(CallLoweringInfo &CLI) const
This function lowers an abstract call to a function into an actual call.
bool isPositionIndependent() const
virtual SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, NegatibleCost &Cost, unsigned Depth=0) const
Return the newly negated expression if the cost is not expensive and set the cost in Cost to indicate...
virtual ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &info, const char *constraint) const
Examine constraint string and operand type and determine a weight value.
virtual SDValue getSqrtInputTest(SDValue Operand, SelectionDAG &DAG, const DenormalMode &Mode) const
Return a target-dependent comparison result if the input operand is suitable for use with a square ro...
virtual SDValue getPICJumpTableRelocBase(SDValue Table, SelectionDAG &DAG) const
Returns relocation base for the given PIC jumptable.
virtual std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const
Given a physical register constraint (e.g.
bool verifyReturnAddressArgumentIsConstant(SDValue Op, SelectionDAG &DAG) const
bool isInTailCallPosition(SelectionDAG &DAG, SDNode *Node, SDValue &Chain) const
Check whether a given call node is in tail position within its function.
virtual SDValue getSqrtResultForDenormInput(SDValue Operand, SelectionDAG &DAG) const
Return a target-dependent result if the input operand is not suitable for use with a square root esti...
virtual void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const
Lower the specified operand into the Ops vector.
virtual bool isGAPlusOffset(SDNode *N, const GlobalValue *&GA, int64_t &Offset) const
Returns true (and the GlobalValue and the offset) if the node is a GlobalAddress + offset.
virtual unsigned getJumpTableEncoding() const
Return the entry encoding for a jump table in the current function.
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:77
TLSModel::Model getTLSModel(const GlobalValue *GV) const
Returns the TLS model which should be used for the given global variable.
bool useEmulatedTLS() const
Returns true if this target uses emulated TLS.
Reloc::Model getRelocationModel() const
Returns the code generation relocation model.
bool shouldAssumeDSOLocal(const GlobalValue *GV) const
TargetOptions Options
CodeModel::Model getCodeModel() const
Returns the code model.
unsigned UnsafeFPMath
UnsafeFPMath - This flag is enabled when the -enable-unsafe-fp-math flag is specified on the command ...
unsigned NoInfsFPMath
NoInfsFPMath - This flag is enabled when the -enable-no-infs-fp-math flag is specified on the command...
unsigned PPCGenScalarMASSEntries
Enables scalar MASS conversions.
unsigned NoNaNsFPMath
NoNaNsFPMath - This flag is enabled when the -enable-no-nans-fp-math flag is specified on the command...
unsigned GuaranteedTailCallOpt
GuaranteedTailCallOpt - This flag is enabled when -tailcallopt is specified on the commandline.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition: TypeSize.h:345
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:265
bool isFloatTy() const
Return true if this is 'float', a 32-bit IEEE fp type.
Definition: Type.h:154
bool isEmptyTy() const
Return true if this type is empty, that is, it has no elements or all of its elements are empty.
@ FloatTyID
32-bit floating point type
Definition: Type.h:58
@ DoubleTyID
64-bit floating point type
Definition: Type.h:59
@ FP128TyID
128-bit floating point type (112-bit significand)
Definition: Type.h:61
static Type * getVoidTy(LLVMContext &C)
bool isSized(SmallPtrSetImpl< Type * > *Visited=nullptr) const
Return true if it makes sense to take the size of this type.
Definition: Type.h:302
bool isDoubleTy() const
Return true if this is 'double', a 64-bit IEEE fp type.
Definition: Type.h:157
bool isFunctionTy() const
True if this is an instance of FunctionType.
Definition: Type.h:246
static IntegerType * getInt64Ty(LLVMContext &C)
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:228
TypeID getTypeID() const
Return the type id for the type.
Definition: Type.h:137
TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition: Type.h:348
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
Value * getOperand(unsigned i) const
Definition: User.h:169
unsigned getNumOperands() const
Definition: User.h:191
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition: Value.h:434
const ParentTy * getParent() const
Definition: ilist_node.h:32
self_iterator getIterator()
Definition: ilist_node.h:132
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:121
@ Entry
Definition: COFF.h:811
@ Cold
Attempts to make code in the caller as efficient as possible under the assumption that the call is no...
Definition: CallingConv.h:47
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition: CallingConv.h:41
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
bool isNON_EXTLoad(const SDNode *N)
Returns true if the specified node is a non-extending load.
NodeType
ISD::NodeType enum - This enum defines the target-independent operators for a SelectionDAG.
Definition: ISDOpcodes.h:40
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition: ISDOpcodes.h:778
@ MERGE_VALUES
MERGE_VALUES - This node takes multiple discrete operands and returns them all as its individual resu...
Definition: ISDOpcodes.h:243
@ STACKRESTORE
STACKRESTORE has two operands, an input chain and a pointer to restore to it returns an output chain.
Definition: ISDOpcodes.h:1167
@ STACKSAVE
STACKSAVE - STACKSAVE has one operand, an input chain.
Definition: ISDOpcodes.h:1163
@ TargetConstantPool
Definition: ISDOpcodes.h:174
@ STRICT_FSETCC
STRICT_FSETCC/STRICT_FSETCCS - Constrained versions of SETCC, used for floating-point operands only.
Definition: ISDOpcodes.h:490
@ DELETED_NODE
DELETED_NODE - This is an illegal value that is used to catch errors.
Definition: ISDOpcodes.h:44
@ EH_SJLJ_LONGJMP
OUTCHAIN = EH_SJLJ_LONGJMP(INCHAIN, buffer) This corresponds to the eh.sjlj.longjmp intrinsic.
Definition: ISDOpcodes.h:153
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition: ISDOpcodes.h:257
@ BSWAP
Byte Swap and Counting operators.
Definition: ISDOpcodes.h:742
@ VAEND
VAEND, VASTART - VAEND and VASTART have three operands: an input chain, pointer, and a SRCVALUE.
Definition: ISDOpcodes.h:1196
@ ATOMIC_STORE
OUTCHAIN = ATOMIC_STORE(INCHAIN, ptr, val) This corresponds to "store atomic" instruction.
Definition: ISDOpcodes.h:1282
@ STRICT_FCEIL
Definition: ISDOpcodes.h:440
@ ADDC
Carry-setting nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:276
@ ADD
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:246
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
Definition: ISDOpcodes.h:1072
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition: ISDOpcodes.h:811
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition: ISDOpcodes.h:497
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition: ISDOpcodes.h:205
@ GlobalAddress
Definition: ISDOpcodes.h:78
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition: ISDOpcodes.h:818
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition: ISDOpcodes.h:557
@ FADD
Simple binary floating point operators.
Definition: ISDOpcodes.h:397
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
Definition: ISDOpcodes.h:716
@ SDIVREM
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
Definition: ISDOpcodes.h:262
@ STRICT_FSETCCS
Definition: ISDOpcodes.h:491
@ FP16_TO_FP
FP16_TO_FP, FP_TO_FP16 - These operators are used to perform promotions and truncation for half-preci...
Definition: ISDOpcodes.h:941
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition: ISDOpcodes.h:931
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition: ISDOpcodes.h:236
@ INIT_TRAMPOLINE
INIT_TRAMPOLINE - This corresponds to the init_trampoline intrinsic.
Definition: ISDOpcodes.h:1240
@ FLDEXP
FLDEXP - ldexp, inspired by libm (op0 * 2**op1).
Definition: ISDOpcodes.h:974
@ STRICT_FSQRT
Constrained versions of libm-equivalent floating point intrinsics.
Definition: ISDOpcodes.h:418
@ GlobalTLSAddress
Definition: ISDOpcodes.h:79
@ FrameIndex
Definition: ISDOpcodes.h:80
@ SIGN_EXTEND
Conversion operators.
Definition: ISDOpcodes.h:802
@ STRICT_UINT_TO_FP
Definition: ISDOpcodes.h:464
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition: ISDOpcodes.h:634
@ TargetExternalSymbol
Definition: ISDOpcodes.h:175
@ BR
Control flow instructions. These all have token chains.
Definition: ISDOpcodes.h:1088
@ TargetJumpTable
Definition: ISDOpcodes.h:173
@ PREFETCH
PREFETCH - This corresponds to a prefetch intrinsic.
Definition: ISDOpcodes.h:1262
@ FSINCOS
FSINCOS - Compute both fsin and fcos as a single operation.
Definition: ISDOpcodes.h:1029
@ FNEG
Perform various unary floating-point operations inspired by libm.
Definition: ISDOpcodes.h:958
@ BR_CC
BR_CC - Conditional branch.
Definition: ISDOpcodes.h:1118
@ BR_JT
BR_JT - Jumptable branch.
Definition: ISDOpcodes.h:1097
@ IS_FPCLASS
Performs a check of floating point class property, defined by IEEE-754.
Definition: ISDOpcodes.h:521
@ SSUBSAT
RESULT = [US]SUBSAT(LHS, RHS) - Perform saturation subtraction on 2 integers with the same bit width ...
Definition: ISDOpcodes.h:356
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition: ISDOpcodes.h:755
@ ATOMIC_LOAD
Val, OUTCHAIN = ATOMIC_LOAD(INCHAIN, ptr) This corresponds to "load atomic" instruction.
Definition: ISDOpcodes.h:1278
@ EXTRACT_ELEMENT
EXTRACT_ELEMENT - This is used to get the lower or upper (determined by a Constant,...
Definition: ISDOpcodes.h:229
@ VACOPY
VACOPY - VACOPY has 5 operands: an input chain, a destination pointer, a source pointer,...
Definition: ISDOpcodes.h:1192
@ TargetGlobalAddress
TargetGlobalAddress - Like GlobalAddress, but the DAG does no folding or anything else with this node...
Definition: ISDOpcodes.h:170
@ STRICT_FTRUNC
Definition: ISDOpcodes.h:444
@ GET_ROUNDING
Returns current rounding mode: -1 Undefined; 0 Round to 0; 1 Round to nearest, ties to even; 2 Round to ...
Definition: ISDOpcodes.h:908
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition: ISDOpcodes.h:673
@ SHL
Shift and rotation operations.
Definition: ISDOpcodes.h:733
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition: ISDOpcodes.h:614
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition: ISDOpcodes.h:587
@ FMINNUM_IEEE
FMINNUM_IEEE/FMAXNUM_IEEE - Perform floating-point minimumNumber or maximumNumber on two values,...
Definition: ISDOpcodes.h:1019
@ STRICT_FMAXNUM
Definition: ISDOpcodes.h:438
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition: ISDOpcodes.h:549
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition: ISDOpcodes.h:808
@ STRICT_FMINNUM
Definition: ISDOpcodes.h:439
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition: ISDOpcodes.h:770
@ ATOMIC_CMP_SWAP
Val, OUTCHAIN = ATOMIC_CMP_SWAP(INCHAIN, ptr, cmp, swap) For double-word atomic operations: ValLo,...
Definition: ISDOpcodes.h:1289
@ FMINNUM
FMINNUM/FMAXNUM - Perform floating-point minimum or maximum on two values.
Definition: ISDOpcodes.h:1006
@ DYNAMIC_STACKALLOC
DYNAMIC_STACKALLOC - Allocate some number of bytes on the stack aligned to a specified boundary.
Definition: ISDOpcodes.h:1082
@ ConstantPool
Definition: ISDOpcodes.h:82
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition: ISDOpcodes.h:826
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
Definition: ISDOpcodes.h:696
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition: ISDOpcodes.h:916
@ STRICT_FROUND
Definition: ISDOpcodes.h:442
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition: ISDOpcodes.h:764
@ STRICT_SINT_TO_FP
STRICT_[US]INT_TO_FP - Convert a signed or unsigned integer to a floating point value.
Definition: ISDOpcodes.h:463
@ STRICT_FFLOOR
Definition: ISDOpcodes.h:441
@ INLINEASM_BR
INLINEASM_BR - Branching version of inline asm. Used by asm-goto.
Definition: ISDOpcodes.h:1138
@ EH_DWARF_CFA
EH_DWARF_CFA - This node represents the pointer to the DWARF Canonical Frame Address (CFA),...
Definition: ISDOpcodes.h:135
@ FRAMEADDR
FRAMEADDR, RETURNADDR - These nodes represent llvm.frameaddress and llvm.returnaddress on the DAG.
Definition: ISDOpcodes.h:100
@ STRICT_FP_TO_UINT
Definition: ISDOpcodes.h:457
@ STRICT_FP_ROUND
X = STRICT_FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision ...
Definition: ISDOpcodes.h:479
@ STRICT_FP_TO_SINT
STRICT_FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:456
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:864
@ READCYCLECOUNTER
READCYCLECOUNTER - This corresponds to the readcyclecounter intrinsic.
Definition: ISDOpcodes.h:1223
@ STRICT_FP_EXTEND
X = STRICT_FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition: ISDOpcodes.h:484
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition: ISDOpcodes.h:708
@ TRAP
TRAP - Trapping instruction.
Definition: ISDOpcodes.h:1249
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition: ISDOpcodes.h:190
@ ADDE
Carry-using nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:286
@ STRICT_FADD
Constrained versions of the binary floating point operators.
Definition: ISDOpcodes.h:407
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition: ISDOpcodes.h:538
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition: ISDOpcodes.h:52
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition: ISDOpcodes.h:897
@ INLINEASM
INLINEASM - Represents an inline asm block.
Definition: ISDOpcodes.h:1135
@ STRICT_FNEARBYINT
Definition: ISDOpcodes.h:437
@ EH_SJLJ_SETJMP
RESULT, OUTCHAIN = EH_SJLJ_SETJMP(INCHAIN, buffer) This corresponds to the eh.sjlj....
Definition: ISDOpcodes.h:147
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition: ISDOpcodes.h:814
@ VAARG
VAARG - VAARG has four operands: an input chain, a pointer, a SRCVALUE, and the alignment.
Definition: ISDOpcodes.h:1187
@ BRCOND
BRCOND - Conditional branch.
Definition: ISDOpcodes.h:1111
@ BlockAddress
Definition: ISDOpcodes.h:84
@ SHL_PARTS
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations.
Definition: ISDOpcodes.h:791
@ AssertSext
AssertSext, AssertZext - These nodes record if a register contains a value that has already been zero...
Definition: ISDOpcodes.h:61
@ FCOPYSIGN
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
Definition: ISDOpcodes.h:507
@ SADDSAT
RESULT = [US]ADDSAT(LHS, RHS) - Perform saturation addition on 2 integers with the same bit width (W)...
Definition: ISDOpcodes.h:347
@ AssertZext
Definition: ISDOpcodes.h:62
@ CALLSEQ_START
CALLSEQ_START/CALLSEQ_END - These operators mark the beginning and end of a call sequence,...
Definition: ISDOpcodes.h:1181
@ STRICT_FRINT
Definition: ISDOpcodes.h:436
@ GET_DYNAMIC_AREA_OFFSET
GET_DYNAMIC_AREA_OFFSET - get offset from native SP to the address of the most recent dynamic alloca.
Definition: ISDOpcodes.h:1361
@ ADJUST_TRAMPOLINE
ADJUST_TRAMPOLINE - This corresponds to the adjust_trampoline intrinsic.
Definition: ISDOpcodes.h:1246
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition: ISDOpcodes.h:198
@ TargetGlobalTLSAddress
Definition: ISDOpcodes.h:171
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition: ISDOpcodes.h:529
bool isNormalStore(const SDNode *N)
Returns true if the specified node is a non-truncating and unindexed store.
bool isZEXTLoad(const SDNode *N)
Returns true if the specified node is a ZEXTLOAD.
bool isUNINDEXEDLoad(const SDNode *N)
Returns true if the specified node is an unindexed load.
bool isEXTLoad(const SDNode *N)
Returns true if the specified node is a EXTLOAD.
bool isBuildVectorAllZeros(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are 0 or undef.
bool isSignedIntSetCC(CondCode Code)
Return true if this is a setcc instruction that performs a signed comparison when used with integer o...
Definition: ISDOpcodes.h:1607
MemIndexedMode
MemIndexedMode enum - This enum defines the load / store indexed addressing modes.
Definition: ISDOpcodes.h:1523
bool isSEXTLoad(const SDNode *N)
Returns true if the specified node is a SEXTLOAD.
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
Definition: ISDOpcodes.h:1574
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
Definition: ISDOpcodes.h:1554
bool isUnsignedIntSetCC(CondCode Code)
Return true if this is a setcc instruction that performs an unsigned comparison when used with intege...
Definition: ISDOpcodes.h:1613
bool isNormalLoad(const SDNode *N)
Returns true if the specified node is a non-extending and unindexed load.
Function * getDeclaration(Module *M, ID id, ArrayRef< Type * > Tys=std::nullopt)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
Definition: Function.cpp:1513
@ Bitcast
Perform the operation on a different, but equivalently sized type.
@ VecShuffle
Definition: NVPTX.h:91
@ MO_TLSLDM_FLAG
MO_TLSLDM_FLAG - on AIX the ML relocation type is only valid for a reference to a TOC symbol from the...
Definition: PPC.h:146
@ MO_PIC_LO_FLAG
MO_PIC_LO_FLAG = MO_PIC_FLAG | MO_LO.
Definition: PPC.h:194
@ MO_TPREL_PCREL_FLAG
MO_TPREL_PCREL_FLAG = MO_PCREL_FLAG | MO_TPREL_FLAG.
Definition: PPC.h:197
@ MO_GOT_TPREL_PCREL_FLAG
MO_GOT_TPREL_PCREL_FLAG - A combination of flags; if these bits are set they should produce the reloc...
Definition: PPC.h:172
@ MO_GOT_PCREL_FLAG
MO_GOT_PCREL_FLAG = MO_PCREL_FLAG | MO_GOT_FLAG.
Definition: PPC.h:203
@ MO_TLSGDM_FLAG
MO_TLSGDM_FLAG - If this bit is set the symbol reference is relative to the region handle of TLS Gene...
Definition: PPC.h:154
@ MO_PCREL_FLAG
MO_PCREL_FLAG - If this bit is set, the symbol reference is relative to the current instruction addre...
Definition: PPC.h:121
@ MO_TLSLD_FLAG
MO_TLSLD_FLAG - If this bit is set the symbol reference is relative to TLS Local Dynamic model.
Definition: PPC.h:150
@ MO_TLS_PCREL_FLAG
MO_TLS_PCREL_FLAG = MO_PCREL_FLAG | MO_TLS.
Definition: PPC.h:200
@ MO_TPREL_HA
Definition: PPC.h:179
@ MO_PLT
On PPC, the 12 bits are not enough for all target operand flags.
Definition: PPC.h:113
@ MO_TLS
Symbol for VK_PPC_TLS fixup attached to an ADD instruction.
Definition: PPC.h:188
@ MO_TPREL_FLAG
MO_TPREL_FLAG - If this bit is set, the symbol reference is relative to the thread pointer and the sy...
Definition: PPC.h:140
@ MO_TPREL_LO
Definition: PPC.h:178
@ MO_LO
MO_LO, MO_HA - lo16(symbol) and ha16(symbol)
Definition: PPC.h:175
@ MO_GOT_TLSLD_PCREL_FLAG
MO_GOT_TLSLD_PCREL_FLAG - A combination of flags; if these bits are set they should produce the reloc...
Definition: PPC.h:166
@ MO_PIC_HA_FLAG
MO_PIC_HA_FLAG = MO_PIC_FLAG | MO_HA.
Definition: PPC.h:191
@ MO_TLSGD_FLAG
MO_TLSGD_FLAG - If this bit is set the symbol reference is relative to TLS General Dynamic model for ...
Definition: PPC.h:135
@ MO_GOT_TLSGD_PCREL_FLAG
MO_GOT_TLSGD_PCREL_FLAG - A combination of flags; if these bits are set they should produce the reloc...
Definition: PPC.h:160
@ MO_HA
Definition: PPC.h:176
@ MO_PIC_FLAG
MO_PIC_FLAG - If this bit is set, the symbol reference is relative to the function's picbase,...
Definition: PPC.h:117
@ SEXT_LD_SPLAT
VSRC, CHAIN = SEXT_LD_SPLAT, CHAIN, Ptr - a splatting load memory that sign-extends.
@ FCTIDUZ
Newer FCTI[D,W]UZ floating-point-to-integer conversion instructions for unsigned integers with round ...
@ ADDI_TLSGD_L_ADDR
G8RC = ADDI_TLSGD_L_ADDR G8RReg, Symbol, Symbol - Op that combines ADDI_TLSGD_L and GET_TLS_ADDR unti...
@ FSQRT
Square root instruction.
@ STRICT_FCFID
Constrained integer-to-floating-point conversion instructions.
@ DYNALLOC
The following two target-specific nodes are used for calls through function pointers in the 64-bit SV...
@ COND_BRANCH
CHAIN = COND_BRANCH CHAIN, CRRC, OPC, DESTBB [, INFLAG] - This corresponds to the COND_BRANCH pseudo ...
@ TLSLD_AIX
[GP|G8]RC = TLSLD_AIX, TOC_ENTRY(module handle) Op that requires a single input of the module handle ...
@ CALL_RM
The variants that implicitly define rounding mode for calls with strictfp semantics.
@ STORE_VEC_BE
CHAIN = STORE_VEC_BE CHAIN, VSRC, Ptr - Occurs only for little endian.
@ BDNZ
CHAIN = BDNZ CHAIN, DESTBB - These are used to create counter-based loops.
@ MTVSRZ
Direct move from a GPR to a VSX register (zero)
@ SRL
These nodes represent PPC shifts.
@ VECINSERT
VECINSERT - The PPC vector insert instruction.
@ LXSIZX
GPRC, CHAIN = LXSIZX, CHAIN, Ptr, ByteWidth - This is a load of an integer smaller than 64 bits into ...
@ FNMSUB
FNMSUB - Negated multiply-subtract instruction.
@ RFEBB
CHAIN = RFEBB CHAIN, State - Return from event-based branch.
@ FCTIDZ
FCTI[D,W]Z - The FCTIDZ and FCTIWZ instructions, taking an f32 or f64 operand, producing an f64 value...
@ SC
CHAIN = SC CHAIN, Imm128 - System call.
@ GET_TLS_ADDR
x3 = GET_TLS_ADDR x3, Symbol - For the general-dynamic TLS model, produces a call to __tls_get_addr(s...
@ XXSPLTI32DX
XXSPLTI32DX - The PPC XXSPLTI32DX instruction.
@ ANDI_rec_1_EQ_BIT
i1 = ANDI_rec_1_[EQ|GT]_BIT(i32 or i64 x) - Represents the result of the eq or gt bit of CR0 after ex...
@ FRE
Reciprocal estimate instructions (unary FP ops).
@ ADDIS_GOT_TPREL_HA
G8RC = ADDIS_GOT_TPREL_HA x2, Symbol - Used by the initial-exec TLS model, produces an ADDIS8 instruc...
@ CLRBHRB
CHAIN = CLRBHRB CHAIN - Clear branch history rolling buffer.
@ STORE_COND
CHAIN,Glue = STORE_COND CHAIN, GPR, Ptr The store conditional instruction ST[BHWD]ARX that produces a...
@ SINT_VEC_TO_FP
Extract a subvector from signed integer vector and convert to FP.
@ EXTRACT_SPE
Extract SPE register component, second argument is high or low.
@ XXSWAPD
VSRC, CHAIN = XXSWAPD CHAIN, VSRC - Occurs only for little endian.
@ ADDI_TLSLD_L_ADDR
G8RC = ADDI_TLSLD_L_ADDR G8RReg, Symbol, Symbol - Op that combines ADDI_TLSLD_L and GET_TLSLD_ADDR un...
@ ATOMIC_CMP_SWAP_8
ATOMIC_CMP_SWAP - the exact same as the target-independent nodes except they ensure that the compare ...
@ ST_VSR_SCAL_INT
Store scalar integers from VSR.
@ VCMP
RESVEC = VCMP(LHS, RHS, OPC) - Represents one of the altivec VCMP* instructions.
@ BCTRL
CHAIN,FLAG = BCTRL(CHAIN, INFLAG) - Directly corresponds to a BCTRL instruction.
@ BUILD_SPE64
BUILD_SPE64 and EXTRACT_SPE are analogous to BUILD_PAIR and EXTRACT_ELEMENT but take f64 arguments in...
@ LFIWZX
GPRC, CHAIN = LFIWZX CHAIN, Ptr - This is a floating-point load which zero-extends from a 32-bit inte...
@ RET_GLUE
Return with a glue operand, matched by 'blr'.
@ SCALAR_TO_VECTOR_PERMUTED
PowerPC instructions that have SCALAR_TO_VECTOR semantics tend to place the value into the least sign...
@ EXTRACT_VSX_REG
EXTRACT_VSX_REG = Extract one of the underlying vsx registers of an accumulator or pair register.
@ STXSIX
STXSIX - The STXSI[bh]X instruction.
@ MAT_PCREL_ADDR
MAT_PCREL_ADDR = Materialize a PC Relative address.
@ MFOCRF
R32 = MFOCRF(CRREG, INFLAG) - Represents the MFOCRF instruction.
@ XXSPLT
XXSPLT - The PPC VSX splat instructions.
@ TOC_ENTRY
GPRC = TOC_ENTRY GA, TOC - Loads the entry for GA from the TOC, where the TOC base is given by the last...
@ XXPERMDI
XXPERMDI - The PPC XXPERMDI instruction.
@ ADDIS_DTPREL_HA
G8RC = ADDIS_DTPREL_HA x3, Symbol - For the local-dynamic TLS model, produces an ADDIS8 instruction t...
@ ADD_TLS
G8RC = ADD_TLS G8RReg, Symbol - Can be used by the initial-exec and local-exec TLS models,...
@ MTVSRA
Direct move from a GPR to a VSX register (algebraic)
@ VADD_SPLAT
VRRC = VADD_SPLAT Elt, EltSize - Temporary node to be expanded during instruction selection to optimi...
@ PPC32_GOT
GPRC = address of GLOBAL_OFFSET_TABLE.
@ ADDI_DTPREL_L
G8RC = ADDI_DTPREL_L G8RReg, Symbol - For the local-dynamic TLS model, produces an ADDI8 instruction ...
@ BCTRL_LOAD_TOC
CHAIN,FLAG = BCTRL(CHAIN, ADDR, INFLAG) - The combination of a bctrl instruction and the TOC reload r...
@ PPC32_PICGOT
GPRC = address of GLOBAL_OFFSET_TABLE.
@ FCFID
FCFID - The FCFID instruction, taking an f64 operand and producing and f64 value containing the FP re...
@ CR6SET
ch, gl = CR6[UN]SET ch, inglue - Toggle CR bit 6 for SVR4 vararg calls
@ LBRX
GPRC, CHAIN = LBRX CHAIN, Ptr, Type - This is a byte-swapping load instruction.
@ GET_TLS_MOD_AIX
x3 = GET_TLS_MOD_AIX _$TLSML - For the AIX local-dynamic TLS model, produces a call to ....
@ LD_VSX_LH
VSRC, CHAIN = LD_VSX_LH CHAIN, Ptr - This is a floating-point load of a v2f32 value into the lower ha...
@ PROBED_ALLOCA
To avoid stack clash, allocation is performed by block and each block is probed.
@ XXMFACC
XXMFACC = This corresponds to the xxmfacc instruction.
@ ADDIS_TLSGD_HA
G8RC = ADDIS_TLSGD_HA x2, Symbol - For the general-dynamic TLS model, produces an ADDIS8 instruction ...
@ ACC_BUILD
ACC_BUILD = Build an accumulator register from 4 VSX registers.
@ GlobalBaseReg
The result of the mflr at function entry, used for PIC code.
@ LXVD2X
VSRC, CHAIN = LXVD2X_LE CHAIN, Ptr - Occurs only for little endian.
@ XSMAXC
XSMAXC[DQ]P, XSMINC[DQ]P - C-type min/max instructions.
@ CALL
CALL - A direct function call.
@ MTCTR
CHAIN,FLAG = MTCTR(VAL, CHAIN[, INFLAG]) - Directly corresponds to a MTCTR instruction.
@ TC_RETURN
TC_RETURN - A tail call return.
@ STFIWX
STFIWX - The STFIWX instruction.
@ LD_SPLAT
VSRC, CHAIN = LD_SPLAT, CHAIN, Ptr - a splatting load memory instructions such as LXVDSX,...
@ VCMP_rec
RESVEC, OUTFLAG = VCMP_rec(LHS, RHS, OPC) - Represents one of the altivec VCMP*_rec instructions.
@ MFFS
F8RC = MFFS - This moves the FPSCR (not modeled) into the register.
@ PADDI_DTPREL
G8RC = PADDI_DTPREL x3, Symbol - For the pc-rel based local-dynamic TLS model, produces a PADDI8 inst...
@ BUILD_FP128
Direct move of 2 consecutive GPR to a VSX register.
@ VEXTS
VEXTS, ByteWidth - takes an input in VSFRC and produces an output in VSFRC that is sign-extended from...
@ TLS_LOCAL_EXEC_MAT_ADDR
TLS_LOCAL_EXEC_MAT_ADDR = Materialize an address for TLS global address when using local exec access ...
@ VPERM
VPERM - The PPC VPERM Instruction.
@ ADDIS_TLSLD_HA
G8RC = ADDIS_TLSLD_HA x2, Symbol - For the local-dynamic TLS model, produces an ADDIS8 instruction th...
@ XXSPLTI_SP_TO_DP
XXSPLTI_SP_TO_DP - The PPC VSX splat instructions for immediates for converting immediate single prec...
@ GET_TLSLD_ADDR
x3 = GET_TLSLD_ADDR x3, Symbol - For the local-dynamic TLS model, produces a call to __tls_get_addr(s...
@ ADDI_TLSGD_L
x3 = ADDI_TLSGD_L G8RReg, Symbol - For the general-dynamic TLS model, produces an ADDI8 instruction t...
@ DYNAREAOFFSET
This instruction is lowered in PPCRegisterInfo::eliminateFrameIndex to compute an offset from native ...
@ PAIR_BUILD
PAIR_BUILD = Build a vector pair register from 2 VSX registers.
@ STRICT_FADDRTZ
Constrained floating point add in round-to-zero mode.
@ FTSQRT
Test instruction for software square root.
@ FP_EXTEND_HALF
FP_EXTEND_HALF(VECTOR, IDX) - Custom extend upper (IDX=0) half or lower (IDX=1) half of v4f32 to v2f6...
@ CMPB
The CMPB instruction (takes two operands of i32 or i64).
@ VECSHL
VECSHL - The PPC vector shift left instruction.
@ ADDI_TLSLD_L
x3 = ADDI_TLSLD_L G8RReg, Symbol - For the local-dynamic TLS model, produces an ADDI8 instruction tha...
@ FADDRTZ
F8RC = FADDRTZ F8RC, F8RC - This is an FADD done with rounding towards zero.
@ ZEXT_LD_SPLAT
VSRC, CHAIN = ZEXT_LD_SPLAT, CHAIN, Ptr - a splatting memory load that zero-extends.
@ SRA_ADDZE
The combination of sra[wd]i and addze used to implement signed integer division by a power of 2.
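As a rough host-level illustration of what that pair computes (not code from this file; the helper name and the explicit carry modelling are assumptions), the shift rounds toward minus infinity and the carry folds the result back to round-toward-zero division:
  // Minimal sketch: models sra[wd]i (arithmetic shift right; CA is set when a
  // negative value loses non-zero bits) followed by addze (add the carry).
  // Assumes 0 < Shift < 64 and arithmetic right shift on signed values
  // (guaranteed since C++20, true in practice before that).
  #include <cstdint>

  static int64_t divideBySignedPowerOfTwo(int64_t Dividend, unsigned Shift) {
    int64_t Quotient = Dividend >> Shift;                           // sradi
    uint64_t LostBits = (uint64_t)Dividend & ((1ULL << Shift) - 1);
    bool Carry = Dividend < 0 && LostBits != 0;                     // CA bit
    return Quotient + (Carry ? 1 : 0);                              // addze
  }
For example, dividing -7 by 2 this way yields -3 rather than the -4 a plain arithmetic shift would produce.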
@ EXTSWSLI
EXTSWSLI = The PPC extswsli instruction, which does an extend-sign word and shift left immediate.
@ STXVD2X
CHAIN = STXVD2X CHAIN, VSRC, Ptr - Occurs only for little endian.
@ TLSGD_AIX
GPRC = TLSGD_AIX, TOC_ENTRY, TOC_ENTRY; G8RC = TLSGD_AIX, TOC_ENTRY, TOC_ENTRY - Op that combines two re...
@ UINT_VEC_TO_FP
Extract a subvector from unsigned integer vector and convert to FP.
@ GET_TPOINTER
x3 = GET_TPOINTER - Used for the local- and initial-exec TLS model on 32-bit AIX, produces a call to ...
@ LXVRZX
LXVRZX - Load VSX Vector Rightmost and Zero Extend. This node represents a v1i128 BUILD_VECTOR of a zero...
@ MFBHRBE
GPRC, CHAIN = MFBHRBE CHAIN, Entry, Dummy - Move from branch history rolling buffer entry.
@ FCFIDU
Newer FCFID[US] integer-to-floating-point conversion instructions for unsigned integers and single-pr...
@ FSEL
FSEL - Traditional three-operand fsel node.
@ SWAP_NO_CHAIN
An SDNode for swaps that are not associated with any loads/stores and thereby have no chain.
@ LOAD_VEC_BE
VSRC, CHAIN = LOAD_VEC_BE CHAIN, Ptr - Occurs only for little endian.
@ LFIWAX
GPRC, CHAIN = LFIWAX CHAIN, Ptr - This is a floating-point load which sign-extends from a 32-bit inte...
@ STBRX
CHAIN = STBRX CHAIN, GPRC, Ptr, Type - This is a byte-swapping store instruction.
@ LD_GOT_TPREL_L
G8RC = LD_GOT_TPREL_L Symbol, G8RReg - Used by the initial-exec TLS model, produces a LD instruction ...
@ MFVSR
Direct move from a VSX register to a GPR.
@ TLS_DYNAMIC_MAT_PCREL_ADDR
TLS_DYNAMIC_MAT_PCREL_ADDR = Materialize a PC Relative address for TLS global address when using dyna...
@ Hi
Hi/Lo - These represent the high and low 16-bit parts of a global address respectively.
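A hedged sketch of the usual way such a pair is recombined when the low half is later sign-extended (as addi or a D-form load does); splitHiLo and the @ha-style bias are illustrative, not taken from this file:
  // Minimal sketch: split a 32-bit address so that
  // (Ha << 16) + SignExtend16(Lo) == Addr.
  #include <cstdint>

  static void splitHiLo(uint32_t Addr, uint16_t &Ha, int16_t &Lo) {
    Lo = (int16_t)(Addr & 0xFFFF);          // low 16 bits, sign-extended on use
    Ha = (uint16_t)((Addr + 0x8000) >> 16); // high half, biased to absorb a negative Lo
  }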
Predicate
Predicate - These are "(BI << 5) | BO" for various predicates.
Definition: PPCPredicates.h:26
SDValue get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG)
get_VSPLTI_elt - If this is a build_vector of constants which can be formed by using a vspltis[bhw] i...
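The vspltis[bhw] family replicates a sign-extended 5-bit immediate across the vector, so the core admissibility check on a splat constant is a simple range test. A minimal sketch, with an illustrative helper name:
  // Minimal sketch: a splat constant can come from vspltis[bhw] only if every
  // element equals the same value in [-16, 15] (the instruction's SIMM5 field,
  // sign-extended to the element width).
  #include <cstdint>

  static bool fitsVspltisImmediate(int64_t SplatValue) {
    return SplatValue >= -16 && SplatValue <= 15;
  }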
@ DIR_PWR_FUTURE
Definition: PPCSubtarget.h:64
bool isXXBRDShuffleMask(ShuffleVectorSDNode *N)
isXXBRDShuffleMask - Return true if this is a shuffle mask suitable for a XXBRD instruction.
FastISel * createFastISel(FunctionLoweringInfo &FuncInfo, const TargetLibraryInfo *LibInfo)
bool isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize, unsigned ShuffleKind, SelectionDAG &DAG)
isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for a VMRGH* instruction with the ...
bool isVPKUDUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, SelectionDAG &DAG)
isVPKUDUMShuffleMask - Return true if this is the shuffle mask for a VPKUDUM instruction.
bool isVMRGEOShuffleMask(ShuffleVectorSDNode *N, bool CheckEven, unsigned ShuffleKind, SelectionDAG &DAG)
isVMRGEOShuffleMask - Return true if this is a shuffle mask suitable for a VMRGEW or VMRGOW instructi...
bool isXXBRQShuffleMask(ShuffleVectorSDNode *N)
isXXBRQShuffleMask - Return true if this is a shuffle mask suitable for a XXBRQ instruction.
bool isXXBRWShuffleMask(ShuffleVectorSDNode *N)
isXXBRWShuffleMask - Return true if this is a shuffle mask suitable for a XXBRW instruction.
bool isXXPERMDIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts, bool &Swap, bool IsLE)
isXXPERMDIShuffleMask - Return true if this is a shuffle mask suitable for a XXPERMDI instruction.
bool isXXBRHShuffleMask(ShuffleVectorSDNode *N)
isXXBRHShuffleMask - Return true if this is a shuffle mask suitable for a XXBRH instruction.
unsigned getSplatIdxForPPCMnemonics(SDNode *N, unsigned EltSize, SelectionDAG &DAG)
getSplatIdxForPPCMnemonics - Return the splat index as a value that is appropriate for PPC mnemonics ...
bool isXXSLDWIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts, bool &Swap, bool IsLE)
isXXSLDWIShuffleMask - Return true if this is a shuffle mask suitable for a XXSLDWI instruction.
int isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind, SelectionDAG &DAG)
isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift amount, otherwise return -1.
bool isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize, unsigned ShuffleKind, SelectionDAG &DAG)
isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for a VMRGL* instruction with the ...
bool isXXINSERTWMask(ShuffleVectorSDNode *N, unsigned &ShiftElts, unsigned &InsertAtByte, bool &Swap, bool IsLE)
isXXINSERTWMask - Return true if this VECTOR_SHUFFLE can be handled by the XXINSERTW instruction intr...
bool isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize)
isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand specifies a splat of a singl...
bool isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, SelectionDAG &DAG)
isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a VPKUWUM instruction.
bool isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, SelectionDAG &DAG)
isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a VPKUHUM instruction.
Libcall
RTLIB::Libcall enum - This enum defines all of the runtime library calls the backend can emit.
@ Define
Register definition.
@ GeneralDynamic
Definition: CodeGen.h:46
Reg
All possible values of the reg field in the ModR/M byte.
@ XMC_PR
Program Code.
Definition: XCOFF.h:105
@ XTY_ER
External reference.
Definition: XCOFF.h:241
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
constexpr double e
Definition: MathExtras.h:47
const_iterator end(StringRef path)
Get end iterator over path.
Definition: Path.cpp:236
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
@ Offset
Definition: DWP.cpp:480
static bool isIndirectCall(const MachineInstr &MI)
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1722
bool checkConvertToNonDenormSingle(APFloat &ArgAPFloat)
void GetReturnInfo(CallingConv::ID CC, Type *ReturnType, AttributeList attr, SmallVectorImpl< ISD::OutputArg > &Outs, const TargetLowering &TLI, const DataLayout &DL)
Given an LLVM IR type and return type attributes, compute the return value EVTs and flags,...
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
SDValue peekThroughBitcasts(SDValue V)
Return the non-bitcasted source operand of V if it exists.
bool CC_PPC32_SVR4_ByVal(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
bool isAligned(Align Lhs, uint64_t SizeInBytes)
Checks that SizeInBytes is a multiple of the alignment.
Definition: Alignment.h:145
bool isIntS16Immediate(SDNode *N, int16_t &Imm)
isIntS16Immediate - This method tests to see if the node is either a 32-bit or 64-bit immediate,...
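The underlying test reduces to a truncate-and-sign-extend round trip. A minimal sketch, assuming only the signed 16-bit range check (the helper name is illustrative):
  // Minimal sketch: a value fits a signed 16-bit immediate (the D field of
  // addi, lwz, ...) iff truncating to 16 bits and sign-extending back
  // reproduces the original value.
  #include <cstdint>

  static bool fitsSignedImm16(int64_t Imm) {
    return Imm == (int16_t)Imm;
  }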
bool CC_PPC32_SVR4_VarArg(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64-bit edition).
Definition: MathExtras.h:296
static bool isRunOfOnes64(uint64_t Val, unsigned &MB, unsigned &ME)
int countr_zero(T Val)
Count the number of 0s from the least significant bit to the most significant bit, stopping at the first 1.
Definition: bit.h:215
unsigned M1(unsigned Val)
Definition: VE.h:376
bool isReleaseOrStronger(AtomicOrdering AO)
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1729
bool RetCC_PPC_Cold(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:291
bool convertToNonDenormSingle(APInt &ArgAPInt)
FPClassTest
Floating-point class tests, supported by 'is_fpclass' intrinsic.
bool CC_PPC32_SVR4(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:167
bool CC_PPC64_ELF(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
bool RetCC_PPC(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
format_object< Ts... > format(const char *Fmt, const Ts &... Vals)
These are helper functions used to produce formatted output.
Definition: Format.h:125
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
AtomicOrdering
Atomic ordering for LLVM's memory model.
@ Mod
The access may modify the value stored in memory.
bool isIntS34Immediate(SDNode *N, int64_t &Imm)
isIntS34Immediate - This method tests whether the value of the given node can be accurately represented as a sign ...
bool CCAssignFn(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
CCAssignFn - This function assigns a location for Val, updating State to reflect the change.
@ Mul
Product of integers.
@ Add
Sum of integers.
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition: Alignment.h:155
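For a power-of-two alignment this is the familiar round-up-to-multiple formula; a minimal sketch under that assumption (the name is illustrative):
  // Minimal sketch for a power-of-two alignment A: round Size up to the next
  // multiple of A.
  #include <cstdint>

  static uint64_t roundUpToAlignment(uint64_t Size, uint64_t A) {
    return (Size + A - 1) & ~(A - 1); // e.g. roundUpToAlignment(13, 8) == 16
  }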
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
Definition: STLExtras.h:1914
DWARFExpression::Operation Op
unsigned M0(unsigned Val)
Definition: VE.h:375
ConstantSDNode * isConstOrConstSplat(SDValue N, bool AllowUndefs=false, bool AllowTruncation=false)
Returns the SDNode if it is a constant splat BuildVector or constant int.
bool isAcquireOrStronger(AtomicOrdering AO)
constexpr int32_t SignExtend32(uint32_t X)
Sign-extend the number in the bottom B bits of X to a 32-bit integer.
Definition: MathExtras.h:555
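Sign extension from an arbitrary bit width is conventionally done with a pair of shifts. A minimal sketch, assuming 0 < B <= 32 and arithmetic right shift on signed values (the function name is illustrative):
  // Minimal sketch: move the B-bit field to the top of the word, then shift it
  // back down arithmetically so the sign bit is replicated.
  #include <cstdint>

  static int32_t signExtendLowBits(uint32_t X, unsigned B) {
    return (int32_t)(X << (32 - B)) >> (32 - B);
  }
For example, signExtendLowBits(0xFFF, 12) yields -1.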
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:191
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Definition: Alignment.h:212
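A hedged sketch of the usual computation when combining a power-of-two alignment with an arbitrary byte offset: the result is the lowest set bit of their OR (assumed semantics; the helper name is illustrative, and C++20 <bit> is used):
  // Minimal sketch: the largest power of two dividing both a power-of-two
  // alignment A and an arbitrary byte offset is the lowest set bit of (A | Offset).
  #include <bit>
  #include <cstdint>

  static uint64_t commonPow2Alignment(uint64_t A, uint64_t Offset) {
    return uint64_t(1) << std::countr_zero(A | Offset); // Offset == 0 keeps A
  }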
constexpr int64_t SignExtend64(uint64_t x)
Sign-extend the number in the bottom B bits of X to a 64-bit integer.
Definition: MathExtras.h:573
static bool isRunOfOnes(unsigned Val, unsigned &MB, unsigned &ME)
Returns true iff Val consists of one contiguous run of 1s with any number of 0s on either side.
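A minimal sketch of the predicate alone (the MB/ME bound extraction done by the real helper is omitted, and the name is illustrative); it strips trailing zeros and then checks that what remains has the 2^n - 1 form, using C++20 <bit>:
  // Minimal sketch: true iff Val is a single contiguous run of 1 bits.
  #include <bit>
  #include <cstdint>

  static bool isContiguousRunOfOnes(uint64_t Val) {
    if (Val == 0)
      return false;
    uint64_t Run = Val >> std::countr_zero(Val); // drop the low zeros
    return (Run & (Run + 1)) == 0;               // 0b0..01..1 pattern
  }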
T bit_floor(T Value)
Returns the largest integral power of two no greater than Value if Value is nonzero.
Definition: bit.h:327
bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
static const unsigned PerfectShuffleTable[6561+1]
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
#define N
This is used by foldLoadsRecursive() to capture a Root Load node which is of type or(load,...
static const fltSemantics & IEEEsingle() LLVM_READNONE
Definition: APFloat.cpp:276
static constexpr roundingMode rmNearestTiesToEven
Definition: APFloat.h:250
static const fltSemantics & PPCDoubleDouble() LLVM_READNONE
Definition: APFloat.cpp:279
static constexpr roundingMode rmTowardZero
Definition: APFloat.h:254
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
uint64_t value() const
This is a hole in the type system and should not be abused.
Definition: Alignment.h:85
Represent subnormal handling kind for floating point instruction inputs and outputs.
Extended Value Type.
Definition: ValueTypes.h:34
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
Definition: ValueTypes.h:93
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition: ValueTypes.h:380
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition: ValueTypes.h:136
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
Definition: ValueTypes.h:73
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
Definition: ValueTypes.h:274
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
Definition: ValueTypes.h:146
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition: ValueTypes.h:358
uint64_t getScalarSizeInBits() const
Definition: ValueTypes.h:370
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition: ValueTypes.h:306
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition: ValueTypes.h:64
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition: ValueTypes.h:366
bool isVector() const
Return true if this is a vector value type.
Definition: ValueTypes.h:167
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition: ValueTypes.h:313
Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
Definition: ValueTypes.cpp:203
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition: ValueTypes.h:318
bool isExtended() const
Test if the given EVT is extended (as opposed to being simple).
Definition: ValueTypes.h:141
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition: ValueTypes.h:156
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition: ValueTypes.h:326
EVT getHalfNumVectorElementsVT(LLVMContext &Context) const
Definition: ValueTypes.h:438
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition: ValueTypes.h:151
unsigned getByValSize() const
void setByValSize(unsigned S)
Align getNonZeroByValAlign() const
InputArg - This struct carries flags and type information about a single incoming (formal) argument o...
OutputArg - This struct carries flags and a value for a single outgoing (actual) argument or outgoing...
bool isConstant() const
Returns true if we know the value of all bits.
Definition: KnownBits.h:50
void resetAll()
Resets the known state of all bits.
Definition: KnownBits.h:70
const APInt & getConstant() const
Returns the value when all bits have a known value.
Definition: KnownBits.h:56
This class contains a discriminated union of information about pointers in memory operands,...
static MachinePointerInfo getStack(MachineFunction &MF, int64_t Offset, uint8_t ID=0)
Stack pointer relative access.
MachinePointerInfo getWithOffset(int64_t O) const
static MachinePointerInfo getGOT(MachineFunction &MF)
Return a MachinePointerInfo record that refers to a GOT entry.
static MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition: Alignment.h:117
Structure that collects some common arguments that get passed around between the functions for call l...
These are IR-level optimization flags that may be propagated to SDNodes.
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*...
This contains information for each constraint that we are lowering.
This structure contains all information that is necessary for lowering calls.
CallLoweringInfo & setIsPostTypeLegalization(bool Value=true)
CallLoweringInfo & setLibCallee(CallingConv::ID CC, Type *ResultType, SDValue Target, ArgListTy &&ArgsList)
SmallVector< ISD::InputArg, 32 > Ins
CallLoweringInfo & setZExtResult(bool Value=true)
CallLoweringInfo & setDebugLoc(const SDLoc &dl)
CallLoweringInfo & setSExtResult(bool Value=true)
SmallVector< ISD::OutputArg, 32 > Outs
SmallVector< SDValue, 32 > OutVals
SDValue CombineTo(SDNode *N, ArrayRef< SDValue > To, bool AddTo=true)