1//===-- PPCISelLowering.cpp - PPC DAG Lowering Implementation -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the PPCISelLowering class.
10//
11//===----------------------------------------------------------------------===//
12
13#include "PPCISelLowering.h"
16#include "PPC.h"
17#include "PPCCCState.h"
18#include "PPCCallingConv.h"
19#include "PPCFrameLowering.h"
20#include "PPCInstrInfo.h"
22#include "PPCPerfectShuffle.h"
23#include "PPCRegisterInfo.h"
24#include "PPCSubtarget.h"
25#include "PPCTargetMachine.h"
26#include "llvm/ADT/APFloat.h"
27#include "llvm/ADT/APInt.h"
28#include "llvm/ADT/APSInt.h"
29#include "llvm/ADT/ArrayRef.h"
30#include "llvm/ADT/DenseMap.h"
31#include "llvm/ADT/STLExtras.h"
33#include "llvm/ADT/SmallSet.h"
35#include "llvm/ADT/Statistic.h"
36#include "llvm/ADT/StringRef.h"
60#include "llvm/IR/CallingConv.h"
61#include "llvm/IR/Constant.h"
62#include "llvm/IR/Constants.h"
63#include "llvm/IR/DataLayout.h"
64#include "llvm/IR/DebugLoc.h"
66#include "llvm/IR/Function.h"
67#include "llvm/IR/GlobalValue.h"
68#include "llvm/IR/IRBuilder.h"
70#include "llvm/IR/Intrinsics.h"
71#include "llvm/IR/IntrinsicsPowerPC.h"
72#include "llvm/IR/Module.h"
73#include "llvm/IR/Type.h"
74#include "llvm/IR/Use.h"
75#include "llvm/IR/Value.h"
76#include "llvm/MC/MCContext.h"
77#include "llvm/MC/MCExpr.h"
87#include "llvm/Support/Debug.h"
89#include "llvm/Support/Format.h"
95#include <algorithm>
96#include <cassert>
97#include <cstdint>
98#include <iterator>
99#include <list>
100#include <optional>
101#include <utility>
102#include <vector>
103
104using namespace llvm;
105
106#define DEBUG_TYPE "ppc-lowering"
107
108static cl::opt<bool> DisablePPCPreinc("disable-ppc-preinc",
109cl::desc("disable preincrement load/store generation on PPC"), cl::Hidden);
110
111static cl::opt<bool> DisableILPPref("disable-ppc-ilp-pref",
112cl::desc("disable setting the node scheduling preference to ILP on PPC"), cl::Hidden);
113
114static cl::opt<bool> DisablePPCUnaligned("disable-ppc-unaligned",
115cl::desc("disable unaligned load/store generation on PPC"), cl::Hidden);
116
117static cl::opt<bool> DisableSCO("disable-ppc-sco",
118cl::desc("disable sibling call optimization on ppc"), cl::Hidden);
119
120static cl::opt<bool> DisableInnermostLoopAlign32("disable-ppc-innermost-loop-align32",
121cl::desc("don't always align innermost loop to 32 bytes on ppc"), cl::Hidden);
122
123static cl::opt<bool> UseAbsoluteJumpTables("ppc-use-absolute-jumptables",
124cl::desc("use absolute jump tables on ppc"), cl::Hidden);
125
126static cl::opt<bool>
127 DisablePerfectShuffle("ppc-disable-perfect-shuffle",
128 cl::desc("disable vector permute decomposition"),
129 cl::init(true), cl::Hidden);
130
132 "disable-auto-paired-vec-st",
133 cl::desc("disable automatically generated 32byte paired vector stores"),
134 cl::init(true), cl::Hidden);
135
137 "ppc-min-jump-table-entries", cl::init(64), cl::Hidden,
138 cl::desc("Set minimum number of entries to use a jump table on PPC"));
139
141 "ppc-gather-alias-max-depth", cl::init(18), cl::Hidden,
142 cl::desc("max depth when checking alias info in GatherAllAliases()"));
143
145 "ppc-aix-shared-lib-tls-model-opt-limit", cl::init(1), cl::Hidden,
146 cl::desc("Set inclusive limit count of TLS local-dynamic access(es) in a "
147 "function to use initial-exec"));
148
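// Note (illustrative commentary, not part of the upstream source): the
// cl::opt flags above are internal codegen options. They can be exercised
// directly with llc, e.g. `llc -mtriple=powerpc64le-linux-gnu
// -disable-ppc-preinc foo.ll` (foo.ll being a placeholder input), or from
// clang by prefixing each option with -mllvm.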
149STATISTIC(NumTailCalls, "Number of tail calls");
150STATISTIC(NumSiblingCalls, "Number of sibling calls");
151STATISTIC(ShufflesHandledWithVPERM,
152 "Number of shuffles lowered to a VPERM or XXPERM");
153STATISTIC(NumDynamicAllocaProbed, "Number of dynamic stack allocation probed");
154
155static bool isNByteElemShuffleMask(ShuffleVectorSDNode *, unsigned, int);
156
157static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl);
158
159static const char AIXSSPCanaryWordName[] = "__ssp_canary_word";
160
161// A faster local-[exec|dynamic] TLS access sequence (enabled with the
162// -maix-small-local-[exec|dynamic]-tls option) can be produced for TLS
163// variables; consistent with the IBM XL compiler, we apply a max size of
164// slightly under 32KB.
166
167// FIXME: Remove this once the bug has been fixed!
168extern cl::opt<bool> ANDIGlueBug;
169
170PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
171 const PPCSubtarget &STI)
172 : TargetLowering(TM), Subtarget(STI) {
173 // Initialize map that relates the PPC addressing modes to the computed flags
174 // of a load/store instruction. The map is used to determine the optimal
175 // addressing mode when selecting loads and stores.
176 initializeAddrModeMap();
177 // On PPC32/64, arguments smaller than 4/8 bytes are extended, so all
178 // arguments are at least 4/8 bytes aligned.
179 bool isPPC64 = Subtarget.isPPC64();
180 setMinStackArgumentAlignment(isPPC64 ? Align(8) : Align(4));
181
182 // Set up the register classes.
183 addRegisterClass(MVT::i32, &PPC::GPRCRegClass);
184 if (!useSoftFloat()) {
185 if (hasSPE()) {
186 addRegisterClass(MVT::f32, &PPC::GPRCRegClass);
187 // EFPU2 APU only supports f32
188 if (!Subtarget.hasEFPU2())
189 addRegisterClass(MVT::f64, &PPC::SPERCRegClass);
190 } else {
191 addRegisterClass(MVT::f32, &PPC::F4RCRegClass);
192 addRegisterClass(MVT::f64, &PPC::F8RCRegClass);
193 }
194 }
195
196 // Match BITREVERSE to customized fast code sequence in the td file.
199
200 // Sub-word ATOMIC_CMP_SWAP needs to ensure that the input is zero-extended.
202
203 // Custom lower inline assembly to check for special registers.
206
207 // PowerPC has an i16 but no i8 (or i1) SEXTLOAD.
208 for (MVT VT : MVT::integer_valuetypes()) {
211 }
212
213 if (Subtarget.isISA3_0()) {
214 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Legal);
215 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Legal);
216 setTruncStoreAction(MVT::f64, MVT::f16, Legal);
217 setTruncStoreAction(MVT::f32, MVT::f16, Legal);
218 } else {
219 // No extending loads from f16 or HW conversions back and forth.
220 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
223 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
226 setTruncStoreAction(MVT::f64, MVT::f16, Expand);
227 setTruncStoreAction(MVT::f32, MVT::f16, Expand);
228 }
229
230 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
231
232 // PowerPC has pre-inc loads and stores.
243 if (!Subtarget.hasSPE()) {
248 }
249
250 // PowerPC uses ADDC/ADDE/SUBC/SUBE to propagate carry.
251 const MVT ScalarIntVTs[] = { MVT::i32, MVT::i64 };
252 for (MVT VT : ScalarIntVTs) {
257 }
258
259 if (Subtarget.useCRBits()) {
261
262 if (isPPC64 || Subtarget.hasFPCVT()) {
265 isPPC64 ? MVT::i64 : MVT::i32);
268 isPPC64 ? MVT::i64 : MVT::i32);
269
272 isPPC64 ? MVT::i64 : MVT::i32);
275 isPPC64 ? MVT::i64 : MVT::i32);
276
279 isPPC64 ? MVT::i64 : MVT::i32);
282 isPPC64 ? MVT::i64 : MVT::i32);
283
286 isPPC64 ? MVT::i64 : MVT::i32);
289 isPPC64 ? MVT::i64 : MVT::i32);
290 } else {
295 }
296
297 // PowerPC does not support direct load/store of condition registers.
300
301 // FIXME: Remove this once the ANDI glue bug is fixed:
302 if (ANDIGlueBug)
304
305 for (MVT VT : MVT::integer_valuetypes()) {
308 setTruncStoreAction(VT, MVT::i1, Expand);
309 }
310
311 addRegisterClass(MVT::i1, &PPC::CRBITRCRegClass);
312 }
313
314 // Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on
315 // PPC (the libcall is not available).
320
321 // We do not currently implement these libm ops for PowerPC.
322 setOperationAction(ISD::FFLOOR, MVT::ppcf128, Expand);
323 setOperationAction(ISD::FCEIL, MVT::ppcf128, Expand);
324 setOperationAction(ISD::FTRUNC, MVT::ppcf128, Expand);
325 setOperationAction(ISD::FRINT, MVT::ppcf128, Expand);
327 setOperationAction(ISD::FREM, MVT::ppcf128, Expand);
328
329 // PowerPC has no SREM/UREM instructions unless we are on P9
330 // On P9 we may use a hardware instruction to compute the remainder.
331 // When the result of both the remainder and the division is required it is
332 // more efficient to compute the remainder from the result of the division
333 // rather than use the remainder instruction. The instructions are legalized
334 // directly because the DivRemPairsPass performs the transformation at the IR
335 // level.
336 if (Subtarget.isISA3_0()) {
341 } else {
346 }
347
348 // Don't use SMUL_LOHI/UMUL_LOHI or SDIVREM/UDIVREM to lower SREM/UREM.
357
358 // Handle constrained floating-point operations of scalar.
359 // TODO: Handle SPE specific operation.
365
370
371 if (!Subtarget.hasSPE()) {
374 }
375
376 if (Subtarget.hasVSX()) {
379 }
380
381 if (Subtarget.hasFSQRT()) {
384 }
385
386 if (Subtarget.hasFPRND()) {
391
396 }
397
398 // We don't support sin/cos/sqrt/fmod/pow
409
410 // MASS transformation for LLVM intrinsics with replicating fast-math flag
411// to be consistent with the PPCGenScalarMASSEntries pass
412 if (TM.getOptLevel() == CodeGenOptLevel::Aggressive) {
425 }
426
427 if (Subtarget.hasSPE()) {
430 } else {
431 setOperationAction(ISD::FMA , MVT::f64, Legal);
432 setOperationAction(ISD::FMA , MVT::f32, Legal);
433 }
434
435 if (Subtarget.hasSPE())
436 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
437
439
440 // If we're enabling GP optimizations, use hardware square root
441 if (!Subtarget.hasFSQRT() &&
442 !(TM.Options.UnsafeFPMath && Subtarget.hasFRSQRTE() &&
443 Subtarget.hasFRE()))
445
446 if (!Subtarget.hasFSQRT() &&
447 !(TM.Options.UnsafeFPMath && Subtarget.hasFRSQRTES() &&
448 Subtarget.hasFRES()))
450
451 if (Subtarget.hasFCPSGN()) {
454 } else {
457 }
458
459 if (Subtarget.hasFPRND()) {
464
469 }
470
471 // Prior to P10, PowerPC does not have BSWAP, but we can use vector BSWAP
472 // instruction xxbrd to speed up scalar BSWAP64.
473 if (Subtarget.isISA3_1()) {
476 } else {
479 ISD::BSWAP, MVT::i64,
480 (Subtarget.hasP9Vector() && Subtarget.isPPC64()) ? Custom : Expand);
481 }
482
483 // CTPOP or CTTZ were introduced in P8/P9 respectively
484 if (Subtarget.isISA3_0()) {
485 setOperationAction(ISD::CTTZ , MVT::i32 , Legal);
486 setOperationAction(ISD::CTTZ , MVT::i64 , Legal);
487 } else {
488 setOperationAction(ISD::CTTZ , MVT::i32 , Expand);
489 setOperationAction(ISD::CTTZ , MVT::i64 , Expand);
490 }
491
492 if (Subtarget.hasPOPCNTD() == PPCSubtarget::POPCNTD_Fast) {
495 } else {
498 }
499
500 // PowerPC does not have ROTR
503
504 if (!Subtarget.useCRBits()) {
505 // PowerPC does not have Select
510 }
511
512 // PowerPC wants to turn select_cc of FP into fsel when possible.
515
516 // PowerPC wants to optimize integer setcc a bit
517 if (!Subtarget.useCRBits())
519
520 if (Subtarget.hasFPU()) {
524
528 }
529
530 // PowerPC does not have BRCOND which requires SetCC
531 if (!Subtarget.useCRBits())
533
535
536 if (Subtarget.hasSPE()) {
537 // SPE has built-in conversions
544
545 // SPE supports signaling compare of f32/f64.
548 } else {
549 // PowerPC turns FP_TO_SINT into FCTIWZ and some load/stores.
552
553 // PowerPC does not have [U|S]INT_TO_FP
558 }
559
560 if (Subtarget.hasDirectMove() && isPPC64) {
565 if (TM.Options.UnsafeFPMath) {
574 }
575 } else {
580 }
581
582 // We cannot sextinreg(i1). Expand to shifts.
584
585 // NOTE: EH_SJLJ_SETJMP/_LONGJMP supported here is NOT intended to support
586 // SjLj exception handling but a light-weight setjmp/longjmp replacement to
587 // support continuation, user-level threading, and the like. As a result, no
588 // other SjLj exception interfaces are implemented, so please don't build
589 // your own exception handling based on them.
590 // LLVM/Clang supports zero-cost DWARF exception handling.
593
594 // We want to legalize GlobalAddress and ConstantPool nodes into the
595 // appropriate instructions to materialize the address.
606
607 // TRAP is legal.
608 setOperationAction(ISD::TRAP, MVT::Other, Legal);
609
610 // TRAMPOLINE is custom lowered.
613
614 // VASTART needs to be custom lowered to use the VarArgsFrameIndex
616
617 if (Subtarget.is64BitELFABI()) {
618 // VAARG always uses double-word chunks, so promote anything smaller.
620 AddPromotedToType(ISD::VAARG, MVT::i1, MVT::i64);
622 AddPromotedToType(ISD::VAARG, MVT::i8, MVT::i64);
624 AddPromotedToType(ISD::VAARG, MVT::i16, MVT::i64);
626 AddPromotedToType(ISD::VAARG, MVT::i32, MVT::i64);
628 } else if (Subtarget.is32BitELFABI()) {
629 // VAARG is custom lowered with the 32-bit SVR4 ABI.
632 } else
634
635 // VACOPY is custom lowered with the 32-bit SVR4 ABI.
636 if (Subtarget.is32BitELFABI())
638 else
640
641 // Use the default implementation.
642 setOperationAction(ISD::VAEND , MVT::Other, Expand);
651
652 // We want to custom lower some of our intrinsics.
658
659 // To handle counter-based loop conditions.
661
666
667 // Comparisons that require checking two conditions.
668 if (Subtarget.hasSPE()) {
673 }
686
689
690 if (Subtarget.has64BitSupport()) {
691 // They also have instructions for converting between i64 and fp.
700 // This is just the low 32 bits of a (signed) fp->i64 conversion.
701 // We cannot do this with Promote because i64 is not a legal type.
704
705 if (Subtarget.hasLFIWAX() || Subtarget.isPPC64()) {
708 }
709 } else {
710 // PowerPC does not have FP_TO_UINT on 32-bit implementations.
711 if (Subtarget.hasSPE()) {
714 } else {
717 }
718 }
719
720 // With the instructions enabled under FPCVT, we can do everything.
721 if (Subtarget.hasFPCVT()) {
722 if (Subtarget.has64BitSupport()) {
731 }
732
741 }
742
743 if (Subtarget.use64BitRegs()) {
744 // 64-bit PowerPC implementations can support i64 types directly
745 addRegisterClass(MVT::i64, &PPC::G8RCRegClass);
746 // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
748 // 64-bit PowerPC wants to expand i128 shifts itself.
752 } else {
753 // 32-bit PowerPC wants to expand i64 shifts itself.
757 }
758
759 // PowerPC has better expansions for funnel shifts than the generic
760 // TargetLowering::expandFunnelShift.
761 if (Subtarget.has64BitSupport()) {
764 }
767
768 if (Subtarget.hasVSX()) {
773 }
774
775 if (Subtarget.hasAltivec()) {
776 for (MVT VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32 }) {
781 }
782 // First set operation action for all vector types to expand. Then we
783 // will selectively turn on ones that can be effectively codegen'd.
785 // add/sub are legal for all supported vector VT's.
788
789 // For v2i64, these are only valid with P8Vector. This is corrected after
790 // the loop.
791 if (VT.getSizeInBits() <= 128 && VT.getScalarSizeInBits() <= 64) {
796 }
797 else {
802 }
803
804 if (Subtarget.hasVSX()) {
807 }
808
809 // Vector instructions introduced in P8
810 if (Subtarget.hasP8Altivec() && (VT.SimpleTy != MVT::v1i128)) {
813 }
814 else {
817 }
818
819 // Vector instructions introduced in P9
820 if (Subtarget.hasP9Altivec() && (VT.SimpleTy != MVT::v1i128))
822 else
824
825 // We promote all shuffles to v16i8.
827 AddPromotedToType (ISD::VECTOR_SHUFFLE, VT, MVT::v16i8);
828
829 // We promote all non-typed operations to v4i32.
831 AddPromotedToType (ISD::AND , VT, MVT::v4i32);
833 AddPromotedToType (ISD::OR , VT, MVT::v4i32);
835 AddPromotedToType (ISD::XOR , VT, MVT::v4i32);
837 AddPromotedToType (ISD::LOAD , VT, MVT::v4i32);
839 AddPromotedToType (ISD::SELECT, VT, MVT::v4i32);
842 AddPromotedToType (ISD::SELECT_CC, VT, MVT::v4i32);
844 AddPromotedToType (ISD::STORE, VT, MVT::v4i32);
845
846 // No other operations are legal.
885
886 for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
887 setTruncStoreAction(VT, InnerVT, Expand);
890 setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
891 }
892 }
894 if (!Subtarget.hasP8Vector()) {
895 setOperationAction(ISD::SMAX, MVT::v2i64, Expand);
896 setOperationAction(ISD::SMIN, MVT::v2i64, Expand);
897 setOperationAction(ISD::UMAX, MVT::v2i64, Expand);
898 setOperationAction(ISD::UMIN, MVT::v2i64, Expand);
899 }
900
901 // We can custom expand all VECTOR_SHUFFLEs to VPERM, others we can handle
902 // with merges, splats, etc.
904
905 // Vector truncates to sub-word integers that fit in an Altivec/VSX register
906 // are cheap, so handle them before they get expanded to scalar.
912
913 setOperationAction(ISD::AND , MVT::v4i32, Legal);
914 setOperationAction(ISD::OR , MVT::v4i32, Legal);
915 setOperationAction(ISD::XOR , MVT::v4i32, Legal);
916 setOperationAction(ISD::LOAD , MVT::v4i32, Legal);
918 Subtarget.useCRBits() ? Legal : Expand);
919 setOperationAction(ISD::STORE , MVT::v4i32, Legal);
929 setOperationAction(ISD::FCEIL, MVT::v4f32, Legal);
932
933 // Custom lowering ROTL v1i128 to VECTOR_SHUFFLE v16i8.
934 setOperationAction(ISD::ROTL, MVT::v1i128, Custom);
935 // With hasAltivec set, we can lower ISD::ROTL to vrl(b|h|w).
936 if (Subtarget.hasAltivec())
937 for (auto VT : {MVT::v4i32, MVT::v8i16, MVT::v16i8})
939 // With hasP8Altivec set, we can lower ISD::ROTL to vrld.
940 if (Subtarget.hasP8Altivec())
941 setOperationAction(ISD::ROTL, MVT::v2i64, Legal);
942
943 addRegisterClass(MVT::v4f32, &PPC::VRRCRegClass);
944 addRegisterClass(MVT::v4i32, &PPC::VRRCRegClass);
945 addRegisterClass(MVT::v8i16, &PPC::VRRCRegClass);
946 addRegisterClass(MVT::v16i8, &PPC::VRRCRegClass);
947
948 setOperationAction(ISD::MUL, MVT::v4f32, Legal);
949 setOperationAction(ISD::FMA, MVT::v4f32, Legal);
950
951 if (Subtarget.hasVSX()) {
952 setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
953 setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
955 }
956
957 if (Subtarget.hasP8Altivec())
958 setOperationAction(ISD::MUL, MVT::v4i32, Legal);
959 else
960 setOperationAction(ISD::MUL, MVT::v4i32, Custom);
961
962 if (Subtarget.isISA3_1()) {
963 setOperationAction(ISD::MUL, MVT::v2i64, Legal);
964 setOperationAction(ISD::MULHS, MVT::v2i64, Legal);
965 setOperationAction(ISD::MULHU, MVT::v2i64, Legal);
966 setOperationAction(ISD::MULHS, MVT::v4i32, Legal);
967 setOperationAction(ISD::MULHU, MVT::v4i32, Legal);
968 setOperationAction(ISD::UDIV, MVT::v2i64, Legal);
969 setOperationAction(ISD::SDIV, MVT::v2i64, Legal);
970 setOperationAction(ISD::UDIV, MVT::v4i32, Legal);
971 setOperationAction(ISD::SDIV, MVT::v4i32, Legal);
972 setOperationAction(ISD::UREM, MVT::v2i64, Legal);
973 setOperationAction(ISD::SREM, MVT::v2i64, Legal);
974 setOperationAction(ISD::UREM, MVT::v4i32, Legal);
975 setOperationAction(ISD::SREM, MVT::v4i32, Legal);
976 setOperationAction(ISD::UREM, MVT::v1i128, Legal);
977 setOperationAction(ISD::SREM, MVT::v1i128, Legal);
978 setOperationAction(ISD::UDIV, MVT::v1i128, Legal);
979 setOperationAction(ISD::SDIV, MVT::v1i128, Legal);
980 setOperationAction(ISD::ROTL, MVT::v1i128, Legal);
981 }
982
983 setOperationAction(ISD::MUL, MVT::v8i16, Legal);
984 setOperationAction(ISD::MUL, MVT::v16i8, Custom);
985
988
993
994 // Altivec does not contain unordered floating-point compare instructions
995 setCondCodeAction(ISD::SETUO, MVT::v4f32, Expand);
997 setCondCodeAction(ISD::SETO, MVT::v4f32, Expand);
999
1000 if (Subtarget.hasVSX()) {
1003 if (Subtarget.hasP8Vector()) {
1006 }
1007 if (Subtarget.hasDirectMove() && isPPC64) {
1016 }
1018
1019 // The nearbyint variants are not allowed to raise the inexact exception
1020 // so we can only code-gen them with unsafe math.
1021 if (TM.Options.UnsafeFPMath) {
1024 }
1025
1026 setOperationAction(ISD::FFLOOR, MVT::v2f64, Legal);
1027 setOperationAction(ISD::FCEIL, MVT::v2f64, Legal);
1028 setOperationAction(ISD::FTRUNC, MVT::v2f64, Legal);
1030 setOperationAction(ISD::FRINT, MVT::v2f64, Legal);
1031 setOperationAction(ISD::FROUND, MVT::v2f64, Legal);
1034
1036 setOperationAction(ISD::FRINT, MVT::v4f32, Legal);
1037 setOperationAction(ISD::FROUND, MVT::v4f32, Legal);
1040
1041 setOperationAction(ISD::MUL, MVT::v2f64, Legal);
1042 setOperationAction(ISD::FMA, MVT::v2f64, Legal);
1043
1044 setOperationAction(ISD::FDIV, MVT::v2f64, Legal);
1045 setOperationAction(ISD::FSQRT, MVT::v2f64, Legal);
1046
1047 // Share the Altivec comparison restrictions.
1048 setCondCodeAction(ISD::SETUO, MVT::v2f64, Expand);
1049 setCondCodeAction(ISD::SETUEQ, MVT::v2f64, Expand);
1050 setCondCodeAction(ISD::SETO, MVT::v2f64, Expand);
1051 setCondCodeAction(ISD::SETONE, MVT::v2f64, Expand);
1052
1053 setOperationAction(ISD::LOAD, MVT::v2f64, Legal);
1054 setOperationAction(ISD::STORE, MVT::v2f64, Legal);
1055
1057
1058 if (Subtarget.hasP8Vector())
1059 addRegisterClass(MVT::f32, &PPC::VSSRCRegClass);
1060
1061 addRegisterClass(MVT::f64, &PPC::VSFRCRegClass);
1062
1063 addRegisterClass(MVT::v4i32, &PPC::VSRCRegClass);
1064 addRegisterClass(MVT::v4f32, &PPC::VSRCRegClass);
1065 addRegisterClass(MVT::v2f64, &PPC::VSRCRegClass);
1066
1067 if (Subtarget.hasP8Altivec()) {
1068 setOperationAction(ISD::SHL, MVT::v2i64, Legal);
1069 setOperationAction(ISD::SRA, MVT::v2i64, Legal);
1070 setOperationAction(ISD::SRL, MVT::v2i64, Legal);
1071
1072 // 128 bit shifts can be accomplished via 3 instructions for SHL and
1073 // SRL, but not for SRA because of the instructions available:
1074 // VS{RL} and VS{RL}O. However, due to direct move costs, it's not worth
1075 // doing.
1076 setOperationAction(ISD::SHL, MVT::v1i128, Expand);
1077 setOperationAction(ISD::SRL, MVT::v1i128, Expand);
1078 setOperationAction(ISD::SRA, MVT::v1i128, Expand);
1079
1080 setOperationAction(ISD::SETCC, MVT::v2i64, Legal);
1081 }
1082 else {
1083 setOperationAction(ISD::SHL, MVT::v2i64, Expand);
1084 setOperationAction(ISD::SRA, MVT::v2i64, Expand);
1085 setOperationAction(ISD::SRL, MVT::v2i64, Expand);
1086
1087 setOperationAction(ISD::SETCC, MVT::v2i64, Custom);
1088
1089 // VSX v2i64 only supports non-arithmetic operations.
1090 setOperationAction(ISD::ADD, MVT::v2i64, Expand);
1091 setOperationAction(ISD::SUB, MVT::v2i64, Expand);
1092 }
1093
1094 if (Subtarget.isISA3_1())
1095 setOperationAction(ISD::SETCC, MVT::v1i128, Legal);
1096 else
1097 setOperationAction(ISD::SETCC, MVT::v1i128, Expand);
1098
1099 setOperationAction(ISD::LOAD, MVT::v2i64, Promote);
1100 AddPromotedToType (ISD::LOAD, MVT::v2i64, MVT::v2f64);
1102 AddPromotedToType (ISD::STORE, MVT::v2i64, MVT::v2f64);
1103
1105
1114
1115 // Custom handling for partial vectors of integers converted to
1116 // floating point. We already have optimal handling for v2i32 through
1117 // the DAG combine, so those aren't necessary.
1134
1135 setOperationAction(ISD::FNEG, MVT::v4f32, Legal);
1136 setOperationAction(ISD::FNEG, MVT::v2f64, Legal);
1137 setOperationAction(ISD::FABS, MVT::v4f32, Legal);
1138 setOperationAction(ISD::FABS, MVT::v2f64, Legal);
1141
1144
1145 // Handle constrained floating-point operations on vectors.
1146 // The predicate is `hasVSX` because Altivec instructions do not raise
1147 // floating-point exceptions but VSX vector instructions do.
1161
1175
1176 addRegisterClass(MVT::v2i64, &PPC::VSRCRegClass);
1177 addRegisterClass(MVT::f128, &PPC::VRRCRegClass);
1178
1179 for (MVT FPT : MVT::fp_valuetypes())
1180 setLoadExtAction(ISD::EXTLOAD, MVT::f128, FPT, Expand);
1181
1182 // Expand the SELECT to SELECT_CC
1184
1185 setTruncStoreAction(MVT::f128, MVT::f64, Expand);
1186 setTruncStoreAction(MVT::f128, MVT::f32, Expand);
1187
1188 // No implementation for these ops for PowerPC.
1190 setOperationAction(ISD::FSIN, MVT::f128, Expand);
1191 setOperationAction(ISD::FCOS, MVT::f128, Expand);
1192 setOperationAction(ISD::FPOW, MVT::f128, Expand);
1194 setOperationAction(ISD::FREM, MVT::f128, Expand);
1195 }
1196
1197 if (Subtarget.hasP8Altivec()) {
1198 addRegisterClass(MVT::v2i64, &PPC::VRRCRegClass);
1199 addRegisterClass(MVT::v1i128, &PPC::VRRCRegClass);
1200 }
1201
1202 if (Subtarget.hasP9Vector()) {
1205
1206 // Test data class instructions store results in CR bits.
1207 if (Subtarget.useCRBits()) {
1211 }
1212
1213 // 128 bit shifts can be accomplished via 3 instructions for SHL and
1214 // SRL, but not for SRA because of the instructions available:
1215 // VS{RL} and VS{RL}O.
1216 setOperationAction(ISD::SHL, MVT::v1i128, Legal);
1217 setOperationAction(ISD::SRL, MVT::v1i128, Legal);
1218 setOperationAction(ISD::SRA, MVT::v1i128, Expand);
1219
1220 setOperationAction(ISD::FADD, MVT::f128, Legal);
1221 setOperationAction(ISD::FSUB, MVT::f128, Legal);
1222 setOperationAction(ISD::FDIV, MVT::f128, Legal);
1223 setOperationAction(ISD::FMUL, MVT::f128, Legal);
1225
1226 setOperationAction(ISD::FMA, MVT::f128, Legal);
1233
1235 setOperationAction(ISD::FRINT, MVT::f128, Legal);
1237 setOperationAction(ISD::FCEIL, MVT::f128, Legal);
1240
1244
1245 // Handle constrained floating-point operations of fp128
1262 setOperationAction(ISD::BSWAP, MVT::v8i16, Legal);
1263 setOperationAction(ISD::BSWAP, MVT::v4i32, Legal);
1264 setOperationAction(ISD::BSWAP, MVT::v2i64, Legal);
1265 setOperationAction(ISD::BSWAP, MVT::v1i128, Legal);
1266 } else if (Subtarget.hasVSX()) {
1269
1270 AddPromotedToType(ISD::LOAD, MVT::f128, MVT::v4i32);
1271 AddPromotedToType(ISD::STORE, MVT::f128, MVT::v4i32);
1272
1273 // Set FADD/FSUB as libcalls to keep the legalizer from expanding the
1274 // fp_to_uint and int_to_fp.
1277
1278 setOperationAction(ISD::FMUL, MVT::f128, Expand);
1279 setOperationAction(ISD::FDIV, MVT::f128, Expand);
1280 setOperationAction(ISD::FNEG, MVT::f128, Expand);
1281 setOperationAction(ISD::FABS, MVT::f128, Expand);
1283 setOperationAction(ISD::FMA, MVT::f128, Expand);
1285
1286 // Expand the fp_extend if the target type is fp128.
1289
1290 // Expand the fp_round if the source type is fp128.
1291 for (MVT VT : {MVT::f32, MVT::f64}) {
1294 }
1295
1300
1301 // Lower following f128 select_cc pattern:
1302 // select_cc x, y, tv, fv, cc -> select_cc (setcc x, y, cc), 0, tv, fv, NE
1304
1305 // We need to handle f128 SELECT_CC with integer result type.
1307 setOperationAction(ISD::SELECT_CC, MVT::i64, isPPC64 ? Custom : Expand);
1308 }
1309
1310 if (Subtarget.hasP9Altivec()) {
1311 if (Subtarget.isISA3_1()) {
1316 } else {
1319 }
1327
1328 setOperationAction(ISD::ABDU, MVT::v16i8, Legal);
1329 setOperationAction(ISD::ABDU, MVT::v8i16, Legal);
1330 setOperationAction(ISD::ABDU, MVT::v4i32, Legal);
1331 setOperationAction(ISD::ABDS, MVT::v4i32, Legal);
1332 }
1333
1334 if (Subtarget.hasP10Vector()) {
1336 }
1337 }
1338
1339 if (Subtarget.pairedVectorMemops()) {
1340 addRegisterClass(MVT::v256i1, &PPC::VSRpRCRegClass);
1341 setOperationAction(ISD::LOAD, MVT::v256i1, Custom);
1342 setOperationAction(ISD::STORE, MVT::v256i1, Custom);
1343 }
1344 if (Subtarget.hasMMA()) {
1345 if (Subtarget.isISAFuture())
1346 addRegisterClass(MVT::v512i1, &PPC::WACCRCRegClass);
1347 else
1348 addRegisterClass(MVT::v512i1, &PPC::UACCRCRegClass);
1349 setOperationAction(ISD::LOAD, MVT::v512i1, Custom);
1350 setOperationAction(ISD::STORE, MVT::v512i1, Custom);
1352 }
1353
1354 if (Subtarget.has64BitSupport())
1356
1357 if (Subtarget.isISA3_1())
1358 setOperationAction(ISD::SRA, MVT::v1i128, Legal);
1359
1360 setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, isPPC64 ? Legal : Custom);
1361
1362 if (!isPPC64) {
1365 }
1366
1371 }
1372
1374
1375 if (Subtarget.hasAltivec()) {
1376 // Altivec instructions set fields to all zeros or all ones.
1378 }
1379
1380 setLibcallName(RTLIB::MULO_I128, nullptr);
1381 if (!isPPC64) {
1382 // These libcalls are not available in 32-bit.
1383 setLibcallName(RTLIB::SHL_I128, nullptr);
1384 setLibcallName(RTLIB::SRL_I128, nullptr);
1385 setLibcallName(RTLIB::SRA_I128, nullptr);
1386 setLibcallName(RTLIB::MUL_I128, nullptr);
1387 setLibcallName(RTLIB::MULO_I64, nullptr);
1388 }
1389
1392 else if (isPPC64)
1394 else
1396
1397 setStackPointerRegisterToSaveRestore(isPPC64 ? PPC::X1 : PPC::R1);
1398
1399 // We have target-specific dag combine patterns for the following nodes:
1402 if (Subtarget.hasFPCVT())
1405 if (Subtarget.useCRBits())
1409
1411
1413
1414 if (Subtarget.useCRBits()) {
1416 }
1417
1418 setLibcallName(RTLIB::LOG_F128, "logf128");
1419 setLibcallName(RTLIB::LOG2_F128, "log2f128");
1420 setLibcallName(RTLIB::LOG10_F128, "log10f128");
1421 setLibcallName(RTLIB::EXP_F128, "expf128");
1422 setLibcallName(RTLIB::EXP2_F128, "exp2f128");
1423 setLibcallName(RTLIB::SIN_F128, "sinf128");
1424 setLibcallName(RTLIB::COS_F128, "cosf128");
1425 setLibcallName(RTLIB::SINCOS_F128, "sincosf128");
1426 setLibcallName(RTLIB::POW_F128, "powf128");
1427 setLibcallName(RTLIB::FMIN_F128, "fminf128");
1428 setLibcallName(RTLIB::FMAX_F128, "fmaxf128");
1429 setLibcallName(RTLIB::REM_F128, "fmodf128");
1430 setLibcallName(RTLIB::SQRT_F128, "sqrtf128");
1431 setLibcallName(RTLIB::CEIL_F128, "ceilf128");
1432 setLibcallName(RTLIB::FLOOR_F128, "floorf128");
1433 setLibcallName(RTLIB::TRUNC_F128, "truncf128");
1434 setLibcallName(RTLIB::ROUND_F128, "roundf128");
1435 setLibcallName(RTLIB::LROUND_F128, "lroundf128");
1436 setLibcallName(RTLIB::LLROUND_F128, "llroundf128");
1437 setLibcallName(RTLIB::RINT_F128, "rintf128");
1438 setLibcallName(RTLIB::LRINT_F128, "lrintf128");
1439 setLibcallName(RTLIB::LLRINT_F128, "llrintf128");
1440 setLibcallName(RTLIB::NEARBYINT_F128, "nearbyintf128");
1441 setLibcallName(RTLIB::FMA_F128, "fmaf128");
1442 setLibcallName(RTLIB::FREXP_F128, "frexpf128");
1443
1444 if (Subtarget.isAIXABI()) {
1445 setLibcallName(RTLIB::MEMCPY, isPPC64 ? "___memmove64" : "___memmove");
1446 setLibcallName(RTLIB::MEMMOVE, isPPC64 ? "___memmove64" : "___memmove");
1447 setLibcallName(RTLIB::MEMSET, isPPC64 ? "___memset64" : "___memset");
1448 setLibcallName(RTLIB::BZERO, isPPC64 ? "___bzero64" : "___bzero");
1449 }
1450
1451 // With 32 condition bits, we don't need to sink (and duplicate) compares
1452 // aggressively in CodeGenPrep.
1453 if (Subtarget.useCRBits()) {
1456 }
1457
1458 // TODO: The default entry number is set to 64. This stops most jump table
1459 // generation on PPC. But it is good for current PPC HWs because the indirect
1460 // branch instruction mtctr to the jump table may lead to poor branch prediction.
1461 // Re-evaluate this value on future HWs that can do better with mtctr.
1463
1465
1466 switch (Subtarget.getCPUDirective()) {
1467 default: break;
1468 case PPC::DIR_970:
1469 case PPC::DIR_A2:
1470 case PPC::DIR_E500:
1471 case PPC::DIR_E500mc:
1472 case PPC::DIR_E5500:
1473 case PPC::DIR_PWR4:
1474 case PPC::DIR_PWR5:
1475 case PPC::DIR_PWR5X:
1476 case PPC::DIR_PWR6:
1477 case PPC::DIR_PWR6X:
1478 case PPC::DIR_PWR7:
1479 case PPC::DIR_PWR8:
1480 case PPC::DIR_PWR9:
1481 case PPC::DIR_PWR10:
1485 break;
1486 }
1487
1488 if (Subtarget.enableMachineScheduler())
1490 else
1492
1494
1495 // The Freescale cores do better with aggressive inlining of memcpy and
1496 // friends. GCC uses the same threshold of 128 bytes (= 32 word stores).
1497 if (Subtarget.getCPUDirective() == PPC::DIR_E500mc ||
1498 Subtarget.getCPUDirective() == PPC::DIR_E5500) {
1499 MaxStoresPerMemset = 32;
1501 MaxStoresPerMemcpy = 32;
1505 } else if (Subtarget.getCPUDirective() == PPC::DIR_A2) {
1506 // The A2 also benefits from (very) aggressive inlining of memcpy and
1507 // friends. The overhead of a function call, even when warm, can be
1508 // over one hundred cycles.
1509 MaxStoresPerMemset = 128;
1510 MaxStoresPerMemcpy = 128;
1511 MaxStoresPerMemmove = 128;
1512 MaxLoadsPerMemcmp = 128;
1513 } else {
1516 }
1517
1518 IsStrictFPEnabled = true;
1519
1520 // Let the subtarget (CPU) decide if a predictable select is more expensive
1521 // than the corresponding branch. This information is used in CGP to decide
1522 // when to convert selects into branches.
1524
1526}
1527
1528// *********************************** NOTE ************************************
1529// For selecting load and store instructions, the addressing modes are defined
1530// as ComplexPatterns in PPCInstrInfo.td, which are then utilized in the TD
1531 // patterns to match the load and store instructions.
1532//
1533// The TD definitions for the addressing modes correspond to their respective
1534// Select<AddrMode>Form() function in PPCISelDAGToDAG.cpp. These functions rely
1535// on SelectOptimalAddrMode(), which calls computeMOFlags() to compute the
1536// address mode flags of a particular node. Afterwards, the computed address
1537// flags are passed into getAddrModeForFlags() in order to retrieve the optimal
1538// addressing mode. SelectOptimalAddrMode() then sets the Base and Displacement
1539// accordingly, based on the preferred addressing mode.
1540//
1541// Within PPCISelLowering.h, there are two enums: MemOpFlags and AddrMode.
1542// MemOpFlags contains all the possible flags that can be used to compute the
1543// optimal addressing mode for load and store instructions.
1544// AddrMode contains all the possible load and store addressing modes available
1545// on Power (such as DForm, DSForm, DQForm, XForm, etc.)
1546//
1547// When adding new load and store instructions, it is possible that new address
1548// flags may need to be added into MemOpFlags, and a new addressing mode will
1549// need to be added to AddrMode. An entry of the new addressing mode (consisting
1550// of the minimal and main distinguishing address flags for the new load/store
1551// instructions) will need to be added into initializeAddrModeMap() below.
1552// Finally, when adding new addressing modes, the getAddrModeForFlags() will
1553// need to be updated to account for selecting the optimal addressing mode.
1554// *****************************************************************************
1555/// Initialize the map that relates the different addressing modes of the load
1556/// and store instructions to a set of flags. This ensures the load/store
1557/// instruction is correctly matched during instruction selection.
1558void PPCTargetLowering::initializeAddrModeMap() {
1559 AddrModesMap[PPC::AM_DForm] = {
1560 // LWZ, STW
1565 // LBZ, LHZ, STB, STH
1570 // LHA
1575 // LFS, LFD, STFS, STFD
1580 };
1581 AddrModesMap[PPC::AM_DSForm] = {
1582 // LWA
1586 // LD, STD
1590 // DFLOADf32, DFLOADf64, DSTOREf32, DSTOREf64
1594 };
1595 AddrModesMap[PPC::AM_DQForm] = {
1596 // LXV, STXV
1600 };
1601 AddrModesMap[PPC::AM_PrefixDForm] = {PPC::MOF_RPlusSImm34 |
1603 // TODO: Add mapping for quadword load/store.
1604}
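// Illustrative walk-through (added commentary, not from the upstream file):
// for a simple word load such as `lwz`, computeMOFlags() would produce a flag
// set along the lines of "register plus 16-bit signed immediate, word-sized
// integer access"; getAddrModeForFlags() then matches that set against the
// AM_DForm entries registered above, and SelectDForm() in PPCISelDAGToDAG.cpp
// emits the D-Form base+displacement operands. The flag names in this note
// paraphrase the MemOpFlags enum rather than quote it.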
1605
1606/// getMaxByValAlign - Helper for getByValTypeAlignment to determine
1607/// the desired ByVal argument alignment.
1608static void getMaxByValAlign(Type *Ty, Align &MaxAlign, Align MaxMaxAlign) {
1609 if (MaxAlign == MaxMaxAlign)
1610 return;
1611 if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
1612 if (MaxMaxAlign >= 32 &&
1613 VTy->getPrimitiveSizeInBits().getFixedValue() >= 256)
1614 MaxAlign = Align(32);
1615 else if (VTy->getPrimitiveSizeInBits().getFixedValue() >= 128 &&
1616 MaxAlign < 16)
1617 MaxAlign = Align(16);
1618 } else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
1619 Align EltAlign;
1620 getMaxByValAlign(ATy->getElementType(), EltAlign, MaxMaxAlign);
1621 if (EltAlign > MaxAlign)
1622 MaxAlign = EltAlign;
1623 } else if (StructType *STy = dyn_cast<StructType>(Ty)) {
1624 for (auto *EltTy : STy->elements()) {
1625 Align EltAlign;
1626 getMaxByValAlign(EltTy, EltAlign, MaxMaxAlign);
1627 if (EltAlign > MaxAlign)
1628 MaxAlign = EltAlign;
1629 if (MaxAlign == MaxMaxAlign)
1630 break;
1631 }
1632 }
1633}
1634
1635/// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
1636/// function arguments in the caller parameter area.
1638 const DataLayout &DL) const {
1639 // 16-byte and wider vectors are passed on a 16-byte boundary.
1640 // The rest are on an 8-byte boundary on PPC64 and a 4-byte boundary on PPC32.
1641 Align Alignment = Subtarget.isPPC64() ? Align(8) : Align(4);
1642 if (Subtarget.hasAltivec())
1643 getMaxByValAlign(Ty, Alignment, Align(16));
1644 return Alignment.value();
1645}
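// Example (added for illustration): for a byval struct that contains a
// <4 x i32> member, getMaxByValAlign() raises the alignment to 16 when
// Altivec is available, so the aggregate is passed on a 16-byte boundary;
// without Altivec the default of 8 (PPC64) or 4 (PPC32) is returned.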
1646
1648 return Subtarget.useSoftFloat();
1649}
1650
1652 return Subtarget.hasSPE();
1653}
1654
1656 return VT.isScalarInteger();
1657}
1658
1660 Type *VectorTy, unsigned ElemSizeInBits, unsigned &Index) const {
1661 if (!Subtarget.isPPC64() || !Subtarget.hasVSX())
1662 return false;
1663
1664 if (auto *VTy = dyn_cast<VectorType>(VectorTy)) {
1665 if (VTy->getScalarType()->isIntegerTy()) {
1666 // ElemSizeInBits 8/16 can fit in immediate field, not needed here.
1667 if (ElemSizeInBits == 32) {
1668 Index = Subtarget.isLittleEndian() ? 2 : 1;
1669 return true;
1670 }
1671 if (ElemSizeInBits == 64) {
1672 Index = Subtarget.isLittleEndian() ? 1 : 0;
1673 return true;
1674 }
1675 }
1676 }
1677 return false;
1678}
1679
1680const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
1681 switch ((PPCISD::NodeType)Opcode) {
1682 case PPCISD::FIRST_NUMBER: break;
1683 case PPCISD::FSEL: return "PPCISD::FSEL";
1684 case PPCISD::XSMAXC: return "PPCISD::XSMAXC";
1685 case PPCISD::XSMINC: return "PPCISD::XSMINC";
1686 case PPCISD::FCFID: return "PPCISD::FCFID";
1687 case PPCISD::FCFIDU: return "PPCISD::FCFIDU";
1688 case PPCISD::FCFIDS: return "PPCISD::FCFIDS";
1689 case PPCISD::FCFIDUS: return "PPCISD::FCFIDUS";
1690 case PPCISD::FCTIDZ: return "PPCISD::FCTIDZ";
1691 case PPCISD::FCTIWZ: return "PPCISD::FCTIWZ";
1692 case PPCISD::FCTIDUZ: return "PPCISD::FCTIDUZ";
1693 case PPCISD::FCTIWUZ: return "PPCISD::FCTIWUZ";
1694 case PPCISD::FRE: return "PPCISD::FRE";
1695 case PPCISD::FRSQRTE: return "PPCISD::FRSQRTE";
1696 case PPCISD::FTSQRT:
1697 return "PPCISD::FTSQRT";
1698 case PPCISD::FSQRT:
1699 return "PPCISD::FSQRT";
1700 case PPCISD::STFIWX: return "PPCISD::STFIWX";
1701 case PPCISD::VPERM: return "PPCISD::VPERM";
1702 case PPCISD::XXSPLT: return "PPCISD::XXSPLT";
1704 return "PPCISD::XXSPLTI_SP_TO_DP";
1706 return "PPCISD::XXSPLTI32DX";
1707 case PPCISD::VECINSERT: return "PPCISD::VECINSERT";
1708 case PPCISD::XXPERMDI: return "PPCISD::XXPERMDI";
1709 case PPCISD::XXPERM:
1710 return "PPCISD::XXPERM";
1711 case PPCISD::VECSHL: return "PPCISD::VECSHL";
1712 case PPCISD::CMPB: return "PPCISD::CMPB";
1713 case PPCISD::Hi: return "PPCISD::Hi";
1714 case PPCISD::Lo: return "PPCISD::Lo";
1715 case PPCISD::TOC_ENTRY: return "PPCISD::TOC_ENTRY";
1716 case PPCISD::ATOMIC_CMP_SWAP_8: return "PPCISD::ATOMIC_CMP_SWAP_8";
1717 case PPCISD::ATOMIC_CMP_SWAP_16: return "PPCISD::ATOMIC_CMP_SWAP_16";
1718 case PPCISD::DYNALLOC: return "PPCISD::DYNALLOC";
1719 case PPCISD::DYNAREAOFFSET: return "PPCISD::DYNAREAOFFSET";
1720 case PPCISD::PROBED_ALLOCA: return "PPCISD::PROBED_ALLOCA";
1721 case PPCISD::GlobalBaseReg: return "PPCISD::GlobalBaseReg";
1722 case PPCISD::SRL: return "PPCISD::SRL";
1723 case PPCISD::SRA: return "PPCISD::SRA";
1724 case PPCISD::SHL: return "PPCISD::SHL";
1725 case PPCISD::SRA_ADDZE: return "PPCISD::SRA_ADDZE";
1726 case PPCISD::CALL: return "PPCISD::CALL";
1727 case PPCISD::CALL_NOP: return "PPCISD::CALL_NOP";
1728 case PPCISD::CALL_NOTOC: return "PPCISD::CALL_NOTOC";
1729 case PPCISD::CALL_RM:
1730 return "PPCISD::CALL_RM";
1732 return "PPCISD::CALL_NOP_RM";
1734 return "PPCISD::CALL_NOTOC_RM";
1735 case PPCISD::MTCTR: return "PPCISD::MTCTR";
1736 case PPCISD::BCTRL: return "PPCISD::BCTRL";
1737 case PPCISD::BCTRL_LOAD_TOC: return "PPCISD::BCTRL_LOAD_TOC";
1738 case PPCISD::BCTRL_RM:
1739 return "PPCISD::BCTRL_RM";
1741 return "PPCISD::BCTRL_LOAD_TOC_RM";
1742 case PPCISD::RET_GLUE: return "PPCISD::RET_GLUE";
1743 case PPCISD::READ_TIME_BASE: return "PPCISD::READ_TIME_BASE";
1744 case PPCISD::EH_SJLJ_SETJMP: return "PPCISD::EH_SJLJ_SETJMP";
1745 case PPCISD::EH_SJLJ_LONGJMP: return "PPCISD::EH_SJLJ_LONGJMP";
1746 case PPCISD::MFOCRF: return "PPCISD::MFOCRF";
1747 case PPCISD::MFVSR: return "PPCISD::MFVSR";
1748 case PPCISD::MTVSRA: return "PPCISD::MTVSRA";
1749 case PPCISD::MTVSRZ: return "PPCISD::MTVSRZ";
1750 case PPCISD::SINT_VEC_TO_FP: return "PPCISD::SINT_VEC_TO_FP";
1751 case PPCISD::UINT_VEC_TO_FP: return "PPCISD::UINT_VEC_TO_FP";
1753 return "PPCISD::SCALAR_TO_VECTOR_PERMUTED";
1755 return "PPCISD::ANDI_rec_1_EQ_BIT";
1757 return "PPCISD::ANDI_rec_1_GT_BIT";
1758 case PPCISD::VCMP: return "PPCISD::VCMP";
1759 case PPCISD::VCMP_rec: return "PPCISD::VCMP_rec";
1760 case PPCISD::LBRX: return "PPCISD::LBRX";
1761 case PPCISD::STBRX: return "PPCISD::STBRX";
1762 case PPCISD::LFIWAX: return "PPCISD::LFIWAX";
1763 case PPCISD::LFIWZX: return "PPCISD::LFIWZX";
1764 case PPCISD::LXSIZX: return "PPCISD::LXSIZX";
1765 case PPCISD::STXSIX: return "PPCISD::STXSIX";
1766 case PPCISD::VEXTS: return "PPCISD::VEXTS";
1767 case PPCISD::LXVD2X: return "PPCISD::LXVD2X";
1768 case PPCISD::STXVD2X: return "PPCISD::STXVD2X";
1769 case PPCISD::LOAD_VEC_BE: return "PPCISD::LOAD_VEC_BE";
1770 case PPCISD::STORE_VEC_BE: return "PPCISD::STORE_VEC_BE";
1772 return "PPCISD::ST_VSR_SCAL_INT";
1773 case PPCISD::COND_BRANCH: return "PPCISD::COND_BRANCH";
1774 case PPCISD::BDNZ: return "PPCISD::BDNZ";
1775 case PPCISD::BDZ: return "PPCISD::BDZ";
1776 case PPCISD::MFFS: return "PPCISD::MFFS";
1777 case PPCISD::FADDRTZ: return "PPCISD::FADDRTZ";
1778 case PPCISD::TC_RETURN: return "PPCISD::TC_RETURN";
1779 case PPCISD::CR6SET: return "PPCISD::CR6SET";
1780 case PPCISD::CR6UNSET: return "PPCISD::CR6UNSET";
1781 case PPCISD::PPC32_GOT: return "PPCISD::PPC32_GOT";
1782 case PPCISD::PPC32_PICGOT: return "PPCISD::PPC32_PICGOT";
1783 case PPCISD::ADDIS_GOT_TPREL_HA: return "PPCISD::ADDIS_GOT_TPREL_HA";
1784 case PPCISD::LD_GOT_TPREL_L: return "PPCISD::LD_GOT_TPREL_L";
1785 case PPCISD::ADD_TLS: return "PPCISD::ADD_TLS";
1786 case PPCISD::ADDIS_TLSGD_HA: return "PPCISD::ADDIS_TLSGD_HA";
1787 case PPCISD::ADDI_TLSGD_L: return "PPCISD::ADDI_TLSGD_L";
1788 case PPCISD::GET_TLS_ADDR: return "PPCISD::GET_TLS_ADDR";
1789 case PPCISD::GET_TLS_MOD_AIX: return "PPCISD::GET_TLS_MOD_AIX";
1790 case PPCISD::GET_TPOINTER: return "PPCISD::GET_TPOINTER";
1791 case PPCISD::ADDI_TLSGD_L_ADDR: return "PPCISD::ADDI_TLSGD_L_ADDR";
1792 case PPCISD::TLSGD_AIX: return "PPCISD::TLSGD_AIX";
1793 case PPCISD::TLSLD_AIX: return "PPCISD::TLSLD_AIX";
1794 case PPCISD::ADDIS_TLSLD_HA: return "PPCISD::ADDIS_TLSLD_HA";
1795 case PPCISD::ADDI_TLSLD_L: return "PPCISD::ADDI_TLSLD_L";
1796 case PPCISD::GET_TLSLD_ADDR: return "PPCISD::GET_TLSLD_ADDR";
1797 case PPCISD::ADDI_TLSLD_L_ADDR: return "PPCISD::ADDI_TLSLD_L_ADDR";
1798 case PPCISD::ADDIS_DTPREL_HA: return "PPCISD::ADDIS_DTPREL_HA";
1799 case PPCISD::ADDI_DTPREL_L: return "PPCISD::ADDI_DTPREL_L";
1801 return "PPCISD::PADDI_DTPREL";
1802 case PPCISD::VADD_SPLAT: return "PPCISD::VADD_SPLAT";
1803 case PPCISD::SC: return "PPCISD::SC";
1804 case PPCISD::CLRBHRB: return "PPCISD::CLRBHRB";
1805 case PPCISD::MFBHRBE: return "PPCISD::MFBHRBE";
1806 case PPCISD::RFEBB: return "PPCISD::RFEBB";
1807 case PPCISD::XXSWAPD: return "PPCISD::XXSWAPD";
1808 case PPCISD::SWAP_NO_CHAIN: return "PPCISD::SWAP_NO_CHAIN";
1809 case PPCISD::BUILD_FP128: return "PPCISD::BUILD_FP128";
1810 case PPCISD::BUILD_SPE64: return "PPCISD::BUILD_SPE64";
1811 case PPCISD::EXTRACT_SPE: return "PPCISD::EXTRACT_SPE";
1812 case PPCISD::EXTSWSLI: return "PPCISD::EXTSWSLI";
1813 case PPCISD::LD_VSX_LH: return "PPCISD::LD_VSX_LH";
1814 case PPCISD::FP_EXTEND_HALF: return "PPCISD::FP_EXTEND_HALF";
1815 case PPCISD::MAT_PCREL_ADDR: return "PPCISD::MAT_PCREL_ADDR";
1817 return "PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR";
1819 return "PPCISD::TLS_LOCAL_EXEC_MAT_ADDR";
1820 case PPCISD::ACC_BUILD: return "PPCISD::ACC_BUILD";
1821 case PPCISD::PAIR_BUILD: return "PPCISD::PAIR_BUILD";
1822 case PPCISD::EXTRACT_VSX_REG: return "PPCISD::EXTRACT_VSX_REG";
1823 case PPCISD::XXMFACC: return "PPCISD::XXMFACC";
1824 case PPCISD::LD_SPLAT: return "PPCISD::LD_SPLAT";
1825 case PPCISD::ZEXT_LD_SPLAT: return "PPCISD::ZEXT_LD_SPLAT";
1826 case PPCISD::SEXT_LD_SPLAT: return "PPCISD::SEXT_LD_SPLAT";
1827 case PPCISD::FNMSUB: return "PPCISD::FNMSUB";
1829 return "PPCISD::STRICT_FADDRTZ";
1831 return "PPCISD::STRICT_FCTIDZ";
1833 return "PPCISD::STRICT_FCTIWZ";
1835 return "PPCISD::STRICT_FCTIDUZ";
1837 return "PPCISD::STRICT_FCTIWUZ";
1839 return "PPCISD::STRICT_FCFID";
1841 return "PPCISD::STRICT_FCFIDU";
1843 return "PPCISD::STRICT_FCFIDS";
1845 return "PPCISD::STRICT_FCFIDUS";
1846 case PPCISD::LXVRZX: return "PPCISD::LXVRZX";
1847 case PPCISD::STORE_COND:
1848 return "PPCISD::STORE_COND";
1849 }
1850 return nullptr;
1851}
1852
1854 EVT VT) const {
1855 if (!VT.isVector())
1856 return Subtarget.useCRBits() ? MVT::i1 : MVT::i32;
1857
1859}
1860
1862 assert(VT.isFloatingPoint() && "Non-floating-point FMA?");
1863 return true;
1864}
1865
1866//===----------------------------------------------------------------------===//
1867// Node matching predicates, for use by the tblgen matching code.
1868//===----------------------------------------------------------------------===//
1869
1870/// isFloatingPointZero - Return true if this is 0.0 or -0.0.
1872 if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op))
1873 return CFP->getValueAPF().isZero();
1874 else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) {
1875 // Maybe this has already been legalized into the constant pool?
1876 if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op.getOperand(1)))
1877 if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
1878 return CFP->getValueAPF().isZero();
1879 }
1880 return false;
1881}
1882
1883/// isConstantOrUndef - Op is either an undef node or a ConstantSDNode. Return
1884/// true if Op is undef or if it matches the specified value.
1885static bool isConstantOrUndef(int Op, int Val) {
1886 return Op < 0 || Op == Val;
1887}
1888
1889/// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a
1890/// VPKUHUM instruction.
1891/// The ShuffleKind distinguishes between big-endian operations with
1892/// two different inputs (0), either-endian operations with two identical
1893/// inputs (1), and little-endian operations with two different inputs (2).
1894/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
1896 SelectionDAG &DAG) {
1897 bool IsLE = DAG.getDataLayout().isLittleEndian();
1898 if (ShuffleKind == 0) {
1899 if (IsLE)
1900 return false;
1901 for (unsigned i = 0; i != 16; ++i)
1902 if (!isConstantOrUndef(N->getMaskElt(i), i*2+1))
1903 return false;
1904 } else if (ShuffleKind == 2) {
1905 if (!IsLE)
1906 return false;
1907 for (unsigned i = 0; i != 16; ++i)
1908 if (!isConstantOrUndef(N->getMaskElt(i), i*2))
1909 return false;
1910 } else if (ShuffleKind == 1) {
1911 unsigned j = IsLE ? 0 : 1;
1912 for (unsigned i = 0; i != 8; ++i)
1913 if (!isConstantOrUndef(N->getMaskElt(i), i*2+j) ||
1914 !isConstantOrUndef(N->getMaskElt(i+8), i*2+j))
1915 return false;
1916 }
1917 return true;
1918}
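// Example (added for illustration): for the big-endian two-input case
// (ShuffleKind == 0), the loop above requires mask element i to equal 2*i+1,
// i.e. the mask <1,3,5,...,29,31>, which selects the low (odd-numbered)
// byte of every halfword across both concatenated inputs.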
1919
1920/// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a
1921/// VPKUWUM instruction.
1922/// The ShuffleKind distinguishes between big-endian operations with
1923/// two different inputs (0), either-endian operations with two identical
1924/// inputs (1), and little-endian operations with two different inputs (2).
1925/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
1927 SelectionDAG &DAG) {
1928 bool IsLE = DAG.getDataLayout().isLittleEndian();
1929 if (ShuffleKind == 0) {
1930 if (IsLE)
1931 return false;
1932 for (unsigned i = 0; i != 16; i += 2)
1933 if (!isConstantOrUndef(N->getMaskElt(i ), i*2+2) ||
1934 !isConstantOrUndef(N->getMaskElt(i+1), i*2+3))
1935 return false;
1936 } else if (ShuffleKind == 2) {
1937 if (!IsLE)
1938 return false;
1939 for (unsigned i = 0; i != 16; i += 2)
1940 if (!isConstantOrUndef(N->getMaskElt(i ), i*2) ||
1941 !isConstantOrUndef(N->getMaskElt(i+1), i*2+1))
1942 return false;
1943 } else if (ShuffleKind == 1) {
1944 unsigned j = IsLE ? 0 : 2;
1945 for (unsigned i = 0; i != 8; i += 2)
1946 if (!isConstantOrUndef(N->getMaskElt(i ), i*2+j) ||
1947 !isConstantOrUndef(N->getMaskElt(i+1), i*2+j+1) ||
1948 !isConstantOrUndef(N->getMaskElt(i+8), i*2+j) ||
1949 !isConstantOrUndef(N->getMaskElt(i+9), i*2+j+1))
1950 return false;
1951 }
1952 return true;
1953}
1954
1955/// isVPKUDUMShuffleMask - Return true if this is the shuffle mask for a
1956/// VPKUDUM instruction, AND the VPKUDUM instruction exists for the
1957/// current subtarget.
1958///
1959/// The ShuffleKind distinguishes between big-endian operations with
1960/// two different inputs (0), either-endian operations with two identical
1961/// inputs (1), and little-endian operations with two different inputs (2).
1962/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
1964 SelectionDAG &DAG) {
1965 const PPCSubtarget &Subtarget = DAG.getSubtarget<PPCSubtarget>();
1966 if (!Subtarget.hasP8Vector())
1967 return false;
1968
1969 bool IsLE = DAG.getDataLayout().isLittleEndian();
1970 if (ShuffleKind == 0) {
1971 if (IsLE)
1972 return false;
1973 for (unsigned i = 0; i != 16; i += 4)
1974 if (!isConstantOrUndef(N->getMaskElt(i ), i*2+4) ||
1975 !isConstantOrUndef(N->getMaskElt(i+1), i*2+5) ||
1976 !isConstantOrUndef(N->getMaskElt(i+2), i*2+6) ||
1977 !isConstantOrUndef(N->getMaskElt(i+3), i*2+7))
1978 return false;
1979 } else if (ShuffleKind == 2) {
1980 if (!IsLE)
1981 return false;
1982 for (unsigned i = 0; i != 16; i += 4)
1983 if (!isConstantOrUndef(N->getMaskElt(i ), i*2) ||
1984 !isConstantOrUndef(N->getMaskElt(i+1), i*2+1) ||
1985 !isConstantOrUndef(N->getMaskElt(i+2), i*2+2) ||
1986 !isConstantOrUndef(N->getMaskElt(i+3), i*2+3))
1987 return false;
1988 } else if (ShuffleKind == 1) {
1989 unsigned j = IsLE ? 0 : 4;
1990 for (unsigned i = 0; i != 8; i += 4)
1991 if (!isConstantOrUndef(N->getMaskElt(i ), i*2+j) ||
1992 !isConstantOrUndef(N->getMaskElt(i+1), i*2+j+1) ||
1993 !isConstantOrUndef(N->getMaskElt(i+2), i*2+j+2) ||
1994 !isConstantOrUndef(N->getMaskElt(i+3), i*2+j+3) ||
1995 !isConstantOrUndef(N->getMaskElt(i+8), i*2+j) ||
1996 !isConstantOrUndef(N->getMaskElt(i+9), i*2+j+1) ||
1997 !isConstantOrUndef(N->getMaskElt(i+10), i*2+j+2) ||
1998 !isConstantOrUndef(N->getMaskElt(i+11), i*2+j+3))
1999 return false;
2000 }
2001 return true;
2002}
2003
2004/// isVMerge - Common function, used to match vmrg* shuffles.
2005///
2006static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize,
2007 unsigned LHSStart, unsigned RHSStart) {
2008 if (N->getValueType(0) != MVT::v16i8)
2009 return false;
2010 assert((UnitSize == 1 || UnitSize == 2 || UnitSize == 4) &&
2011 "Unsupported merge size!");
2012
2013 for (unsigned i = 0; i != 8/UnitSize; ++i) // Step over units
2014 for (unsigned j = 0; j != UnitSize; ++j) { // Step over bytes within unit
2015 if (!isConstantOrUndef(N->getMaskElt(i*UnitSize*2+j),
2016 LHSStart+j+i*UnitSize) ||
2017 !isConstantOrUndef(N->getMaskElt(i*UnitSize*2+UnitSize+j),
2018 RHSStart+j+i*UnitSize))
2019 return false;
2020 }
2021 return true;
2022}
2023
2024/// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for
2025/// a VMRGL* instruction with the specified unit size (1,2 or 4 bytes).
2026/// The ShuffleKind distinguishes between big-endian merges with two
2027/// different inputs (0), either-endian merges with two identical inputs (1),
2028/// and little-endian merges with two different inputs (2). For the latter,
2029/// the input operands are swapped (see PPCInstrAltivec.td).
2031 unsigned ShuffleKind, SelectionDAG &DAG) {
2032 if (DAG.getDataLayout().isLittleEndian()) {
2033 if (ShuffleKind == 1) // unary
2034 return isVMerge(N, UnitSize, 0, 0);
2035 else if (ShuffleKind == 2) // swapped
2036 return isVMerge(N, UnitSize, 0, 16);
2037 else
2038 return false;
2039 } else {
2040 if (ShuffleKind == 1) // unary
2041 return isVMerge(N, UnitSize, 8, 8);
2042 else if (ShuffleKind == 0) // normal
2043 return isVMerge(N, UnitSize, 8, 24);
2044 else
2045 return false;
2046 }
2047}
2048
2049/// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for
2050/// a VMRGH* instruction with the specified unit size (1,2 or 4 bytes).
2051/// The ShuffleKind distinguishes between big-endian merges with two
2052/// different inputs (0), either-endian merges with two identical inputs (1),
2053/// and little-endian merges with two different inputs (2). For the latter,
2054/// the input operands are swapped (see PPCInstrAltivec.td).
2056 unsigned ShuffleKind, SelectionDAG &DAG) {
2057 if (DAG.getDataLayout().isLittleEndian()) {
2058 if (ShuffleKind == 1) // unary
2059 return isVMerge(N, UnitSize, 8, 8);
2060 else if (ShuffleKind == 2) // swapped
2061 return isVMerge(N, UnitSize, 8, 24);
2062 else
2063 return false;
2064 } else {
2065 if (ShuffleKind == 1) // unary
2066 return isVMerge(N, UnitSize, 0, 0);
2067 else if (ShuffleKind == 0) // normal
2068 return isVMerge(N, UnitSize, 0, 16);
2069 else
2070 return false;
2071 }
2072}
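// Example (added for illustration): a big-endian two-input word merge-high
// (isVMRGHShuffleMask with UnitSize == 4, ShuffleKind == 0) reduces to
// isVMerge(N, 4, 0, 16), i.e. the mask
// <0,1,2,3, 16,17,18,19, 4,5,6,7, 20,21,22,23>,
// which interleaves words 0 and 1 of the two inputs just as vmrghw does.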
2073
2074/**
2075 * Common function used to match vmrgew and vmrgow shuffles
2076 *
2077 * The indexOffset determines whether to look for even or odd words in
2078 * the shuffle mask. This is based on the endianness of the target
2079 * machine.
2080 * - Little Endian:
2081 * - Use offset of 0 to check for odd elements
2082 * - Use offset of 4 to check for even elements
2083 * - Big Endian:
2084 * - Use offset of 0 to check for even elements
2085 * - Use offset of 4 to check for odd elements
2086 * A detailed description of the vector element ordering for little endian and
2087 * big endian can be found at
2088 * http://www.ibm.com/developerworks/library/l-ibm-xl-c-cpp-compiler/index.html
2089 * Targeting your applications - what little endian and big endian IBM XL C/C++
2090 * compiler differences mean to you
2091 *
2092 * The mask to the shuffle vector instruction specifies the indices of the
2093 * elements from the two input vectors to place in the result. The elements are
2094 * numbered in array-access order, starting with the first vector. These vectors
2095 * are always of type v16i8, thus each vector will contain 16 byte-sized
2096 * elements. More info on the shuffle vector can be found in the
2097 * http://llvm.org/docs/LangRef.html#shufflevector-instruction
2098 * Language Reference.
2099 *
2100 * The RHSStartValue indicates whether the same input vectors are used (unary)
2101 * or two different input vectors are used, based on the following:
2102 * - If the instruction uses the same vector for both inputs, the range of the
2103 * indices will be 0 to 15. In this case, the RHSStart value passed should
2104 * be 0.
2105 * - If the instruction has two different vectors then the range of the
2106 * indices will be 0 to 31. In this case, the RHSStart value passed should
2107 * be 16 (indices 0-15 specify elements in the first vector while indices 16
2108 * to 31 specify elements in the second vector).
2109 *
2110 * \param[in] N The shuffle vector SD Node to analyze
2111 * \param[in] IndexOffset Specifies whether to look for even or odd elements
2112 * \param[in] RHSStartValue Specifies the starting index for the righthand input
2113 * vector to the shuffle_vector instruction
2114 * \return true iff this shuffle vector represents an even or odd word merge
2115 */
2116static bool isVMerge(ShuffleVectorSDNode *N, unsigned IndexOffset,
2117 unsigned RHSStartValue) {
2118 if (N->getValueType(0) != MVT::v16i8)
2119 return false;
2120
2121 for (unsigned i = 0; i < 2; ++i)
2122 for (unsigned j = 0; j < 4; ++j)
2123 if (!isConstantOrUndef(N->getMaskElt(i*4+j),
2124 i*RHSStartValue+j+IndexOffset) ||
2125 !isConstantOrUndef(N->getMaskElt(i*4+j+8),
2126 i*RHSStartValue+j+IndexOffset+8))
2127 return false;
2128 return true;
2129}
2130
2131/**
2132 * Determine if the specified shuffle mask is suitable for the vmrgew or
2133 * vmrgow instructions.
2134 *
2135 * \param[in] N The shuffle vector SD Node to analyze
2136 * \param[in] CheckEven Check for an even merge (true) or an odd merge (false)
2137 * \param[in] ShuffleKind Identify the type of merge:
2138 * - 0 = big-endian merge with two different inputs;
2139 * - 1 = either-endian merge with two identical inputs;
2140 * - 2 = little-endian merge with two different inputs (inputs are swapped for
2141 * little-endian merges).
2142 * \param[in] DAG The current SelectionDAG
2143 * \return true iff this shuffle mask is suitable for the vmrgew or vmrgow
2144 * operation
2144 */
2146 unsigned ShuffleKind, SelectionDAG &DAG) {
2147 if (DAG.getDataLayout().isLittleEndian()) {
2148 unsigned indexOffset = CheckEven ? 4 : 0;
2149 if (ShuffleKind == 1) // Unary
2150 return isVMerge(N, indexOffset, 0);
2151 else if (ShuffleKind == 2) // swapped
2152 return isVMerge(N, indexOffset, 16);
2153 else
2154 return false;
2155 }
2156 else {
2157 unsigned indexOffset = CheckEven ? 0 : 4;
2158 if (ShuffleKind == 1) // Unary
2159 return isVMerge(N, indexOffset, 0);
2160 else if (ShuffleKind == 0) // Normal
2161 return isVMerge(N, indexOffset, 16);
2162 else
2163 return false;
2164 }
2165 return false;
2166}
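// Example (added for illustration): on a big-endian target with two different
// inputs (ShuffleKind == 0), an even-word merge uses indexOffset 0 and
// RHSStartValue 16, which the helper above accepts for the mask
// <0,1,2,3, 16,17,18,19, 8,9,10,11, 24,25,26,27>,
// i.e. words 0 and 2 taken alternately from each input, as vmrgew expects.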
2167
2168/// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift
2169/// amount, otherwise return -1.
2170/// The ShuffleKind distinguishes between big-endian operations with two
2171/// different inputs (0), either-endian operations with two identical inputs
2172/// (1), and little-endian operations with two different inputs (2). For the
2173/// latter, the input operands are swapped (see PPCInstrAltivec.td).
2174int PPC::isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind,
2175 SelectionDAG &DAG) {
2176 if (N->getValueType(0) != MVT::v16i8)
2177 return -1;
2178
2179 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
2180
2181 // Find the first non-undef value in the shuffle mask.
2182 unsigned i;
2183 for (i = 0; i != 16 && SVOp->getMaskElt(i) < 0; ++i)
2184 /*search*/;
2185
2186 if (i == 16) return -1; // all undef.
2187
2188 // Otherwise, check to see if the rest of the elements are consecutively
2189 // numbered from this value.
2190 unsigned ShiftAmt = SVOp->getMaskElt(i);
2191 if (ShiftAmt < i) return -1;
2192
2193 ShiftAmt -= i;
2194 bool isLE = DAG.getDataLayout().isLittleEndian();
2195
2196 if ((ShuffleKind == 0 && !isLE) || (ShuffleKind == 2 && isLE)) {
2197 // Check the rest of the elements to see if they are consecutive.
2198 for (++i; i != 16; ++i)
2199 if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt+i))
2200 return -1;
2201 } else if (ShuffleKind == 1) {
2202 // Check the rest of the elements to see if they are consecutive.
2203 for (++i; i != 16; ++i)
2204 if (!isConstantOrUndef(SVOp->getMaskElt(i), (ShiftAmt+i) & 15))
2205 return -1;
2206 } else
2207 return -1;
2208
2209 if (isLE)
2210 ShiftAmt = 16 - ShiftAmt;
2211
2212 return ShiftAmt;
2213}
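// For example, with two distinct inputs the mask <3,4,5,...,18> is consecutive
// starting at 3, so ShuffleKind 0 on a big-endian target returns a vsldoi
// shift amount of 3, while ShuffleKind 2 on a little-endian target returns
// 16 - 3 = 13 for the same mask.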
2214
2215/// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand
2216/// specifies a splat of a single element that is suitable for input to
2217/// one of the splat operations (VSPLTB/VSPLTH/VSPLTW/XXSPLTW/LXVDSX/etc.).
2218bool PPC::isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize) {
2219 EVT VT = N->getValueType(0);
2220 if (VT == MVT::v2i64 || VT == MVT::v2f64)
2221 return EltSize == 8 && N->getMaskElt(0) == N->getMaskElt(1);
2222
2223 assert(VT == MVT::v16i8 && isPowerOf2_32(EltSize) &&
2224 EltSize <= 8 && "Can only handle 1,2,4,8 byte element sizes");
2225
2226 // The consecutive indices need to specify an element, not part of two
2227 // different elements. So abandon ship early if this isn't the case.
2228 if (N->getMaskElt(0) % EltSize != 0)
2229 return false;
2230
2231 // This is a splat operation if each element of the permute is the same, and
2232 // if the value doesn't reference the second vector.
2233 unsigned ElementBase = N->getMaskElt(0);
2234
2235 // FIXME: Handle UNDEF elements too!
2236 if (ElementBase >= 16)
2237 return false;
2238
2239 // Check that the indices are consecutive, in the case of a multi-byte element
2240 // splatted with a v16i8 mask.
2241 for (unsigned i = 1; i != EltSize; ++i)
2242 if (N->getMaskElt(i) < 0 || N->getMaskElt(i) != (int)(i+ElementBase))
2243 return false;
2244
2245 for (unsigned i = EltSize, e = 16; i != e; i += EltSize) {
2246 if (N->getMaskElt(i) < 0) continue;
2247 for (unsigned j = 0; j != EltSize; ++j)
2248 if (N->getMaskElt(i+j) != N->getMaskElt(j))
2249 return false;
2250 }
2251 return true;
2252}
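// For example, splatting word element 1 (EltSize == 4) is expressed as the
// v16i8 mask <4,5,6,7, 4,5,6,7, 4,5,6,7, 4,5,6,7>, which this function
// accepts.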
2253
2254/// Check that the mask is shuffling N byte elements. Within each N byte
2255/// element of the mask, the indices could be either in increasing or
2256/// decreasing order as long as they are consecutive.
2257/// \param[in] N the shuffle vector SD Node to analyze
2258/// \param[in] Width the element width in bytes, could be 2/4/8/16 (HalfWord/
2259/// Word/DoubleWord/QuadWord).
2260/// \param[in] StepLen the delta between adjacent indices within each N-byte
2261/// element; it is 1 for an increasing mask and -1 for a decreasing one.
2262/// \return true iff the mask is shuffling N byte elements.
2263static bool isNByteElemShuffleMask(ShuffleVectorSDNode *N, unsigned Width,
2264 int StepLen) {
2265 assert((Width == 2 || Width == 4 || Width == 8 || Width == 16) &&
2266 "Unexpected element width.");
2267 assert((StepLen == 1 || StepLen == -1) && "Unexpected step length.");
2268
2269 unsigned NumOfElem = 16 / Width;
2270 unsigned MaskVal[16]; // Width is never greater than 16
2271 for (unsigned i = 0; i < NumOfElem; ++i) {
2272 MaskVal[0] = N->getMaskElt(i * Width);
2273 if ((StepLen == 1) && (MaskVal[0] % Width)) {
2274 return false;
2275 } else if ((StepLen == -1) && ((MaskVal[0] + 1) % Width)) {
2276 return false;
2277 }
2278
2279 for (unsigned int j = 1; j < Width; ++j) {
2280 MaskVal[j] = N->getMaskElt(i * Width + j);
2281 if (MaskVal[j] != MaskVal[j-1] + StepLen) {
2282 return false;
2283 }
2284 }
2285 }
2286
2287 return true;
2288}
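// For example, with Width == 4 and StepLen == 1 the mask
// <8,9,10,11, 0,1,2,3, 20,21,22,23, 4,5,6,7> moves whole words and is
// accepted; with StepLen == -1 the bytes inside each word must instead be
// consecutive in decreasing order, e.g. <3,2,1,0, 7,6,5,4, ...>.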
2289
2290bool PPC::isXXINSERTWMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
2291 unsigned &InsertAtByte, bool &Swap, bool IsLE) {
2292 if (!isNByteElemShuffleMask(N, 4, 1))
2293 return false;
2294
2295 // Now we look at mask elements 0,4,8,12
2296 unsigned M0 = N->getMaskElt(0) / 4;
2297 unsigned M1 = N->getMaskElt(4) / 4;
2298 unsigned M2 = N->getMaskElt(8) / 4;
2299 unsigned M3 = N->getMaskElt(12) / 4;
2300 unsigned LittleEndianShifts[] = { 2, 1, 0, 3 };
2301 unsigned BigEndianShifts[] = { 3, 0, 1, 2 };
2302
2303 // Below, let H and L be arbitrary elements of the shuffle mask
2304 // where H is in the range [4,7] and L is in the range [0,3].
2305 // H, 1, 2, 3 or L, 5, 6, 7
2306 if ((M0 > 3 && M1 == 1 && M2 == 2 && M3 == 3) ||
2307 (M0 < 4 && M1 == 5 && M2 == 6 && M3 == 7)) {
2308 ShiftElts = IsLE ? LittleEndianShifts[M0 & 0x3] : BigEndianShifts[M0 & 0x3];
2309 InsertAtByte = IsLE ? 12 : 0;
2310 Swap = M0 < 4;
2311 return true;
2312 }
2313 // 0, H, 2, 3 or 4, L, 6, 7
2314 if ((M1 > 3 && M0 == 0 && M2 == 2 && M3 == 3) ||
2315 (M1 < 4 && M0 == 4 && M2 == 6 && M3 == 7)) {
2316 ShiftElts = IsLE ? LittleEndianShifts[M1 & 0x3] : BigEndianShifts[M1 & 0x3];
2317 InsertAtByte = IsLE ? 8 : 4;
2318 Swap = M1 < 4;
2319 return true;
2320 }
2321 // 0, 1, H, 3 or 4, 5, L, 7
2322 if ((M2 > 3 && M0 == 0 && M1 == 1 && M3 == 3) ||
2323 (M2 < 4 && M0 == 4 && M1 == 5 && M3 == 7)) {
2324 ShiftElts = IsLE ? LittleEndianShifts[M2 & 0x3] : BigEndianShifts[M2 & 0x3];
2325 InsertAtByte = IsLE ? 4 : 8;
2326 Swap = M2 < 4;
2327 return true;
2328 }
2329 // 0, 1, 2, H or 4, 5, 6, L
2330 if ((M3 > 3 && M0 == 0 && M1 == 1 && M2 == 2) ||
2331 (M3 < 4 && M0 == 4 && M1 == 5 && M2 == 6)) {
2332 ShiftElts = IsLE ? LittleEndianShifts[M3 & 0x3] : BigEndianShifts[M3 & 0x3];
2333 InsertAtByte = IsLE ? 0 : 12;
2334 Swap = M3 < 4;
2335 return true;
2336 }
2337
2338 // If both vector operands for the shuffle are the same vector, the mask will
2339 // contain only elements from the first one and the second one will be undef.
2340 if (N->getOperand(1).isUndef()) {
2341 ShiftElts = 0;
2342 Swap = true;
2343 unsigned XXINSERTWSrcElem = IsLE ? 2 : 1;
2344 if (M0 == XXINSERTWSrcElem && M1 == 1 && M2 == 2 && M3 == 3) {
2345 InsertAtByte = IsLE ? 12 : 0;
2346 return true;
2347 }
2348 if (M0 == 0 && M1 == XXINSERTWSrcElem && M2 == 2 && M3 == 3) {
2349 InsertAtByte = IsLE ? 8 : 4;
2350 return true;
2351 }
2352 if (M0 == 0 && M1 == 1 && M2 == XXINSERTWSrcElem && M3 == 3) {
2353 InsertAtByte = IsLE ? 4 : 8;
2354 return true;
2355 }
2356 if (M0 == 0 && M1 == 1 && M2 == 2 && M3 == XXINSERTWSrcElem) {
2357 InsertAtByte = IsLE ? 0 : 12;
2358 return true;
2359 }
2360 }
2361
2362 return false;
2363}
2364
2365bool PPC::isXXSLDWIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
2366 bool &Swap, bool IsLE) {
2367 assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
2368 // Ensure each byte index of the word is consecutive.
2369 if (!isNByteElemShuffleMask(N, 4, 1))
2370 return false;
2371
2372 // Now we look at mask elements 0,4,8,12, which are the beginning of words.
2373 unsigned M0 = N->getMaskElt(0) / 4;
2374 unsigned M1 = N->getMaskElt(4) / 4;
2375 unsigned M2 = N->getMaskElt(8) / 4;
2376 unsigned M3 = N->getMaskElt(12) / 4;
2377
2378 // If both vector operands for the shuffle are the same vector, the mask will
2379 // contain only elements from the first one and the second one will be undef.
2380 if (N->getOperand(1).isUndef()) {
2381 assert(M0 < 4 && "Indexing into an undef vector?");
2382 if (M1 != (M0 + 1) % 4 || M2 != (M1 + 1) % 4 || M3 != (M2 + 1) % 4)
2383 return false;
2384
2385 ShiftElts = IsLE ? (4 - M0) % 4 : M0;
2386 Swap = false;
2387 return true;
2388 }
2389
2390 // Ensure each word index of the ShuffleVector Mask is consecutive.
2391 if (M1 != (M0 + 1) % 8 || M2 != (M1 + 1) % 8 || M3 != (M2 + 1) % 8)
2392 return false;
2393
2394 if (IsLE) {
2395 if (M0 == 0 || M0 == 7 || M0 == 6 || M0 == 5) {
2396 // Input vectors don't need to be swapped if the leading element
2397 // of the result is one of the 3 left elements of the second vector
2398 // (or if there is no shift to be done at all).
2399 Swap = false;
2400 ShiftElts = (8 - M0) % 8;
2401 } else if (M0 == 4 || M0 == 3 || M0 == 2 || M0 == 1) {
2402 // Input vectors need to be swapped if the leading element
2403 // of the result is one of the 3 left elements of the first vector
2404 // (or if we're shifting by 4 - thereby simply swapping the vectors).
2405 Swap = true;
2406 ShiftElts = (4 - M0) % 4;
2407 }
2408
2409 return true;
2410 } else { // BE
2411 if (M0 == 0 || M0 == 1 || M0 == 2 || M0 == 3) {
2412 // Input vectors don't need to be swapped if the leading element
2413 // of the result is one of the 4 elements of the first vector.
2414 Swap = false;
2415 ShiftElts = M0;
2416 } else if (M0 == 4 || M0 == 5 || M0 == 6 || M0 == 7) {
2417 // Input vectors need to be swapped if the leading element
2418 // of the result is one of the 4 elements of the right vector.
2419 Swap = true;
2420 ShiftElts = M0 - 4;
2421 }
2422
2423 return true;
2424 }
2425}
2426
2427static bool isXXBRShuffleMaskHelper(ShuffleVectorSDNode *N, int Width) {
2428 assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
2429
2430 if (!isNByteElemShuffleMask(N, Width, -1))
2431 return false;
2432
2433 for (int i = 0; i < 16; i += Width)
2434 if (N->getMaskElt(i) != i + Width - 1)
2435 return false;
2436
2437 return true;
2438}
2439
2440bool PPC::isXXBRHShuffleMask(ShuffleVectorSDNode *N) {
2441 return isXXBRShuffleMaskHelper(N, 2);
2442}
2443
2444bool PPC::isXXBRWShuffleMask(ShuffleVectorSDNode *N) {
2445 return isXXBRShuffleMaskHelper(N, 4);
2446}
2447
2448bool PPC::isXXBRDShuffleMask(ShuffleVectorSDNode *N) {
2449 return isXXBRShuffleMaskHelper(N, 8);
2450}
2451
2452bool PPC::isXXBRQShuffleMask(ShuffleVectorSDNode *N) {
2453 return isXXBRShuffleMaskHelper(N, 16);
2454}
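// For example, isXXBRWShuffleMask accepts
//   <3,2,1,0, 7,6,5,4, 11,10,9,8, 15,14,13,12>
// where every word keeps its position but has its bytes reversed, which is
// the operation performed by xxbrw.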
2455
2456/// Can node \p N be lowered to an XXPERMDI instruction? If so, set \p Swap
2457/// if the inputs to the instruction should be swapped and set \p DM to the
2458/// value for the immediate.
2459/// Specifically, set \p Swap to true only if \p N can be lowered to XXPERMDI
2460/// AND element 0 of the result comes from the first input (LE) or second input
2461/// (BE). Set \p DM to the calculated result (0-3) only if \p N can be lowered.
2462/// \return true iff the given mask of shuffle node \p N is a XXPERMDI shuffle
2463/// mask.
2464bool PPC::isXXPERMDIShuffleMask(ShuffleVectorSDNode *N, unsigned &DM,
2465 bool &Swap, bool IsLE) {
2466 assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
2467
2468 // Ensure each byte index of the double word is consecutive.
2469 if (!isNByteElemShuffleMask(N, 8, 1))
2470 return false;
2471
2472 unsigned M0 = N->getMaskElt(0) / 8;
2473 unsigned M1 = N->getMaskElt(8) / 8;
2474 assert(((M0 | M1) < 4) && "A mask element out of bounds?");
2475
2476 // If both vector operands for the shuffle are the same vector, the mask will
2477 // contain only elements from the first one and the second one will be undef.
2478 if (N->getOperand(1).isUndef()) {
2479 if ((M0 | M1) < 2) {
2480 DM = IsLE ? (((~M1) & 1) << 1) + ((~M0) & 1) : (M0 << 1) + (M1 & 1);
2481 Swap = false;
2482 return true;
2483 } else
2484 return false;
2485 }
2486
2487 if (IsLE) {
2488 if (M0 > 1 && M1 < 2) {
2489 Swap = false;
2490 } else if (M0 < 2 && M1 > 1) {
2491 M0 = (M0 + 2) % 4;
2492 M1 = (M1 + 2) % 4;
2493 Swap = true;
2494 } else
2495 return false;
2496
2497 // Note: if control flow comes here that means Swap is already set above
2498 DM = (((~M1) & 1) << 1) + ((~M0) & 1);
2499 return true;
2500 } else { // BE
2501 if (M0 < 2 && M1 > 1) {
2502 Swap = false;
2503 } else if (M0 > 1 && M1 < 2) {
2504 M0 = (M0 + 2) % 4;
2505 M1 = (M1 + 2) % 4;
2506 Swap = true;
2507 } else
2508 return false;
2509
2510 // Note: if control flow comes here that means Swap is already set above
2511 DM = (M0 << 1) + (M1 & 1);
2512 return true;
2513 }
2514}
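// For example, with two distinct inputs the mask <0..7, 24..31> selects
// doubleword 0 of the first input and doubleword 1 of the second
// (M0 = 0, M1 = 3); on a big-endian target this produces Swap = false and
// DM = (0 << 1) + (3 & 1) = 1.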
2515
2516
2517/// getSplatIdxForPPCMnemonics - Return the splat index as a value that is
2518/// appropriate for PPC mnemonics (which have a big endian bias - namely
2519/// elements are counted from the left of the vector register).
2520unsigned PPC::getSplatIdxForPPCMnemonics(SDNode *N, unsigned EltSize,
2521 SelectionDAG &DAG) {
2522 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
2523 assert(isSplatShuffleMask(SVOp, EltSize));
2524 EVT VT = SVOp->getValueType(0);
2525
2526 if (VT == MVT::v2i64 || VT == MVT::v2f64)
2527 return DAG.getDataLayout().isLittleEndian() ? 1 - SVOp->getMaskElt(0)
2528 : SVOp->getMaskElt(0);
2529
2530 if (DAG.getDataLayout().isLittleEndian())
2531 return (16 / EltSize) - 1 - (SVOp->getMaskElt(0) / EltSize);
2532 else
2533 return SVOp->getMaskElt(0) / EltSize;
2534}
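// For example, a word splat (EltSize == 4) whose mask starts at byte 4 names
// word element 1 in array order; this returns 1 on a big-endian target and
// (16 / 4) - 1 - 1 == 2 on a little-endian target, matching the left-to-right
// element numbering used by the PPC splat mnemonics.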
2535
2536/// get_VSPLTI_elt - If this is a build_vector of constants which can be formed
2537/// by using a vspltis[bhw] instruction of the specified element size, return
2538/// the constant being splatted. The ByteSize field indicates the number of
2539/// bytes of each element [124] -> [bhw].
2540SDValue PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) {
2541 SDValue OpVal;
2542
2543 // If ByteSize of the splat is bigger than the element size of the
2544 // build_vector, then we have a case where we are checking for a splat where
2545 // multiple elements of the buildvector are folded together into a single
2546 // logical element of the splat (e.g. "vspltish 1" to splat {0,1}*8).
2547 unsigned EltSize = 16/N->getNumOperands();
2548 if (EltSize < ByteSize) {
2549 unsigned Multiple = ByteSize/EltSize; // Number of BV entries per spltval.
2550 SDValue UniquedVals[4];
2551 assert(Multiple > 1 && Multiple <= 4 && "How can this happen?");
2552
2553 // See if all of the elements in the buildvector agree across.
2554 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
2555 if (N->getOperand(i).isUndef()) continue;
2556 // If the element isn't a constant, bail fully out.
2557 if (!isa<ConstantSDNode>(N->getOperand(i))) return SDValue();
2558
2559 if (!UniquedVals[i&(Multiple-1)].getNode())
2560 UniquedVals[i&(Multiple-1)] = N->getOperand(i);
2561 else if (UniquedVals[i&(Multiple-1)] != N->getOperand(i))
2562 return SDValue(); // no match.
2563 }
2564
2565 // Okay, if we reached this point, UniquedVals[0..Multiple-1] contains
2566 // either constant or undef values that are identical for each chunk. See
2567 // if these chunks can form into a larger vspltis*.
2568
2569 // Check to see if all of the leading entries are either 0 or -1. If
2570 // neither, then this won't fit into the immediate field.
2571 bool LeadingZero = true;
2572 bool LeadingOnes = true;
2573 for (unsigned i = 0; i != Multiple-1; ++i) {
2574 if (!UniquedVals[i].getNode()) continue; // Must have been undefs.
2575
2576 LeadingZero &= isNullConstant(UniquedVals[i]);
2577 LeadingOnes &= isAllOnesConstant(UniquedVals[i]);
2578 }
2579 // Finally, check the least significant entry.
2580 if (LeadingZero) {
2581 if (!UniquedVals[Multiple-1].getNode())
2582 return DAG.getTargetConstant(0, SDLoc(N), MVT::i32); // 0,0,0,undef
2583 int Val = UniquedVals[Multiple - 1]->getAsZExtVal();
2584 if (Val < 16) // 0,0,0,4 -> vspltisw(4)
2585 return DAG.getTargetConstant(Val, SDLoc(N), MVT::i32);
2586 }
2587 if (LeadingOnes) {
2588 if (!UniquedVals[Multiple-1].getNode())
2589 return DAG.getTargetConstant(~0U, SDLoc(N), MVT::i32); // -1,-1,-1,undef
2590 int Val =cast<ConstantSDNode>(UniquedVals[Multiple-1])->getSExtValue();
2591 if (Val >= -16) // -1,-1,-1,-2 -> vspltisw(-2)
2592 return DAG.getTargetConstant(Val, SDLoc(N), MVT::i32);
2593 }
2594
2595 return SDValue();
2596 }
2597
2598 // Check to see if this buildvec has a single non-undef value in its elements.
2599 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
2600 if (N->getOperand(i).isUndef()) continue;
2601 if (!OpVal.getNode())
2602 OpVal = N->getOperand(i);
2603 else if (OpVal != N->getOperand(i))
2604 return SDValue();
2605 }
2606
2607 if (!OpVal.getNode()) return SDValue(); // All UNDEF: use implicit def.
2608
2609 unsigned ValSizeInBytes = EltSize;
2610 uint64_t Value = 0;
2611 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
2612 Value = CN->getZExtValue();
2613 } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
2614 assert(CN->getValueType(0) == MVT::f32 && "Only one legal FP vector type!");
2615 Value = llvm::bit_cast<uint32_t>(CN->getValueAPF().convertToFloat());
2616 }
2617
2618 // If the splat value is larger than the element value, then we can never do
2619 // this splat. The only case where the replicated bits would fit into our
2620 // immediate field is zero, and we prefer to use vxor for that.
2621 if (ValSizeInBytes < ByteSize) return SDValue();
2622
2623 // If the element value is larger than the splat value, check if it consists
2624 // of a repeated bit pattern of size ByteSize.
2625 if (!APInt(ValSizeInBytes * 8, Value).isSplat(ByteSize * 8))
2626 return SDValue();
2627
2628 // Properly sign extend the value.
2629 int MaskVal = SignExtend32(Value, ByteSize * 8);
2630
2631 // If this is zero, don't match, zero matches ISD::isBuildVectorAllZeros.
2632 if (MaskVal == 0) return SDValue();
2633
2634 // Finally, if this value fits in a 5 bit sext field, return it
2635 if (SignExtend32<5>(MaskVal) == MaskVal)
2636 return DAG.getTargetConstant(MaskVal, SDLoc(N), MVT::i32);
2637 return SDValue();
2638}
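// For example, a v16i8 build_vector repeating the byte pattern {0,1} queried
// with ByteSize == 2 folds each pair of bytes into the halfword value 1, so
// this returns a target constant of 1 and the splat can be emitted as
// "vspltish 1".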
2639
2640//===----------------------------------------------------------------------===//
2641// Addressing Mode Selection
2642//===----------------------------------------------------------------------===//
2643
2644/// isIntS16Immediate - This method tests to see if the node is either a 32-bit
2645/// or 64-bit immediate, and if the value can be accurately represented as a
2646/// sign extension from a 16-bit value. If so, this returns true and the
2647/// immediate.
2648bool llvm::isIntS16Immediate(SDNode *N, int16_t &Imm) {
2649 if (!isa<ConstantSDNode>(N))
2650 return false;
2651
2652 Imm = (int16_t)N->getAsZExtVal();
2653 if (N->getValueType(0) == MVT::i32)
2654 return Imm == (int32_t)N->getAsZExtVal();
2655 else
2656 return Imm == (int64_t)N->getAsZExtVal();
2657}
2658bool llvm::isIntS16Immediate(SDValue Op, int16_t &Imm) {
2659 return isIntS16Immediate(Op.getNode(), Imm);
2660}
2661
2662/// Used when computing address flags for selecting loads and stores.
2663/// If we have an OR, check if the LHS and RHS are provably disjoint.
2664/// An OR of two provably disjoint values is equivalent to an ADD.
2665/// Most PPC load/store instructions compute the effective address as a sum,
2666/// so doing this conversion is useful.
2667static bool provablyDisjointOr(SelectionDAG &DAG, const SDValue &N) {
2668 if (N.getOpcode() != ISD::OR)
2669 return false;
2670 KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));
2671 if (!LHSKnown.Zero.getBoolValue())
2672 return false;
2673 KnownBits RHSKnown = DAG.computeKnownBits(N.getOperand(1));
2674 return (~(LHSKnown.Zero | RHSKnown.Zero) == 0);
2675}
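// For example, in (X << 4) | 7 the low four bits of the LHS are known zero
// and every bit above bit 2 of the RHS is zero, so each bit position is known
// zero in at least one operand and the OR can safely be treated as an ADD.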
2676
2677/// SelectAddressEVXRegReg - Given the specified address, check to see if it can
2678/// be represented as an indexed [r+r] operation.
2679bool PPCTargetLowering::SelectAddressEVXRegReg(SDValue N, SDValue &Base,
2680 SDValue &Index,
2681 SelectionDAG &DAG) const {
2682 for (SDNode *U : N->uses()) {
2683 if (MemSDNode *Memop = dyn_cast<MemSDNode>(U)) {
2684 if (Memop->getMemoryVT() == MVT::f64) {
2685 Base = N.getOperand(0);
2686 Index = N.getOperand(1);
2687 return true;
2688 }
2689 }
2690 }
2691 return false;
2692}
2693
2694/// isIntS34Immediate - This method tests whether the value of the given node
2695/// can be accurately represented as a sign extension from a 34-bit value. If so,
2696/// this returns true and the immediate.
2697bool llvm::isIntS34Immediate(SDNode *N, int64_t &Imm) {
2698 if (!isa<ConstantSDNode>(N))
2699 return false;
2700
2701 Imm = (int64_t)N->getAsZExtVal();
2702 return isInt<34>(Imm);
2703}
2704bool llvm::isIntS34Immediate(SDValue Op, int64_t &Imm) {
2705 return isIntS34Immediate(Op.getNode(), Imm);
2706}
2707
2708/// SelectAddressRegReg - Given the specified address, check to see if it
2709/// can be represented as an indexed [r+r] operation. Returns false if it
2710/// can be more efficiently represented as [r+imm]. If \p EncodingAlignment is
2711/// non-zero and N can be represented by a base register plus a signed 16-bit
2712/// displacement, make a more precise judgement by checking (displacement % \p
2713/// EncodingAlignment).
2714bool PPCTargetLowering::SelectAddressRegReg(SDValue N, SDValue &Base,
2715 SDValue &Index, SelectionDAG &DAG,
2716 MaybeAlign EncodingAlignment) const {
2717 // If we have a PC Relative target flag don't select as [reg+reg]. It will be
2718 // a [pc+imm].
2719 if (SelectAddressPCRel(N, Base))
2720 return false;
2721
2722 int16_t Imm = 0;
2723 if (N.getOpcode() == ISD::ADD) {
2724 // Is this an SPE f64 load/store, which can't handle a 16-bit offset?
2725 // SPE load/store instructions can only handle 8-bit offsets.
2726 if (hasSPE() && SelectAddressEVXRegReg(N, Base, Index, DAG))
2727 return true;
2728 if (isIntS16Immediate(N.getOperand(1), Imm) &&
2729 (!EncodingAlignment || isAligned(*EncodingAlignment, Imm)))
2730 return false; // r+i
2731 if (N.getOperand(1).getOpcode() == PPCISD::Lo)
2732 return false; // r+i
2733
2734 Base = N.getOperand(0);
2735 Index = N.getOperand(1);
2736 return true;
2737 } else if (N.getOpcode() == ISD::OR) {
2738 if (isIntS16Immediate(N.getOperand(1), Imm) &&
2739 (!EncodingAlignment || isAligned(*EncodingAlignment, Imm)))
2740 return false; // r+i can fold it if we can.
2741
2742 // If this is an or of disjoint bitfields, we can codegen this as an add
2743 // (for better address arithmetic) if the LHS and RHS of the OR are provably
2744 // disjoint.
2745 KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));
2746
2747 if (LHSKnown.Zero.getBoolValue()) {
2748 KnownBits RHSKnown = DAG.computeKnownBits(N.getOperand(1));
2749 // If all of the bits are known zero on the LHS or RHS, the add won't
2750 // carry.
2751 if (~(LHSKnown.Zero | RHSKnown.Zero) == 0) {
2752 Base = N.getOperand(0);
2753 Index = N.getOperand(1);
2754 return true;
2755 }
2756 }
2757 }
2758
2759 return false;
2760}
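// For example, (add X, 100) is normally rejected here because the offset fits
// in a signed 16-bit field and is better emitted as 100(X), whereas
// (add X, Y) with a non-immediate Y is accepted with Base = X and Index = Y.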
2761
2762// If we happen to be doing an i64 load or store into a stack slot that has
2763// less than a 4-byte alignment, then the frame-index elimination may need to
2764// use an indexed load or store instruction (because the offset may not be a
2765// multiple of 4). The extra register needed to hold the offset comes from the
2766// register scavenger, and it is possible that the scavenger will need to use
2767// an emergency spill slot. As a result, we need to make sure that a spill slot
2768// is allocated when doing an i64 load/store into a less-than-4-byte-aligned
2769// stack slot.
2770static void fixupFuncForFI(SelectionDAG &DAG, int FrameIdx, EVT VT) {
2771 // FIXME: This does not handle the LWA case.
2772 if (VT != MVT::i64)
2773 return;
2774
2775 // NOTE: We'll exclude negative FIs here, which come from argument
2776 // lowering, because there are no known test cases triggering this problem
2777 // using packed structures (or similar). We can remove this exclusion if
2778 // we find such a test case. The reason why this is so test-case driven is
2779 // because this entire 'fixup' is only to prevent crashes (from the
2780 // register scavenger) on not-really-valid inputs. For example, if we have:
2781 // %a = alloca i1
2782 // %b = bitcast i1* %a to i64*
2783 // store i64 1, i64* %b
2784 // then the store should really be marked as 'align 1', but is not. If it
2785 // were marked as 'align 1' then the indexed form would have been
2786 // instruction-selected initially, and the problem this 'fixup' is preventing
2787 // won't happen regardless.
2788 if (FrameIdx < 0)
2789 return;
2790
2791 MachineFunction &MF = DAG.getMachineFunction();
2792 MachineFrameInfo &MFI = MF.getFrameInfo();
2793
2794 if (MFI.getObjectAlign(FrameIdx) >= Align(4))
2795 return;
2796
2797 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
2798 FuncInfo->setHasNonRISpills();
2799}
2800
2801/// Returns true if the address N can be represented by a base register plus
2802/// a signed 16-bit displacement [r+imm], and if it is not better
2803/// represented as reg+reg. If \p EncodingAlignment is non-zero, only accept
2804/// displacements that are multiples of that value.
2805bool PPCTargetLowering::SelectAddressRegImm(
2806 SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG,
2807 MaybeAlign EncodingAlignment) const {
2808 // FIXME dl should come from parent load or store, not from address
2809 SDLoc dl(N);
2810
2811 // If we have a PC Relative target flag don't select as [reg+imm]. It will be
2812 // a [pc+imm].
2813 if (SelectAddressPCRel(N, Base))
2814 return false;
2815
2816 // If this can be more profitably realized as r+r, fail.
2817 if (SelectAddressRegReg(N, Disp, Base, DAG, EncodingAlignment))
2818 return false;
2819
2820 if (N.getOpcode() == ISD::ADD) {
2821 int16_t imm = 0;
2822 if (isIntS16Immediate(N.getOperand(1), imm) &&
2823 (!EncodingAlignment || isAligned(*EncodingAlignment, imm))) {
2824 Disp = DAG.getTargetConstant(imm, dl, N.getValueType());
2825 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
2826 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2827 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
2828 } else {
2829 Base = N.getOperand(0);
2830 }
2831 return true; // [r+i]
2832 } else if (N.getOperand(1).getOpcode() == PPCISD::Lo) {
2833 // Match LOAD (ADD (X, Lo(G))).
2834 assert(!N.getOperand(1).getConstantOperandVal(1) &&
2835 "Cannot handle constant offsets yet!");
2836 Disp = N.getOperand(1).getOperand(0); // The global address.
2841 Base = N.getOperand(0);
2842 return true; // [&g+r]
2843 }
2844 } else if (N.getOpcode() == ISD::OR) {
2845 int16_t imm = 0;
2846 if (isIntS16Immediate(N.getOperand(1), imm) &&
2847 (!EncodingAlignment || isAligned(*EncodingAlignment, imm))) {
2848 // If this is an or of disjoint bitfields, we can codegen this as an add
2849 // (for better address arithmetic) if the LHS and RHS of the OR are
2850 // provably disjoint.
2851 KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));
2852
2853 if ((LHSKnown.Zero.getZExtValue()|~(uint64_t)imm) == ~0ULL) {
2854 // If all of the bits are known zero on the LHS or RHS, the add won't
2855 // carry.
2856 if (FrameIndexSDNode *FI =
2857 dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
2858 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2859 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
2860 } else {
2861 Base = N.getOperand(0);
2862 }
2863 Disp = DAG.getTargetConstant(imm, dl, N.getValueType());
2864 return true;
2865 }
2866 }
2867 } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) {
2868 // Loading from a constant address.
2869
2870 // If this address fits entirely in a 16-bit sext immediate field, codegen
2871 // this as "d, 0"
2872 int16_t Imm;
2873 if (isIntS16Immediate(CN, Imm) &&
2874 (!EncodingAlignment || isAligned(*EncodingAlignment, Imm))) {
2875 Disp = DAG.getTargetConstant(Imm, dl, CN->getValueType(0));
2876 Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
2877 CN->getValueType(0));
2878 return true;
2879 }
2880
2881 // Handle 32-bit sext immediates with LIS + addr mode.
2882 if ((CN->getValueType(0) == MVT::i32 ||
2883 (int64_t)CN->getZExtValue() == (int)CN->getZExtValue()) &&
2884 (!EncodingAlignment ||
2885 isAligned(*EncodingAlignment, CN->getZExtValue()))) {
2886 int Addr = (int)CN->getZExtValue();
2887
2888 // Otherwise, break this down into an LIS + disp.
2889 Disp = DAG.getTargetConstant((short)Addr, dl, MVT::i32);
2890
2891 Base = DAG.getTargetConstant((Addr - (signed short)Addr) >> 16, dl,
2892 MVT::i32);
2893 unsigned Opc = CN->getValueType(0) == MVT::i32 ? PPC::LIS : PPC::LIS8;
2894 Base = SDValue(DAG.getMachineNode(Opc, dl, CN->getValueType(0), Base), 0);
2895 return true;
2896 }
2897 }
2898
2899 Disp = DAG.getTargetConstant(0, dl, getPointerTy(DAG.getDataLayout()));
2900 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N)) {
2901 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2902 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
2903 } else
2904 Base = N;
2905 return true; // [r+0]
2906}
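// For example, loading from the constant address 0x12348000 takes the
// LIS + disp path above: Disp = (short)0x8000 = -32768 and the LIS operand is
// (0x12348000 + 0x8000) >> 16 = 0x1235, so (0x1235 << 16) + (-32768)
// reproduces the original address.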
2907
2908/// Similar to the 16-bit case but for instructions that take a 34-bit
2909/// displacement field (prefixed loads/stores).
2910bool PPCTargetLowering::SelectAddressRegImm34(SDValue N, SDValue &Disp,
2911 SDValue &Base,
2912 SelectionDAG &DAG) const {
2913 // Only on 64-bit targets.
2914 if (N.getValueType() != MVT::i64)
2915 return false;
2916
2917 SDLoc dl(N);
2918 int64_t Imm = 0;
2919
2920 if (N.getOpcode() == ISD::ADD) {
2921 if (!isIntS34Immediate(N.getOperand(1), Imm))
2922 return false;
2923 Disp = DAG.getTargetConstant(Imm, dl, N.getValueType());
2924 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0)))
2925 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2926 else
2927 Base = N.getOperand(0);
2928 return true;
2929 }
2930
2931 if (N.getOpcode() == ISD::OR) {
2932 if (!isIntS34Immediate(N.getOperand(1), Imm))
2933 return false;
2934 // If this is an or of disjoint bitfields, we can codegen this as an add
2935 // (for better address arithmetic) if the LHS and RHS of the OR are
2936 // provably disjoint.
2937 KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));
2938 if ((LHSKnown.Zero.getZExtValue() | ~(uint64_t)Imm) != ~0ULL)
2939 return false;
2940 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0)))
2941 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2942 else
2943 Base = N.getOperand(0);
2944 Disp = DAG.getTargetConstant(Imm, dl, N.getValueType());
2945 return true;
2946 }
2947
2948 if (isIntS34Immediate(N, Imm)) { // If the address is a 34-bit const.
2949 Disp = DAG.getTargetConstant(Imm, dl, N.getValueType());
2950 Base = DAG.getRegister(PPC::ZERO8, N.getValueType());
2951 return true;
2952 }
2953
2954 return false;
2955}
2956
2957/// SelectAddressRegRegOnly - Given the specified address, force it to be
2958/// represented as an indexed [r+r] operation.
2959bool PPCTargetLowering::SelectAddressRegRegOnly(SDValue N, SDValue &Base,
2960 SDValue &Index,
2961 SelectionDAG &DAG) const {
2962 // Check to see if we can easily represent this as an [r+r] address. This
2963 // will fail if it thinks that the address is more profitably represented as
2964 // reg+imm, e.g. where imm = 0.
2965 if (SelectAddressRegReg(N, Base, Index, DAG))
2966 return true;
2967
2968 // If the address is the result of an add, we will utilize the fact that the
2969 // address calculation includes an implicit add. However, we can reduce
2970 // register pressure if we do not materialize a constant just for use as the
2971 // index register. We only get rid of the add if it is not an add of a
2972 // value and a 16-bit signed constant and both have a single use.
2973 int16_t imm = 0;
2974 if (N.getOpcode() == ISD::ADD &&
2975 (!isIntS16Immediate(N.getOperand(1), imm) ||
2976 !N.getOperand(1).hasOneUse() || !N.getOperand(0).hasOneUse())) {
2977 Base = N.getOperand(0);
2978 Index = N.getOperand(1);
2979 return true;
2980 }
2981
2982 // Otherwise, do it the hard way, using R0 as the base register.
2983 Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
2984 N.getValueType());
2985 Index = N;
2986 return true;
2987}
2988
2989template <typename Ty> static bool isValidPCRelNode(SDValue N) {
2990 Ty *PCRelCand = dyn_cast<Ty>(N);
2991 return PCRelCand && (PPCInstrInfo::hasPCRelFlag(PCRelCand->getTargetFlags()));
2992}
2993
2994/// Returns true if this address is a PC Relative address.
2995/// PC Relative addresses are marked with the flag PPCII::MO_PCREL_FLAG
2996/// or if the node opcode is PPCISD::MAT_PCREL_ADDR.
2997bool PPCTargetLowering::SelectAddressPCRel(SDValue N, SDValue &Base) const {
2998 // This is a materialize PC Relative node. Always select this as PC Relative.
2999 Base = N;
3000 if (N.getOpcode() == PPCISD::MAT_PCREL_ADDR)
3001 return true;
3002 if (isValidPCRelNode<ConstantPoolSDNode>(N) ||
3003 isValidPCRelNode<GlobalAddressSDNode>(N) ||
3004 isValidPCRelNode<JumpTableSDNode>(N) ||
3005 isValidPCRelNode<BlockAddressSDNode>(N))
3006 return true;
3007 return false;
3008}
3009
3010/// Returns true if we should use a direct load into vector instruction
3011/// (such as lxsd or lfd), instead of a load into gpr + direct move sequence.
3012static bool usePartialVectorLoads(SDNode *N, const PPCSubtarget& ST) {
3013
3014 // If there are any uses other than scalar-to-vector, then we should
3015 // keep it as a scalar load -> direct move pattern to prevent multiple
3016 // loads.
3017 LoadSDNode *LD = dyn_cast<LoadSDNode>(N);
3018 if (!LD)
3019 return false;
3020
3021 EVT MemVT = LD->getMemoryVT();
3022 if (!MemVT.isSimple())
3023 return false;
3024 switch(MemVT.getSimpleVT().SimpleTy) {
3025 case MVT::i64:
3026 break;
3027 case MVT::i32:
3028 if (!ST.hasP8Vector())
3029 return false;
3030 break;
3031 case MVT::i16:
3032 case MVT::i8:
3033 if (!ST.hasP9Vector())
3034 return false;
3035 break;
3036 default:
3037 return false;
3038 }
3039
3040 SDValue LoadedVal(N, 0);
3041 if (!LoadedVal.hasOneUse())
3042 return false;
3043
3044 for (SDNode::use_iterator UI = LD->use_begin(), UE = LD->use_end();
3045 UI != UE; ++UI)
3046 if (UI.getUse().get().getResNo() == 0 &&
3047 UI->getOpcode() != ISD::SCALAR_TO_VECTOR &&
3048 UI->getOpcode() != PPCISD::SCALAR_TO_VECTOR_PERMUTED)
3049 return false;
3050
3051 return true;
3052}
3053
3054/// getPreIndexedAddressParts - returns true, and sets the base pointer,
3055/// offset pointer and addressing mode by reference, if the node's address
3056/// can be legally represented as a pre-indexed load/store address.
3057bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
3058 SDValue &Offset,
3059 ISD::MemIndexedMode &AM,
3060 SelectionDAG &DAG) const {
3061 if (DisablePPCPreinc) return false;
3062
3063 bool isLoad = true;
3064 SDValue Ptr;
3065 EVT VT;
3066 Align Alignment;
3067 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
3068 Ptr = LD->getBasePtr();
3069 VT = LD->getMemoryVT();
3070 Alignment = LD->getAlign();
3071 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
3072 Ptr = ST->getBasePtr();
3073 VT = ST->getMemoryVT();
3074 Alignment = ST->getAlign();
3075 isLoad = false;
3076 } else
3077 return false;
3078
3079 // Do not generate pre-inc forms for specific loads that feed scalar_to_vector
3080 // instructions because we can fold these into a more efficient instruction
3081 // instead (such as LXSD).
3082 if (isLoad && usePartialVectorLoads(N, Subtarget)) {
3083 return false;
3084 }
3085
3086 // PowerPC doesn't have preinc load/store instructions for vectors
3087 if (VT.isVector())
3088 return false;
3089
3090 if (SelectAddressRegReg(Ptr, Base, Offset, DAG)) {
3091 // Common code will reject creating a pre-inc form if the base pointer
3092 // is a frame index, or if N is a store and the base pointer is either
3093 // the same as or a predecessor of the value being stored. Check for
3094 // those situations here, and try with swapped Base/Offset instead.
3095 bool Swap = false;
3096
3097 if (isa<FrameIndexSDNode>(Base) || isa<RegisterSDNode>(Base))
3098 Swap = true;
3099 else if (!isLoad) {
3100 SDValue Val = cast<StoreSDNode>(N)->getValue();
3101 if (Val == Base || Base.getNode()->isPredecessorOf(Val.getNode()))
3102 Swap = true;
3103 }
3104
3105 if (Swap)
3106 std::swap(Base, Offset);
3107
3108 AM = ISD::PRE_INC;
3109 return true;
3110 }
3111
3112 // LDU/STU can only handle immediates that are a multiple of 4.
3113 if (VT != MVT::i64) {
3114 if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, std::nullopt))
3115 return false;
3116 } else {
3117 // LDU/STU need an address with at least 4-byte alignment.
3118 if (Alignment < Align(4))
3119 return false;
3120
3121 if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, Align(4)))
3122 return false;
3123 }
3124
3125 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
3126 // PPC64 doesn't have lwau, but it does have lwaux. Reject preinc load of
3127 // sext i32 to i64 when addr mode is r+i.
3128 if (LD->getValueType(0) == MVT::i64 && LD->getMemoryVT() == MVT::i32 &&
3129 LD->getExtensionType() == ISD::SEXTLOAD &&
3130 isa<ConstantSDNode>(Offset))
3131 return false;
3132 }
3133
3134 AM = ISD::PRE_INC;
3135 return true;
3136}
3137
3138//===----------------------------------------------------------------------===//
3139// LowerOperation implementation
3140//===----------------------------------------------------------------------===//
3141
3142/// Set HiOpFlags and LoOpFlags to the target MO flags used for label
3143/// references, selecting the PIC variants when generating position-independent code.
3144static void getLabelAccessInfo(bool IsPIC, const PPCSubtarget &Subtarget,
3145 unsigned &HiOpFlags, unsigned &LoOpFlags,
3146 const GlobalValue *GV = nullptr) {
3147 HiOpFlags = PPCII::MO_HA;
3148 LoOpFlags = PPCII::MO_LO;
3149
3150 // Don't use the pic base if not in PIC relocation model.
3151 if (IsPIC) {
3152 HiOpFlags = PPCII::MO_PIC_HA_FLAG;
3153 LoOpFlags = PPCII::MO_PIC_LO_FLAG;
3154 }
3155}
3156
3157static SDValue LowerLabelRef(SDValue HiPart, SDValue LoPart, bool isPIC,
3158 SelectionDAG &DAG) {
3159 SDLoc DL(HiPart);
3160 EVT PtrVT = HiPart.getValueType();
3161 SDValue Zero = DAG.getConstant(0, DL, PtrVT);
3162
3163 SDValue Hi = DAG.getNode(PPCISD::Hi, DL, PtrVT, HiPart, Zero);
3164 SDValue Lo = DAG.getNode(PPCISD::Lo, DL, PtrVT, LoPart, Zero);
3165
3166 // With PIC, the first instruction is actually "GR+hi(&G)".
3167 if (isPIC)
3168 Hi = DAG.getNode(ISD::ADD, DL, PtrVT,
3169 DAG.getNode(PPCISD::GlobalBaseReg, DL, PtrVT), Hi);
3170
3171 // Generate non-pic code that has direct accesses to the constant pool.
3172 // The address of the global is just (hi(&g)+lo(&g)).
3173 return DAG.getNode(ISD::ADD, DL, PtrVT, Hi, Lo);
3174}
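// For example, a non-PIC access to a global g is rebuilt as hi(&g) + lo(&g),
// which typically materializes as a sequence along the lines of:
//   lis  rT, g@ha
//   addi rA, rT, g@l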
3175
3176static void setUsesTOCBasePtr(MachineFunction &MF) {
3177 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
3178 FuncInfo->setUsesTOCBasePtr();
3179}
3180
3181static void setUsesTOCBasePtr(SelectionDAG &DAG) {
3182 setUsesTOCBasePtr(DAG.getMachineFunction());
3183}
3184
3185SDValue PPCTargetLowering::getTOCEntry(SelectionDAG &DAG, const SDLoc &dl,
3186 SDValue GA) const {
3187 const bool Is64Bit = Subtarget.isPPC64();
3188 EVT VT = Is64Bit ? MVT::i64 : MVT::i32;
3189 SDValue Reg = Is64Bit ? DAG.getRegister(PPC::X2, VT)
3190 : Subtarget.isAIXABI()
3191 ? DAG.getRegister(PPC::R2, VT)
3192 : DAG.getNode(PPCISD::GlobalBaseReg, dl, VT);
3193 SDValue Ops[] = { GA, Reg };
3194 return DAG.getMemIntrinsicNode(
3195 PPCISD::TOC_ENTRY, dl, DAG.getVTList(VT, MVT::Other), Ops, VT,
3198}
3199
3200SDValue PPCTargetLowering::LowerConstantPool(SDValue Op,
3201 SelectionDAG &DAG) const {
3202 EVT PtrVT = Op.getValueType();
3203 ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
3204 const Constant *C = CP->getConstVal();
3205
3206 // 64-bit SVR4 ABI and AIX ABI code are always position-independent.
3207 // The actual address of the GlobalValue is stored in the TOC.
3208 if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
3209 if (Subtarget.isUsingPCRelativeCalls()) {
3210 SDLoc DL(CP);
3211 EVT Ty = getPointerTy(DAG.getDataLayout());
3212 SDValue ConstPool = DAG.getTargetConstantPool(
3213 C, Ty, CP->getAlign(), CP->getOffset(), PPCII::MO_PCREL_FLAG);
3214 return DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, ConstPool);
3215 }
3216 setUsesTOCBasePtr(DAG);
3217 SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), 0);
3218 return getTOCEntry(DAG, SDLoc(CP), GA);
3219 }
3220
3221 unsigned MOHiFlag, MOLoFlag;
3222 bool IsPIC = isPositionIndependent();
3223 getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);
3224
3225 if (IsPIC && Subtarget.isSVR4ABI()) {
3226 SDValue GA =
3227 DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), PPCII::MO_PIC_FLAG);
3228 return getTOCEntry(DAG, SDLoc(CP), GA);
3229 }
3230
3231 SDValue CPIHi =
3232 DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), 0, MOHiFlag);
3233 SDValue CPILo =
3234 DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), 0, MOLoFlag);
3235 return LowerLabelRef(CPIHi, CPILo, IsPIC, DAG);
3236}
3237
3238// For 64-bit PowerPC, prefer the more compact relative encodings.
3239// This trades 32 bits per jump table entry for one or two instructions
3240// on the jump site.
3242 if (isJumpTableRelative())
3244
3246}
3247
3250 return false;
3251 if (Subtarget.isPPC64() || Subtarget.isAIXABI())
3252 return true;
3254}
3255
3257 SelectionDAG &DAG) const {
3258 if (!Subtarget.isPPC64() || Subtarget.isAIXABI())
3260
3261 switch (getTargetMachine().getCodeModel()) {
3262 case CodeModel::Small:
3263 case CodeModel::Medium:
3265 default:
3266 return DAG.getNode(PPCISD::GlobalBaseReg, SDLoc(),
3268 }
3269}
3270
3271const MCExpr *
3273 unsigned JTI,
3274 MCContext &Ctx) const {
3275 if (!Subtarget.isPPC64() || Subtarget.isAIXABI())
3277
3278 switch (getTargetMachine().getCodeModel()) {
3279 case CodeModel::Small:
3280 case CodeModel::Medium:
3282 default:
3283 return MCSymbolRefExpr::create(MF->getPICBaseSymbol(), Ctx);
3284 }
3285}
3286
3287SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
3288 EVT PtrVT = Op.getValueType();
3289 JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
3290
3291 // isUsingPCRelativeCalls() returns true when PCRelative is enabled
3292 if (Subtarget.isUsingPCRelativeCalls()) {
3293 SDLoc DL(JT);
3294 EVT Ty = getPointerTy(DAG.getDataLayout());
3295 SDValue GA =
3296 DAG.getTargetJumpTable(JT->getIndex(), Ty, PPCII::MO_PCREL_FLAG);
3297 SDValue MatAddr = DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
3298 return MatAddr;
3299 }
3300
3301 // 64-bit SVR4 ABI and AIX ABI code are always position-independent.
3302 // The actual address of the GlobalValue is stored in the TOC.
3303 if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
3304 setUsesTOCBasePtr(DAG);
3305 SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
3306 return getTOCEntry(DAG, SDLoc(JT), GA);
3307 }
3308
3309 unsigned MOHiFlag, MOLoFlag;
3310 bool IsPIC = isPositionIndependent();
3311 getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);
3312
3313 if (IsPIC && Subtarget.isSVR4ABI()) {
3314 SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT,
3316 return getTOCEntry(DAG, SDLoc(GA), GA);
3317 }
3318
3319 SDValue JTIHi = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOHiFlag);
3320 SDValue JTILo = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOLoFlag);
3321 return LowerLabelRef(JTIHi, JTILo, IsPIC, DAG);
3322}
3323
3324SDValue PPCTargetLowering::LowerBlockAddress(SDValue Op,
3325 SelectionDAG &DAG) const {
3326 EVT PtrVT = Op.getValueType();
3327 BlockAddressSDNode *BASDN = cast<BlockAddressSDNode>(Op);
3328 const BlockAddress *BA = BASDN->getBlockAddress();
3329
3330 // isUsingPCRelativeCalls() returns true when PCRelative is enabled
3331 if (Subtarget.isUsingPCRelativeCalls()) {
3332 SDLoc DL(BASDN);
3333 EVT Ty = getPointerTy(DAG.getDataLayout());
3334 SDValue GA = DAG.getTargetBlockAddress(BA, Ty, BASDN->getOffset(),
3336 SDValue MatAddr = DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
3337 return MatAddr;
3338 }
3339
3340 // 64-bit SVR4 ABI and AIX ABI code are always position-independent.
3341 // The actual BlockAddress is stored in the TOC.
3342 if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
3343 setUsesTOCBasePtr(DAG);
3344 SDValue GA = DAG.getTargetBlockAddress(BA, PtrVT, BASDN->getOffset());
3345 return getTOCEntry(DAG, SDLoc(BASDN), GA);
3346 }
3347
3348 // 32-bit position-independent ELF stores the BlockAddress in the .got.
3349 if (Subtarget.is32BitELFABI() && isPositionIndependent())
3350 return getTOCEntry(
3351 DAG, SDLoc(BASDN),
3352 DAG.getTargetBlockAddress(BA, PtrVT, BASDN->getOffset()));
3353
3354 unsigned MOHiFlag, MOLoFlag;
3355 bool IsPIC = isPositionIndependent();
3356 getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);
3357 SDValue TgtBAHi = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOHiFlag);
3358 SDValue TgtBALo = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOLoFlag);
3359 return LowerLabelRef(TgtBAHi, TgtBALo, IsPIC, DAG);
3360}
3361
3362SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
3363 SelectionDAG &DAG) const {
3364 if (Subtarget.isAIXABI())
3365 return LowerGlobalTLSAddressAIX(Op, DAG);
3366
3367 return LowerGlobalTLSAddressLinux(Op, DAG);
3368}
3369
3370/// updateForAIXShLibTLSModelOpt - Helper to initialize TLS model opt settings,
3371/// and then apply the update.
3372static void updateForAIXShLibTLSModelOpt(TLSModel::Model &Model,
3373 SelectionDAG &DAG,
3374 const TargetMachine &TM) {
3375 // Initialize TLS model opt setting lazily:
3376 // (1) Use initial-exec for single TLS var references within current function.
3377 // (2) Use local-dynamic for multiple TLS var references within current
3378 // function.
3379 PPCFunctionInfo *FuncInfo =
3381 if (!FuncInfo->isAIXFuncTLSModelOptInitDone()) {
3383 // Iterate over all instructions within current function, collect all TLS
3384 // global variables (global variables taken as the first parameter to
3385 // Intrinsic::threadlocal_address).
3386 const Function &Func = DAG.getMachineFunction().getFunction();
3387 for (Function::const_iterator BI = Func.begin(), BE = Func.end(); BI != BE;
3388 ++BI)
3389 for (BasicBlock::const_iterator II = BI->begin(), IE = BI->end();
3390 II != IE; ++II)
3391 if (II->getOpcode() == Instruction::Call)
3392 if (const CallInst *CI = dyn_cast<const CallInst>(&*II))
3393 if (Function *CF = CI->getCalledFunction())
3394 if (CF->isDeclaration() &&
3395 CF->getIntrinsicID() == Intrinsic::threadlocal_address)
3396 if (const GlobalValue *GV =
3397 dyn_cast<GlobalValue>(II->getOperand(0))) {
3398 TLSModel::Model GVModel = TM.getTLSModel(GV);
3399 if (GVModel == TLSModel::LocalDynamic)
3400 TLSGV.insert(GV);
3401 }
3402
3403 unsigned TLSGVCnt = TLSGV.size();
3404 LLVM_DEBUG(dbgs() << format("LocalDynamic TLSGV count:%d\n", TLSGVCnt));
3405 if (TLSGVCnt <= PPCAIXTLSModelOptUseIEForLDLimit)
3406 FuncInfo->setAIXFuncUseTLSIEForLD();
3408 }
3409
3410 if (FuncInfo->isAIXFuncUseTLSIEForLD()) {
3411 LLVM_DEBUG(
3412 dbgs() << DAG.getMachineFunction().getName()
3413 << " function is using the TLS-IE model for TLS-LD access.\n");
3414 Model = TLSModel::InitialExec;
3415 }
3416}
3417
3418SDValue PPCTargetLowering::LowerGlobalTLSAddressAIX(SDValue Op,
3419 SelectionDAG &DAG) const {
3420 GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
3421
3422 if (DAG.getTarget().useEmulatedTLS())
3423 report_fatal_error("Emulated TLS is not yet supported on AIX");
3424
3425 SDLoc dl(GA);
3426 const GlobalValue *GV = GA->getGlobal();
3427 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3428 bool Is64Bit = Subtarget.isPPC64();
3430
3431 // Apply update to the TLS model.
3432 if (Subtarget.hasAIXShLibTLSModelOpt())
3434
3435 bool IsTLSLocalExecModel = Model == TLSModel::LocalExec;
3436
3437 if (IsTLSLocalExecModel || Model == TLSModel::InitialExec) {
3438 bool HasAIXSmallLocalExecTLS = Subtarget.hasAIXSmallLocalExecTLS();
3439 bool HasAIXSmallTLSGlobalAttr = false;
3440 SDValue VariableOffsetTGA =
3441 DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, PPCII::MO_TPREL_FLAG);
3442 SDValue VariableOffset = getTOCEntry(DAG, dl, VariableOffsetTGA);
3443 SDValue TLSReg;
3444
3445 if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV))
3446 if (GVar->hasAttribute("aix-small-tls"))
3447 HasAIXSmallTLSGlobalAttr = true;
3448
3449 if (Is64Bit) {
3450 // For local-exec and initial-exec on AIX (64-bit), the sequence generated
3451 // involves a load of the variable offset (from the TOC), followed by an
3452 // add of the loaded variable offset to R13 (the thread pointer).
3453 // This code sequence looks like:
3454 // ld reg1,var[TC](2)
3455 // add reg2, reg1, r13 // r13 contains the thread pointer
3456 TLSReg = DAG.getRegister(PPC::X13, MVT::i64);
3457
3458 // With the -maix-small-local-exec-tls option, or with the "aix-small-tls"
3459 // global variable attribute, produce a faster access sequence for
3460 // local-exec TLS variables where the offset from the TLS base is encoded
3461 // as an immediate operand.
3462 //
3463 // We only utilize the faster local-exec access sequence when the TLS
3464 // variable has a size within the policy limit. We treat types that are
3465 // not sized or are empty as being over the policy size limit.
3466 if ((HasAIXSmallLocalExecTLS || HasAIXSmallTLSGlobalAttr) &&
3467 IsTLSLocalExecModel) {
3468 Type *GVType = GV->getValueType();
3469 if (GVType->isSized() && !GVType->isEmptyTy() &&
3470 GV->getDataLayout().getTypeAllocSize(GVType) <=
3472 return DAG.getNode(PPCISD::Lo, dl, PtrVT, VariableOffsetTGA, TLSReg);
3473 }
3474 } else {
3475 // For local-exec and initial-exec on AIX (32-bit), the sequence generated
3476 // involves loading the variable offset from the TOC, generating a call to
3477 // .__get_tpointer to get the thread pointer (which will be in R3), and
3478 // adding the two together:
3479 // lwz reg1,var[TC](2)
3480 // bla .__get_tpointer
3481 // add reg2, reg1, r3
3482 TLSReg = DAG.getNode(PPCISD::GET_TPOINTER, dl, PtrVT);
3483
3484 // We do not implement the 32-bit version of the faster access sequence
3485 // for local-exec that is controlled by the -maix-small-local-exec-tls
3486 // option, or the "aix-small-tls" global variable attribute.
3487 if (HasAIXSmallLocalExecTLS || HasAIXSmallTLSGlobalAttr)
3488 report_fatal_error("The small-local-exec TLS access sequence is "
3489 "currently only supported on AIX (64-bit mode).");
3490 }
3491 return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TLSReg, VariableOffset);
3492 }
3493
3494 if (Model == TLSModel::LocalDynamic) {
3495 bool HasAIXSmallLocalDynamicTLS = Subtarget.hasAIXSmallLocalDynamicTLS();
3496
3497 // We do not implement the 32-bit version of the faster access sequence
3498 // for local-dynamic that is controlled by -maix-small-local-dynamic-tls.
3499 if (!Is64Bit && HasAIXSmallLocalDynamicTLS)
3500 report_fatal_error("The small-local-dynamic TLS access sequence is "
3501 "currently only supported on AIX (64-bit mode).");
3502
3503 // For local-dynamic on AIX, we need to generate one TOC entry for each
3504 // variable offset, and a single module-handle TOC entry for the entire
3505 // file.
3506
3507 SDValue VariableOffsetTGA =
3508 DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, PPCII::MO_TLSLD_FLAG);
3509 SDValue VariableOffset = getTOCEntry(DAG, dl, VariableOffsetTGA);
3510
3512 GlobalVariable *TLSGV =
3513 dyn_cast_or_null<GlobalVariable>(M->getOrInsertGlobal(
3514 StringRef("_$TLSML"), PointerType::getUnqual(*DAG.getContext())));
3516 assert(TLSGV && "Not able to create GV for _$TLSML.");
3517 SDValue ModuleHandleTGA =
3518 DAG.getTargetGlobalAddress(TLSGV, dl, PtrVT, 0, PPCII::MO_TLSLDM_FLAG);
3519 SDValue ModuleHandleTOC = getTOCEntry(DAG, dl, ModuleHandleTGA);
3520 SDValue ModuleHandle =
3521 DAG.getNode(PPCISD::TLSLD_AIX, dl, PtrVT, ModuleHandleTOC);
3522
3523 // With the -maix-small-local-dynamic-tls option, produce a faster access
3524 // sequence for local-dynamic TLS variables where the offset from the
3525 // module-handle is encoded as an immediate operand.
3526 //
3527 // We only utilize the faster local-dynamic access sequence when the TLS
3528 // variable has a size within the policy limit. We treat types that are
3529 // not sized or are empty as being over the policy size limit.
3530 if (HasAIXSmallLocalDynamicTLS) {
3531 Type *GVType = GV->getValueType();
3532 if (GVType->isSized() && !GVType->isEmptyTy() &&
3533 GV->getDataLayout().getTypeAllocSize(GVType) <=
3535 return DAG.getNode(PPCISD::Lo, dl, PtrVT, VariableOffsetTGA,
3536 ModuleHandle);
3537 }
3538
3539 return DAG.getNode(ISD::ADD, dl, PtrVT, ModuleHandle, VariableOffset);
3540 }
3541
3542 // If Local- or Initial-exec or Local-dynamic is not possible or specified,
3543 // all GlobalTLSAddress nodes are lowered using the general-dynamic model. We
3544 // need to generate two TOC entries, one for the variable offset, one for the
3545 // region handle. The global address for the TOC entry of the region handle is
3546 // created with the MO_TLSGDM_FLAG flag and the global address for the TOC
3547 // entry of the variable offset is created with MO_TLSGD_FLAG.
3548 SDValue VariableOffsetTGA =
3549 DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, PPCII::MO_TLSGD_FLAG);
3550 SDValue RegionHandleTGA =
3551 DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, PPCII::MO_TLSGDM_FLAG);
3552 SDValue VariableOffset = getTOCEntry(DAG, dl, VariableOffsetTGA);
3553 SDValue RegionHandle = getTOCEntry(DAG, dl, RegionHandleTGA);
3554 return DAG.getNode(PPCISD::TLSGD_AIX, dl, PtrVT, VariableOffset,
3555 RegionHandle);
3556}
3557
3558SDValue PPCTargetLowering::LowerGlobalTLSAddressLinux(SDValue Op,
3559 SelectionDAG &DAG) const {
3560 // FIXME: TLS addresses currently use medium model code sequences,
3561 // which is the most useful form. Eventually support for small and
3562 // large models could be added if users need it, at the cost of
3563 // additional complexity.
3564 GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
3565 if (DAG.getTarget().useEmulatedTLS())
3566 return LowerToTLSEmulatedModel(GA, DAG);
3567
3568 SDLoc dl(GA);
3569 const GlobalValue *GV = GA->getGlobal();
3570 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3571 bool is64bit = Subtarget.isPPC64();
3572 const Module *M = DAG.getMachineFunction().getFunction().getParent();
3573 PICLevel::Level picLevel = M->getPICLevel();
3574
3576 TLSModel::Model Model = TM.getTLSModel(GV);
3577
3578 if (Model == TLSModel::LocalExec) {
3579 if (Subtarget.isUsingPCRelativeCalls()) {
3580 SDValue TLSReg = DAG.getRegister(PPC::X13, MVT::i64);
3581 SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3583 SDValue MatAddr =
3584 DAG.getNode(PPCISD::TLS_LOCAL_EXEC_MAT_ADDR, dl, PtrVT, TGA);
3585 return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TLSReg, MatAddr);
3586 }
3587
3588 SDValue TGAHi = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3590 SDValue TGALo = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3592 SDValue TLSReg = is64bit ? DAG.getRegister(PPC::X13, MVT::i64)
3593 : DAG.getRegister(PPC::R2, MVT::i32);
3594
3595 SDValue Hi = DAG.getNode(PPCISD::Hi, dl, PtrVT, TGAHi, TLSReg);
3596 return DAG.getNode(PPCISD::Lo, dl, PtrVT, TGALo, Hi);
3597 }
3598
3599 if (Model == TLSModel::InitialExec) {
3600 bool IsPCRel = Subtarget.isUsingPCRelativeCalls();
3602 GV, dl, PtrVT, 0, IsPCRel ? PPCII::MO_GOT_TPREL_PCREL_FLAG : 0);
3603 SDValue TGATLS = DAG.getTargetGlobalAddress(
3604 GV, dl, PtrVT, 0, IsPCRel ? PPCII::MO_TLS_PCREL_FLAG : PPCII::MO_TLS);
3605 SDValue TPOffset;
3606 if (IsPCRel) {
3607 SDValue MatPCRel = DAG.getNode(PPCISD::MAT_PCREL_ADDR, dl, PtrVT, TGA);
3608 TPOffset = DAG.getLoad(MVT::i64, dl, DAG.getEntryNode(), MatPCRel,
3610 } else {
3611 SDValue GOTPtr;
3612 if (is64bit) {
3613 setUsesTOCBasePtr(DAG);
3614 SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
3615 GOTPtr =
3616 DAG.getNode(PPCISD::ADDIS_GOT_TPREL_HA, dl, PtrVT, GOTReg, TGA);
3617 } else {
3618 if (!TM.isPositionIndependent())
3619 GOTPtr = DAG.getNode(PPCISD::PPC32_GOT, dl, PtrVT);
3620 else if (picLevel == PICLevel::SmallPIC)
3621 GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
3622 else
3623 GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
3624 }
3625 TPOffset = DAG.getNode(PPCISD::LD_GOT_TPREL_L, dl, PtrVT, TGA, GOTPtr);
3626 }
3627 return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TPOffset, TGATLS);
3628 }
3629
3630 if (Model == TLSModel::GeneralDynamic) {
3631 if (Subtarget.isUsingPCRelativeCalls()) {
3632 SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3634 return DAG.getNode(PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR, dl, PtrVT, TGA);
3635 }
3636
3637 SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
3638 SDValue GOTPtr;
3639 if (is64bit) {
3640 setUsesTOCBasePtr(DAG);
3641 SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
3642 GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSGD_HA, dl, PtrVT,
3643 GOTReg, TGA);
3644 } else {
3645 if (picLevel == PICLevel::SmallPIC)
3646 GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
3647 else
3648 GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
3649 }
3650 return DAG.getNode(PPCISD::ADDI_TLSGD_L_ADDR, dl, PtrVT,
3651 GOTPtr, TGA, TGA);
3652 }
3653
3654 if (Model == TLSModel::LocalDynamic) {
3655 if (Subtarget.isUsingPCRelativeCalls()) {
3656 SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3658 SDValue MatPCRel =
3659 DAG.getNode(PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR, dl, PtrVT, TGA);
3660 return DAG.getNode(PPCISD::PADDI_DTPREL, dl, PtrVT, MatPCRel, TGA);
3661 }
3662
3663 SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
3664 SDValue GOTPtr;
3665 if (is64bit) {
3666 setUsesTOCBasePtr(DAG);
3667 SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
3668 GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSLD_HA, dl, PtrVT,
3669 GOTReg, TGA);
3670 } else {
3671 if (picLevel == PICLevel::SmallPIC)
3672 GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
3673 else
3674 GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
3675 }
3676 SDValue TLSAddr = DAG.getNode(PPCISD::ADDI_TLSLD_L_ADDR, dl,
3677 PtrVT, GOTPtr, TGA, TGA);
3678 SDValue DtvOffsetHi = DAG.getNode(PPCISD::ADDIS_DTPREL_HA, dl,
3679 PtrVT, TLSAddr, TGA);
3680 return DAG.getNode(PPCISD::ADDI_DTPREL_L, dl, PtrVT, DtvOffsetHi, TGA);
3681 }
3682
3683 llvm_unreachable("Unknown TLS model!");
3684}
3685
3686SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op,
3687 SelectionDAG &DAG) const {
3688 EVT PtrVT = Op.getValueType();
3689 GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
3690 SDLoc DL(GSDN);
3691 const GlobalValue *GV = GSDN->getGlobal();
3692
3693 // 64-bit SVR4 ABI & AIX ABI code is always position-independent.
3694 // The actual address of the GlobalValue is stored in the TOC.
3695 if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
3696 if (Subtarget.isUsingPCRelativeCalls()) {
3697 EVT Ty = getPointerTy(DAG.getDataLayout());
3698 if (isAccessedAsGotIndirect(Op)) {
3699 SDValue GA = DAG.getTargetGlobalAddress(GV, DL, Ty, GSDN->getOffset(),
3700 PPCII::MO_GOT_PCREL_FLAG);
3701 SDValue MatPCRel = DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
3702 SDValue Load = DAG.getLoad(MVT::i64, DL, DAG.getEntryNode(), MatPCRel,
3703 MachinePointerInfo());
3704 return Load;
3705 } else {
3706 SDValue GA = DAG.getTargetGlobalAddress(GV, DL, Ty, GSDN->getOffset(),
3707 PPCII::MO_PCREL_FLAG);
3708 return DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
3709 }
3710 }
3711 setUsesTOCBasePtr(DAG);
3712 SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset());
3713 return getTOCEntry(DAG, DL, GA);
3714 }
3715
3716 unsigned MOHiFlag, MOLoFlag;
3717 bool IsPIC = isPositionIndependent();
3718 getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag, GV);
3719
3720 if (IsPIC && Subtarget.isSVR4ABI()) {
3721 SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT,
3722 GSDN->getOffset(),
3723 PPCII::MO_PIC_FLAG);
3724 return getTOCEntry(DAG, DL, GA);
3725 }
3726
3727 SDValue GAHi =
3728 DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOHiFlag);
3729 SDValue GALo =
3730 DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOLoFlag);
3731
3732 return LowerLabelRef(GAHi, GALo, IsPIC, DAG);
3733}
3734
3735SDValue PPCTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
3736 bool IsStrict = Op->isStrictFPOpcode();
3737 ISD::CondCode CC =
3738 cast<CondCodeSDNode>(Op.getOperand(IsStrict ? 3 : 2))->get();
3739 SDValue LHS = Op.getOperand(IsStrict ? 1 : 0);
3740 SDValue RHS = Op.getOperand(IsStrict ? 2 : 1);
3741 SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
3742 EVT LHSVT = LHS.getValueType();
3743 SDLoc dl(Op);
3744
3745 // Soften the setcc with a libcall if the operands are fp128.
3746 if (LHSVT == MVT::f128) {
3747 assert(!Subtarget.hasP9Vector() &&
3748 "SETCC for f128 is already legal under Power9!");
3749 softenSetCCOperands(DAG, LHSVT, LHS, RHS, CC, dl, LHS, RHS, Chain,
3750 Op->getOpcode() == ISD::STRICT_FSETCCS);
3751 if (RHS.getNode())
3752 LHS = DAG.getNode(ISD::SETCC, dl, Op.getValueType(), LHS, RHS,
3753 DAG.getCondCode(CC));
3754 if (IsStrict)
3755 return DAG.getMergeValues({LHS, Chain}, dl);
3756 return LHS;
3757 }
3758
3759 assert(!IsStrict && "Don't know how to handle STRICT_FSETCC!");
3760
3761 if (Op.getValueType() == MVT::v2i64) {
3762 // When the operands themselves are v2i64 values, we need to do something
3763 // special because VSX has no underlying comparison operations for these.
3764 if (LHS.getValueType() == MVT::v2i64) {
3765 // Equality can be handled by casting to the legal type for Altivec
3766 // comparisons, everything else needs to be expanded.
3767 if (CC != ISD::SETEQ && CC != ISD::SETNE)
3768 return SDValue();
3769 SDValue SetCC32 = DAG.getSetCC(
3770 dl, MVT::v4i32, DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, LHS),
3771 DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, RHS), CC);
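// The v4i32 compare yields an independent all-ones/all-zeros mask for each
// 32-bit word. The word-swap shuffle below pairs each word with the other
// word of its doubleword, so combining the swapped and unswapped results
// folds the two partial results into one per-doubleword mask.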
3772 int ShuffV[] = {1, 0, 3, 2};
3773 SDValue Shuff =
3774 DAG.getVectorShuffle(MVT::v4i32, dl, SetCC32, SetCC32, ShuffV);
3775 return DAG.getBitcast(MVT::v2i64,
3776 DAG.getNode(CC == ISD::SETEQ ? ISD::AND : ISD::OR,
3777 dl, MVT::v4i32, Shuff, SetCC32));
3778 }
3779
3780 // We handle most of these in the usual way.
3781 return Op;
3782 }
3783
3784 // If we're comparing for equality to zero, expose the fact that this is
3785 // implemented as a ctlz/srl pair on ppc, so that the dag combiner can
3786 // fold the new nodes.
3787 if (SDValue V = lowerCmpEqZeroToCtlzSrl(Op, DAG))
3788 return V;
3789
3790 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS)) {
3791 // Leave comparisons against 0 and -1 alone for now, since they're usually
3792 // optimized. FIXME: revisit this when we can custom lower all setcc
3793 // optimizations.
3794 if (C->isAllOnes() || C->isZero())
3795 return SDValue();
3796 }
3797
3798 // If we have an integer seteq/setne, turn it into a compare against zero
3799 // by xor'ing the rhs with the lhs, which is faster than setting a
3800 // condition register, reading it back out, and masking the correct bit. The
3801 // normal approach here uses sub to do this instead of xor. Using xor exposes
3802 // the result to other bit-twiddling opportunities.
3803 if (LHSVT.isInteger() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
3804 EVT VT = Op.getValueType();
3805 SDValue Sub = DAG.getNode(ISD::XOR, dl, LHSVT, LHS, RHS);
3806 return DAG.getSetCC(dl, VT, Sub, DAG.getConstant(0, dl, LHSVT), CC);
3807 }
3808 return SDValue();
3809}
3810
3811SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
3812 SDNode *Node = Op.getNode();
3813 EVT VT = Node->getValueType(0);
3814 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3815 SDValue InChain = Node->getOperand(0);
3816 SDValue VAListPtr = Node->getOperand(1);
3817 const Value *SV = cast<SrcValueSDNode>(Node->getOperand(2))->getValue();
3818 SDLoc dl(Node);
3819
3820 assert(!Subtarget.isPPC64() && "LowerVAARG is PPC32 only");
3821
3822 // gpr_index
3823 SDValue GprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain,
3824 VAListPtr, MachinePointerInfo(SV), MVT::i8);
3825 InChain = GprIndex.getValue(1);
3826
3827 if (VT == MVT::i64) {
3828 // Check if GprIndex is odd; i64 arguments must start at an even gpr index
3829 SDValue GprAnd = DAG.getNode(ISD::AND, dl, MVT::i32, GprIndex,
3830 DAG.getConstant(1, dl, MVT::i32));
3831 SDValue CC64 = DAG.getSetCC(dl, MVT::i32, GprAnd,
3832 DAG.getConstant(0, dl, MVT::i32), ISD::SETNE);
3833 SDValue GprIndexPlusOne = DAG.getNode(ISD::ADD, dl, MVT::i32, GprIndex,
3834 DAG.getConstant(1, dl, MVT::i32));
3835 // Align GprIndex to be even if it isn't
3836 GprIndex = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC64, GprIndexPlusOne,
3837 GprIndex);
3838 }
3839
3840 // fpr index is 1 byte after gpr
3841 SDValue FprPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
3842 DAG.getConstant(1, dl, MVT::i32));
3843
3844 // fpr
3845 SDValue FprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain,
3846 FprPtr, MachinePointerInfo(SV), MVT::i8);
3847 InChain = FprIndex.getValue(1);
3848
3849 SDValue RegSaveAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
3850 DAG.getConstant(8, dl, MVT::i32));
3851
3852 SDValue OverflowAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
3853 DAG.getConstant(4, dl, MVT::i32));
3854
3855 // areas
3856 SDValue OverflowArea =
3857 DAG.getLoad(MVT::i32, dl, InChain, OverflowAreaPtr, MachinePointerInfo());
3858 InChain = OverflowArea.getValue(1);
3859
3860 SDValue RegSaveArea =
3861 DAG.getLoad(MVT::i32, dl, InChain, RegSaveAreaPtr, MachinePointerInfo());
3862 InChain = RegSaveArea.getValue(1);
3863
3864 // select overflow_area if index >= 8
3865 SDValue CC = DAG.getSetCC(dl, MVT::i32, VT.isInteger() ? GprIndex : FprIndex,
3866 DAG.getConstant(8, dl, MVT::i32), ISD::SETLT);
3867
3868 // adjustment constant gpr_index * 4/8
3869 SDValue RegConstant = DAG.getNode(ISD::MUL, dl, MVT::i32,
3870 VT.isInteger() ? GprIndex : FprIndex,
3871 DAG.getConstant(VT.isInteger() ? 4 : 8, dl,
3872 MVT::i32));
3873
3874 // OurReg = RegSaveArea + RegConstant
3875 SDValue OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, RegSaveArea,
3876 RegConstant);
3877
3878 // Floating types are 32 bytes into RegSaveArea
3879 if (VT.isFloatingPoint())
3880 OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, OurReg,
3881 DAG.getConstant(32, dl, MVT::i32));
3882
3883 // increase {f,g}pr_index by 1 (or 2 if VT is i64)
3884 SDValue IndexPlus1 = DAG.getNode(ISD::ADD, dl, MVT::i32,
3885 VT.isInteger() ? GprIndex : FprIndex,
3886 DAG.getConstant(VT == MVT::i64 ? 2 : 1, dl,
3887 MVT::i32));
3888
3889 InChain = DAG.getTruncStore(InChain, dl, IndexPlus1,
3890 VT.isInteger() ? VAListPtr : FprPtr,
3891 MachinePointerInfo(SV), MVT::i8);
3892
3893 // determine if we should load from reg_save_area or overflow_area
3894 SDValue Result = DAG.getNode(ISD::SELECT, dl, PtrVT, CC, OurReg, OverflowArea);
3895
3896 // increase overflow_area by 4/8 if gpr/fpr >= 8
3897 SDValue OverflowAreaPlusN = DAG.getNode(ISD::ADD, dl, PtrVT, OverflowArea,
3898 DAG.getConstant(VT.isInteger() ? 4 : 8,
3899 dl, MVT::i32));
3900
3901 OverflowArea = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC, OverflowArea,
3902 OverflowAreaPlusN);
3903
3904 InChain = DAG.getTruncStore(InChain, dl, OverflowArea, OverflowAreaPtr,
3905 MachinePointerInfo(), MVT::i32);
3906
3907 return DAG.getLoad(VT, dl, InChain, Result, MachinePointerInfo());
3908}
3909
3910SDValue PPCTargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG) const {
3911 assert(!Subtarget.isPPC64() && "LowerVACOPY is PPC32 only");
3912
3913 // We have to copy the entire va_list struct:
3914 // 2*sizeof(char) + 2 Byte alignment + 2*sizeof(char*) = 12 Byte
3915 return DAG.getMemcpy(Op.getOperand(0), Op, Op.getOperand(1), Op.getOperand(2),
3916 DAG.getConstant(12, SDLoc(Op), MVT::i32), Align(8),
3917 false, true, false, MachinePointerInfo(),
3918 MachinePointerInfo());
3919}
3920
3921SDValue PPCTargetLowering::LowerADJUST_TRAMPOLINE(SDValue Op,
3922 SelectionDAG &DAG) const {
3923 if (Subtarget.isAIXABI())
3924 report_fatal_error("ADJUST_TRAMPOLINE operation is not supported on AIX.");
3925
3926 return Op.getOperand(0);
3927}
3928
3929 SDValue PPCTargetLowering::LowerINLINEASM(SDValue Op, SelectionDAG &DAG) const {
3930 MachineFunction &MF = DAG.getMachineFunction();
3931 PPCFunctionInfo &MFI = *MF.getInfo<PPCFunctionInfo>();
3932
3933 assert((Op.getOpcode() == ISD::INLINEASM ||
3934 Op.getOpcode() == ISD::INLINEASM_BR) &&
3935 "Expecting Inline ASM node.");
3936
3937 // If an LR store is already known to be required then there is no point in
3938 // checking this ASM as well.
3939 if (MFI.isLRStoreRequired())
3940 return Op;
3941
3942 // Inline ASM nodes have an optional last operand that is an incoming Flag of
3943 // type MVT::Glue. We want to ignore this last operand if that is the case.
3944 unsigned NumOps = Op.getNumOperands();
3945 if (Op.getOperand(NumOps - 1).getValueType() == MVT::Glue)
3946 --NumOps;
3947
3948 // Check all operands that may contain the LR.
3949 for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
3950 const InlineAsm::Flag Flags(Op.getConstantOperandVal(i));
3951 unsigned NumVals = Flags.getNumOperandRegisters();
3952 ++i; // Skip the ID value.
3953
3954 switch (Flags.getKind()) {
3955 default:
3956 llvm_unreachable("Bad flags!");
3957 case InlineAsm::Kind::RegUse:
3958 case InlineAsm::Kind::Imm:
3959 case InlineAsm::Kind::Mem:
3960 i += NumVals;
3961 break;
3962 case InlineAsm::Kind::Clobber:
3963 case InlineAsm::Kind::RegDef:
3964 case InlineAsm::Kind::RegDefEarlyClobber: {
3965 for (; NumVals; --NumVals, ++i) {
3966 Register Reg = cast<RegisterSDNode>(Op.getOperand(i))->getReg();
3967 if (Reg != PPC::LR && Reg != PPC::LR8)
3968 continue;
3969 MFI.setLRStoreRequired();
3970 return Op;
3971 }
3972 break;
3973 }
3974 }
3975 }
3976
3977 return Op;
3978}
3979
3980SDValue PPCTargetLowering::LowerINIT_TRAMPOLINE(SDValue Op,
3981 SelectionDAG &DAG) const {
3982 if (Subtarget.isAIXABI())
3983 report_fatal_error("INIT_TRAMPOLINE operation is not supported on AIX.");
3984
3985 SDValue Chain = Op.getOperand(0);
3986 SDValue Trmp = Op.getOperand(1); // trampoline
3987 SDValue FPtr = Op.getOperand(2); // nested function
3988 SDValue Nest = Op.getOperand(3); // 'nest' parameter value
3989 SDLoc dl(Op);
3990
3991 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3992 bool isPPC64 = (PtrVT == MVT::i64);
3993 Type *IntPtrTy = DAG.getDataLayout().getIntPtrType(*DAG.getContext());
3994
3995 TargetLowering::ArgListTy Args;
3996 TargetLowering::ArgListEntry Entry;
3997
3998 Entry.Ty = IntPtrTy;
3999 Entry.Node = Trmp; Args.push_back(Entry);
4000
4001 // TrampSize == (isPPC64 ? 48 : 40);
4002 Entry.Node = DAG.getConstant(isPPC64 ? 48 : 40, dl,
4003 isPPC64 ? MVT::i64 : MVT::i32);
4004 Args.push_back(Entry);
4005
4006 Entry.Node = FPtr; Args.push_back(Entry);
4007 Entry.Node = Nest; Args.push_back(Entry);
4008
4009 // Lower to a call to __trampoline_setup(Trmp, TrampSize, FPtr, ctx_reg)
4010 TargetLowering::CallLoweringInfo CLI(DAG);
4011 CLI.setDebugLoc(dl).setChain(Chain).setLibCallee(
4012 CallingConv::C, Type::getVoidTy(*DAG.getContext()),
4013 DAG.getExternalSymbol("__trampoline_setup", PtrVT), std::move(Args));
4014
4015 std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
4016 return CallResult.second;
4017}
4018
4019 SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
4020 MachineFunction &MF = DAG.getMachineFunction();
4021 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
4022 EVT PtrVT = getPointerTy(MF.getDataLayout());
4023
4024 SDLoc dl(Op);
4025
4026 if (Subtarget.isPPC64() || Subtarget.isAIXABI()) {
4027 // vastart just stores the address of the VarArgsFrameIndex slot into the
4028 // memory location argument.
4029 SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
4030 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
4031 return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1),
4032 MachinePointerInfo(SV));
4033 }
4034
4035 // For the 32-bit SVR4 ABI we follow the layout of the va_list struct.
4036 // We suppose the given va_list is already allocated.
4037 //
4038 // typedef struct {
4039 // char gpr; /* index into the array of 8 GPRs
4040 // * stored in the register save area
4041 // * gpr=0 corresponds to r3,
4042 // * gpr=1 to r4, etc.
4043 // */
4044 // char fpr; /* index into the array of 8 FPRs
4045 // * stored in the register save area
4046 // * fpr=0 corresponds to f1,
4047 // * fpr=1 to f2, etc.
4048 // */
4049 // char *overflow_arg_area;
4050 // /* location on stack that holds
4051 // * the next overflow argument
4052 // */
4053 // char *reg_save_area;
4054 // /* where r3:r10 and f1:f8 (if saved)
4055 // * are stored
4056 // */
4057 // } va_list[1];
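// With 32-bit pointers the fields land at byte offsets 0 (gpr), 1 (fpr),
// 4 (overflow_arg_area, after two bytes of padding) and 8 (reg_save_area),
// matching the offsets stored below.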
4058
4059 SDValue ArgGPR = DAG.getConstant(FuncInfo->getVarArgsNumGPR(), dl, MVT::i32);
4060 SDValue ArgFPR = DAG.getConstant(FuncInfo->getVarArgsNumFPR(), dl, MVT::i32);
4061 SDValue StackOffsetFI = DAG.getFrameIndex(FuncInfo->getVarArgsStackOffset(),
4062 PtrVT);
4063 SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
4064 PtrVT);
4065
4066 uint64_t FrameOffset = PtrVT.getSizeInBits()/8;
4067 SDValue ConstFrameOffset = DAG.getConstant(FrameOffset, dl, PtrVT);
4068
4069 uint64_t StackOffset = PtrVT.getSizeInBits()/8 - 1;
4070 SDValue ConstStackOffset = DAG.getConstant(StackOffset, dl, PtrVT);
4071
4072 uint64_t FPROffset = 1;
4073 SDValue ConstFPROffset = DAG.getConstant(FPROffset, dl, PtrVT);
4074
4075 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
4076
4077 // Store first byte : number of int regs
4078 SDValue firstStore =
4079 DAG.getTruncStore(Op.getOperand(0), dl, ArgGPR, Op.getOperand(1),
4080 MachinePointerInfo(SV), MVT::i8);
4081 uint64_t nextOffset = FPROffset;
4082 SDValue nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, Op.getOperand(1),
4083 ConstFPROffset);
4084
4085 // Store second byte : number of float regs
4086 SDValue secondStore =
4087 DAG.getTruncStore(firstStore, dl, ArgFPR, nextPtr,
4088 MachinePointerInfo(SV, nextOffset), MVT::i8);
4089 nextOffset += StackOffset;
4090 nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstStackOffset);
4091
4092 // Store second word : arguments given on stack
4093 SDValue thirdStore = DAG.getStore(secondStore, dl, StackOffsetFI, nextPtr,
4094 MachinePointerInfo(SV, nextOffset));
4095 nextOffset += FrameOffset;
4096 nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstFrameOffset);
4097
4098 // Store third word : arguments given in registers
4099 return DAG.getStore(thirdStore, dl, FR, nextPtr,
4100 MachinePointerInfo(SV, nextOffset));
4101}
4102
4103/// FPR - The set of FP registers that should be allocated for arguments
4104/// on Darwin and AIX.
4105static const MCPhysReg FPR[] = {PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5,
4106 PPC::F6, PPC::F7, PPC::F8, PPC::F9, PPC::F10,
4107 PPC::F11, PPC::F12, PPC::F13};
4108
4109/// CalculateStackSlotSize - Calculates the size reserved for this argument on
4110/// the stack.
4111static unsigned CalculateStackSlotSize(EVT ArgVT, ISD::ArgFlagsTy Flags,
4112 unsigned PtrByteSize) {
4113 unsigned ArgSize = ArgVT.getStoreSize();
4114 if (Flags.isByVal())
4115 ArgSize = Flags.getByValSize();
4116
4117 // Round up to multiples of the pointer size, except for array members,
4118 // which are always packed.
4119 if (!Flags.isInConsecutiveRegs())
4120 ArgSize = ((ArgSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
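// e.g. ArgSize == 13 with PtrByteSize == 8 rounds up to 16 bytes.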
4121
4122 return ArgSize;
4123}
4124
4125/// CalculateStackSlotAlignment - Calculates the alignment of this argument
4126 /// on the stack.
4127 static Align CalculateStackSlotAlignment(EVT ArgVT, EVT OrigVT,
4128 ISD::ArgFlagsTy Flags,
4129 unsigned PtrByteSize) {
4130 Align Alignment(PtrByteSize);
4131
4132 // Altivec parameters are padded to a 16 byte boundary.
4133 if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
4134 ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
4135 ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64 ||
4136 ArgVT == MVT::v1i128 || ArgVT == MVT::f128)
4137 Alignment = Align(16);
4138
4139 // ByVal parameters are aligned as requested.
4140 if (Flags.isByVal()) {
4141 auto BVAlign = Flags.getNonZeroByValAlign();
4142 if (BVAlign > PtrByteSize) {
4143 if (BVAlign.value() % PtrByteSize != 0)
4144 report_fatal_error(
4145 "ByVal alignment is not a multiple of the pointer size");
4146
4147 Alignment = BVAlign;
4148 }
4149 }
4150
4151 // Array members are always packed to their original alignment.
4152 if (Flags.isInConsecutiveRegs()) {
4153 // If the array member was split into multiple registers, the first
4154 // needs to be aligned to the size of the full type. (Except for
4155 // ppcf128, which is only aligned as its f64 components.)
4156 if (Flags.isSplit() && OrigVT != MVT::ppcf128)
4157 Alignment = Align(OrigVT.getStoreSize());
4158 else
4159 Alignment = Align(ArgVT.getStoreSize());
4160 }
4161
4162 return Alignment;
4163}
4164
4165/// CalculateStackSlotUsed - Return whether this argument will use its
4166/// stack slot (instead of being passed in registers). ArgOffset,
4167/// AvailableFPRs, and AvailableVRs must hold the current argument
4168/// position, and will be updated to account for this argument.
4169static bool CalculateStackSlotUsed(EVT ArgVT, EVT OrigVT, ISD::ArgFlagsTy Flags,
4170 unsigned PtrByteSize, unsigned LinkageSize,
4171 unsigned ParamAreaSize, unsigned &ArgOffset,
4172 unsigned &AvailableFPRs,
4173 unsigned &AvailableVRs) {
4174 bool UseMemory = false;
4175
4176 // Respect alignment of argument on the stack.
4177 Align Alignment =
4178 CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
4179 ArgOffset = alignTo(ArgOffset, Alignment);
4180 // If there's no space left in the argument save area, we must
4181 // use memory (this check also catches zero-sized arguments).
4182 if (ArgOffset >= LinkageSize + ParamAreaSize)
4183 UseMemory = true;
4184
4185 // Allocate argument on the stack.
4186 ArgOffset += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
4187 if (Flags.isInConsecutiveRegsLast())
4188 ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
4189 // If we overran the argument save area, we must use memory
4190 // (this check catches arguments passed partially in memory)
4191 if (ArgOffset > LinkageSize + ParamAreaSize)
4192 UseMemory = true;
4193
4194 // However, if the argument is actually passed in an FPR or a VR,
4195 // we don't use memory after all.
4196 if (!Flags.isByVal()) {
4197 if (ArgVT == MVT::f32 || ArgVT == MVT::f64)
4198 if (AvailableFPRs > 0) {
4199 --AvailableFPRs;
4200 return false;
4201 }
4202 if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
4203 ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
4204 ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64 ||
4205 ArgVT == MVT::v1i128 || ArgVT == MVT::f128)
4206 if (AvailableVRs > 0) {
4207 --AvailableVRs;
4208 return false;
4209 }
4210 }
4211
4212 return UseMemory;
4213}
4214
4215/// EnsureStackAlignment - Round stack frame size up from NumBytes to
4216 /// ensure minimum alignment required for target.
4217 static unsigned EnsureStackAlignment(const PPCFrameLowering *Lowering,
4218 unsigned NumBytes) {
4219 return alignTo(NumBytes, Lowering->getStackAlign());
4220}
4221
4222SDValue PPCTargetLowering::LowerFormalArguments(
4223 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
4224 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
4225 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
4226 if (Subtarget.isAIXABI())
4227 return LowerFormalArguments_AIX(Chain, CallConv, isVarArg, Ins, dl, DAG,
4228 InVals);
4229 if (Subtarget.is64BitELFABI())
4230 return LowerFormalArguments_64SVR4(Chain, CallConv, isVarArg, Ins, dl, DAG,
4231 InVals);
4232 assert(Subtarget.is32BitELFABI());
4233 return LowerFormalArguments_32SVR4(Chain, CallConv, isVarArg, Ins, dl, DAG,
4234 InVals);
4235}
4236
4237SDValue PPCTargetLowering::LowerFormalArguments_32SVR4(
4238 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
4239 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
4240 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
4241
4242 // 32-bit SVR4 ABI Stack Frame Layout:
4243 // +-----------------------------------+
4244 // +--> | Back chain |
4245 // | +-----------------------------------+
4246 // | | Floating-point register save area |
4247 // | +-----------------------------------+
4248 // | | General register save area |
4249 // | +-----------------------------------+
4250 // | | CR save word |
4251 // | +-----------------------------------+
4252 // | | VRSAVE save word |
4253 // | +-----------------------------------+
4254 // | | Alignment padding |
4255 // | +-----------------------------------+
4256 // | | Vector register save area |
4257 // | +-----------------------------------+
4258 // | | Local variable space |
4259 // | +-----------------------------------+
4260 // | | Parameter list area |
4261 // | +-----------------------------------+
4262 // | | LR save word |
4263 // | +-----------------------------------+
4264 // SP--> +--- | Back chain |
4265 // +-----------------------------------+
4266 //
4267 // Specifications:
4268 // System V Application Binary Interface PowerPC Processor Supplement
4269 // AltiVec Technology Programming Interface Manual
4270
4272 MachineFrameInfo &MFI = MF.getFrameInfo();
4273 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
4274
4275 EVT PtrVT = getPointerTy(MF.getDataLayout());
4276 // Potential tail calls could cause overwriting of argument stack slots.
4277 bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
4278 (CallConv == CallingConv::Fast));
4279 const Align PtrAlign(4);
4280
4281 // Assign locations to all of the incoming arguments.
4282 SmallVector<CCValAssign, 16> ArgLocs;
4283 PPCCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
4284 *DAG.getContext());
4285
4286 // Reserve space for the linkage area on the stack.
4287 unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
4288 CCInfo.AllocateStack(LinkageSize, PtrAlign);
4289 if (useSoftFloat())
4290 CCInfo.PreAnalyzeFormalArguments(Ins);
4291
4292 CCInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4);
4293 CCInfo.clearWasPPCF128();
4294
4295 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
4296 CCValAssign &VA = ArgLocs[i];
4297
4298 // Arguments stored in registers.
4299 if (VA.isRegLoc()) {
4300 const TargetRegisterClass *RC;
4301 EVT ValVT = VA.getValVT();
4302
4303 switch (ValVT.getSimpleVT().SimpleTy) {
4304 default:
4305 llvm_unreachable("ValVT not supported by formal arguments Lowering");
4306 case MVT::i1:
4307 case MVT::i32:
4308 RC = &PPC::GPRCRegClass;
4309 break;
4310 case MVT::f32:
4311 if (Subtarget.hasP8Vector())
4312 RC = &PPC::VSSRCRegClass;
4313 else if (Subtarget.hasSPE())
4314 RC = &PPC::GPRCRegClass;
4315 else
4316 RC = &PPC::F4RCRegClass;
4317 break;
4318 case MVT::f64:
4319 if (Subtarget.hasVSX())
4320 RC = &PPC::VSFRCRegClass;
4321 else if (Subtarget.hasSPE())
4322 // SPE passes doubles in GPR pairs.
4323 RC = &PPC::GPRCRegClass;
4324 else
4325 RC = &PPC::F8RCRegClass;
4326 break;
4327 case MVT::v16i8:
4328 case MVT::v8i16:
4329 case MVT::v4i32:
4330 RC = &PPC::VRRCRegClass;
4331 break;
4332 case MVT::v4f32:
4333 RC = &PPC::VRRCRegClass;
4334 break;
4335 case MVT::v2f64:
4336 case MVT::v2i64:
4337 RC = &PPC::VRRCRegClass;
4338 break;
4339 }
4340
4341 SDValue ArgValue;
4342 // Transform the arguments stored in physical registers into
4343 // virtual ones.
4344 if (VA.getLocVT() == MVT::f64 && Subtarget.hasSPE()) {
4345 assert(i + 1 < e && "No second half of double precision argument");
4346 Register RegLo = MF.addLiveIn(VA.getLocReg(), RC);
4347 Register RegHi = MF.addLiveIn(ArgLocs[++i].getLocReg(), RC);
4348 SDValue ArgValueLo = DAG.getCopyFromReg(Chain, dl, RegLo, MVT::i32);
4349 SDValue ArgValueHi = DAG.getCopyFromReg(Chain, dl, RegHi, MVT::i32);
4350 if (!Subtarget.isLittleEndian())
4351 std::swap (ArgValueLo, ArgValueHi);
4352 ArgValue = DAG.getNode(PPCISD::BUILD_SPE64, dl, MVT::f64, ArgValueLo,
4353 ArgValueHi);
4354 } else {
4355 Register Reg = MF.addLiveIn(VA.getLocReg(), RC);
4356 ArgValue = DAG.getCopyFromReg(Chain, dl, Reg,
4357 ValVT == MVT::i1 ? MVT::i32 : ValVT);
4358 if (ValVT == MVT::i1)
4359 ArgValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, ArgValue);
4360 }
4361
4362 InVals.push_back(ArgValue);
4363 } else {
4364 // Argument stored in memory.
4365 assert(VA.isMemLoc());
4366
4367 // Get the extended size of the argument type on the stack
4368 unsigned ArgSize = VA.getLocVT().getStoreSize();
4369 // Get the actual size of the argument type
4370 unsigned ObjSize = VA.getValVT().getStoreSize();
4371 unsigned ArgOffset = VA.getLocMemOffset();
4372 // Stack objects in PPC32 are right justified.
4373 ArgOffset += ArgSize - ObjSize;
4374 int FI = MFI.CreateFixedObject(ArgSize, ArgOffset, isImmutable);
4375
4376 // Create load nodes to retrieve arguments from the stack.
4377 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4378 InVals.push_back(
4379 DAG.getLoad(VA.getValVT(), dl, Chain, FIN, MachinePointerInfo()));
4380 }
4381 }
4382
4383 // Assign locations to all of the incoming aggregate by value arguments.
4384 // Aggregates passed by value are stored in the local variable space of the
4385 // caller's stack frame, right above the parameter list area.
4386 SmallVector<CCValAssign, 16> ByValArgLocs;
4387 CCState CCByValInfo(CallConv, isVarArg, DAG.getMachineFunction(),
4388 ByValArgLocs, *DAG.getContext());
4389
4390 // Reserve stack space for the allocations in CCInfo.
4391 CCByValInfo.AllocateStack(CCInfo.getStackSize(), PtrAlign);
4392
4393 CCByValInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4_ByVal);
4394
4395 // Area that is at least reserved in the caller of this function.
4396 unsigned MinReservedArea = CCByValInfo.getStackSize();
4397 MinReservedArea = std::max(MinReservedArea, LinkageSize);
4398
4399 // Set the size that is at least reserved in the caller of this function. Tail
4400 // call optimized function's reserved stack space needs to be aligned so that
4401 // taking the difference between two stack areas will result in an aligned
4402 // stack.
4403 MinReservedArea =
4404 EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
4405 FuncInfo->setMinReservedArea(MinReservedArea);
4406
4408
4409 // If the function takes variable number of arguments, make a frame index for
4410 // the start of the first vararg value... for expansion of llvm.va_start.
4411 if (isVarArg) {
4412 static const MCPhysReg GPArgRegs[] = {
4413 PPC::R3, PPC::R4, PPC::R5, PPC::R6,
4414 PPC::R7, PPC::R8, PPC::R9, PPC::R10,
4415 };
4416 const unsigned NumGPArgRegs = std::size(GPArgRegs);
4417
4418 static const MCPhysReg FPArgRegs[] = {
4419 PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
4420 PPC::F8
4421 };
4422 unsigned NumFPArgRegs = std::size(FPArgRegs);
4423
4424 if (useSoftFloat() || hasSPE())
4425 NumFPArgRegs = 0;
4426
4427 FuncInfo->setVarArgsNumGPR(CCInfo.getFirstUnallocated(GPArgRegs));
4428 FuncInfo->setVarArgsNumFPR(CCInfo.getFirstUnallocated(FPArgRegs));
4429
4430 // Make room for NumGPArgRegs and NumFPArgRegs.
4431 int Depth = NumGPArgRegs * PtrVT.getSizeInBits()/8 +
4432 NumFPArgRegs * MVT(MVT::f64).getSizeInBits()/8;
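// With 8 GPRs * 4 bytes and (at most) 8 FPRs * 8 bytes this reserves up to
// 96 bytes for the register save area.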
4433
4434 FuncInfo->setVarArgsStackOffset(MFI.CreateFixedObject(
4435 PtrVT.getSizeInBits() / 8, CCInfo.getStackSize(), true));
4436
4437 FuncInfo->setVarArgsFrameIndex(
4438 MFI.CreateStackObject(Depth, Align(8), false));
4439 SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
4440
4441 // The fixed integer arguments of a variadic function are stored to the
4442 // VarArgsFrameIndex on the stack so that they may be loaded by
4443 // dereferencing the result of va_next.
4444 for (unsigned GPRIndex = 0; GPRIndex != NumGPArgRegs; ++GPRIndex) {
4445 // Get an existing live-in vreg, or add a new one.
4446 Register VReg = MF.getRegInfo().getLiveInVirtReg(GPArgRegs[GPRIndex]);
4447 if (!VReg)
4448 VReg = MF.addLiveIn(GPArgRegs[GPRIndex], &PPC::GPRCRegClass);
4449
4450 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4451 SDValue Store =
4452 DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
4453 MemOps.push_back(Store);
4454 // Increment the address by four for the next argument to store
4455 SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, dl, PtrVT);
4456 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
4457 }
4458
4459 // FIXME 32-bit SVR4: We only need to save FP argument registers if CR bit 6
4460 // is set.
4461 // The double arguments are stored to the VarArgsFrameIndex
4462 // on the stack.
4463 for (unsigned FPRIndex = 0; FPRIndex != NumFPArgRegs; ++FPRIndex) {
4464 // Get an existing live-in vreg, or add a new one.
4465 Register VReg = MF.getRegInfo().getLiveInVirtReg(FPArgRegs[FPRIndex]);
4466 if (!VReg)
4467 VReg = MF.addLiveIn(FPArgRegs[FPRIndex], &PPC::F8RCRegClass);
4468
4469 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::f64);
4470 SDValue Store =
4471 DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
4472 MemOps.push_back(Store);
4473 // Increment the address by eight for the next argument to store
4474 SDValue PtrOff = DAG.getConstant(MVT(MVT::f64).getSizeInBits()/8, dl,
4475 PtrVT);
4476 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
4477 }
4478 }
4479
4480 if (!MemOps.empty())
4481 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
4482
4483 return Chain;
4484}
4485
4486// PPC64 passes i8, i16, and i32 values in i64 registers. Promote
4487// value to MVT::i64 and then truncate to the correct register size.
4488SDValue PPCTargetLowering::extendArgForPPC64(ISD::ArgFlagsTy Flags,
4489 EVT ObjectVT, SelectionDAG &DAG,
4490 SDValue ArgVal,
4491 const SDLoc &dl) const {
4492 if (Flags.isSExt())
4493 ArgVal = DAG.getNode(ISD::AssertSext, dl, MVT::i64, ArgVal,
4494 DAG.getValueType(ObjectVT));
4495 else if (Flags.isZExt())
4496 ArgVal = DAG.getNode(ISD::AssertZext, dl, MVT::i64, ArgVal,
4497 DAG.getValueType(ObjectVT));
4498
4499 return DAG.getNode(ISD::TRUNCATE, dl, ObjectVT, ArgVal);
4500}
4501
4502SDValue PPCTargetLowering::LowerFormalArguments_64SVR4(
4503 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
4504 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
4505 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
4506 // TODO: add description of PPC stack frame format, or at least some docs.
4507 //
4508 bool isELFv2ABI = Subtarget.isELFv2ABI();
4509 bool isLittleEndian = Subtarget.isLittleEndian();
4510 MachineFunction &MF = DAG.getMachineFunction();
4511 MachineFrameInfo &MFI = MF.getFrameInfo();
4512 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
4513
4514 assert(!(CallConv == CallingConv::Fast && isVarArg) &&
4515 "fastcc not supported on varargs functions");
4516
4517 EVT PtrVT = getPointerTy(MF.getDataLayout());
4518 // Potential tail calls could cause overwriting of argument stack slots.
4519 bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
4520 (CallConv == CallingConv::Fast));
4521 unsigned PtrByteSize = 8;
4522 unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
4523
4524 static const MCPhysReg GPR[] = {
4525 PPC::X3, PPC::X4, PPC::X5, PPC::X6,
4526 PPC::X7, PPC::X8, PPC::X9, PPC::X10,
4527 };
4528 static const MCPhysReg VR[] = {
4529 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
4530 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
4531 };
4532
4533 const unsigned Num_GPR_Regs = std::size(GPR);
4534 const unsigned Num_FPR_Regs = useSoftFloat() ? 0 : 13;
4535 const unsigned Num_VR_Regs = std::size(VR);
4536
4537 // Do a first pass over the arguments to determine whether the ABI
4538 // guarantees that our caller has allocated the parameter save area
4539 // on its stack frame. In the ELFv1 ABI, this is always the case;
4540 // in the ELFv2 ABI, it is true if this is a vararg function or if
4541 // any parameter is located in a stack slot.
4542
4543 bool HasParameterArea = !isELFv2ABI || isVarArg;
4544 unsigned ParamAreaSize = Num_GPR_Regs * PtrByteSize;
4545 unsigned NumBytes = LinkageSize;
4546 unsigned AvailableFPRs = Num_FPR_Regs;
4547 unsigned AvailableVRs = Num_VR_Regs;
4548 for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
4549 if (Ins[i].Flags.isNest())
4550 continue;
4551
4552 if (CalculateStackSlotUsed(Ins[i].VT, Ins[i].ArgVT, Ins[i].Flags,
4553 PtrByteSize, LinkageSize, ParamAreaSize,
4554 NumBytes, AvailableFPRs, AvailableVRs))
4555 HasParameterArea = true;
4556 }
4557
4558 // Add DAG nodes to load the arguments or copy them out of registers. On
4559 // entry to a function on PPC, the arguments start after the linkage area,
4560 // although the first ones are often in registers.
4561
4562 unsigned ArgOffset = LinkageSize;
4563 unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
4564 SmallVector<SDValue, 8> MemOps;
4565 Function::const_arg_iterator FuncArg = MF.getFunction().arg_begin();
4566 unsigned CurArgIdx = 0;
4567 for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
4568 SDValue ArgVal;
4569 bool needsLoad = false;
4570 EVT ObjectVT = Ins[ArgNo].VT;
4571 EVT OrigVT = Ins[ArgNo].ArgVT;
4572 unsigned ObjSize = ObjectVT.getStoreSize();
4573 unsigned ArgSize = ObjSize;
4574 ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
4575 if (Ins[ArgNo].isOrigArg()) {
4576 std::advance(FuncArg, Ins[ArgNo].getOrigArgIndex() - CurArgIdx);
4577 CurArgIdx = Ins[ArgNo].getOrigArgIndex();
4578 }
4579 // We re-align the argument offset for each argument, except when using the
4580 // fast calling convention, when we need to make sure we do that only when
4581 // we'll actually use a stack slot.
4582 unsigned CurArgOffset;
4583 Align Alignment;
4584 auto ComputeArgOffset = [&]() {
4585 /* Respect alignment of argument on the stack. */
4586 Alignment =
4587 CalculateStackSlotAlignment(ObjectVT, OrigVT, Flags, PtrByteSize);
4588 ArgOffset = alignTo(ArgOffset, Alignment);
4589 CurArgOffset = ArgOffset;
4590 };
4591
4592 if (CallConv != CallingConv::Fast) {
4593 ComputeArgOffset();
4594
4595 /* Compute GPR index associated with argument offset. */
4596 GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
4597 GPR_idx = std::min(GPR_idx, Num_GPR_Regs);
4598 }
4599
4600 // FIXME the codegen can be much improved in some cases.
4601 // We do not have to keep everything in memory.
4602 if (Flags.isByVal()) {
4603 assert(Ins[ArgNo].isOrigArg() && "Byval arguments cannot be implicit");
4604
4605 if (CallConv == CallingConv::Fast)
4606 ComputeArgOffset();
4607
4608 // ObjSize is the true size; ArgSize is rounded up to a multiple of registers.
4609 ObjSize = Flags.getByValSize();
4610 ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
4611 // Empty aggregate parameters do not take up registers. Examples:
4612 // struct { } a;
4613 // union { } b;
4614 // int c[0];
4615 // etc. However, we have to provide a place-holder in InVals, so
4616 // pretend we have an 8-byte item at the current address for that
4617 // purpose.
4618 if (!ObjSize) {
4619 int FI = MFI.CreateFixedObject(PtrByteSize, ArgOffset, true);
4620 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4621 InVals.push_back(FIN);
4622 continue;
4623 }
4624
4625 // Create a stack object covering all stack doublewords occupied
4626 // by the argument. If the argument is (fully or partially) on
4627 // the stack, or if the argument is fully in registers but the
4628 // caller has allocated the parameter save anyway, we can refer
4629 // directly to the caller's stack frame. Otherwise, create a
4630 // local copy in our own frame.
4631 int FI;
4632 if (HasParameterArea ||
4633 ArgSize + ArgOffset > LinkageSize + Num_GPR_Regs * PtrByteSize)
4634 FI = MFI.CreateFixedObject(ArgSize, ArgOffset, false, true);
4635 else
4636 FI = MFI.CreateStackObject(ArgSize, Alignment, false);
4637 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4638
4639 // Handle aggregates smaller than 8 bytes.
4640 if (ObjSize < PtrByteSize) {
4641 // The value of the object is its address, which differs from the
4642 // address of the enclosing doubleword on big-endian systems.
4643 SDValue Arg = FIN;
4644 if (!isLittleEndian) {
4645 SDValue ArgOff = DAG.getConstant(PtrByteSize - ObjSize, dl, PtrVT);
4646 Arg = DAG.getNode(ISD::ADD, dl, ArgOff.getValueType(), Arg, ArgOff);
4647 }
4648 InVals.push_back(Arg);
4649
4650 if (GPR_idx != Num_GPR_Regs) {
4651 Register VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
4652 FuncInfo->addLiveInAttr(VReg, Flags);
4653 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4654 EVT ObjType = EVT::getIntegerVT(*DAG.getContext(), ObjSize * 8);
4655 SDValue Store =
4656 DAG.getTruncStore(Val.getValue(1), dl, Val, Arg,
4657 MachinePointerInfo(&*FuncArg), ObjType);
4658 MemOps.push_back(Store);
4659 }
4660 // Whether we copied from a register or not, advance the offset
4661 // into the parameter save area by a full doubleword.
4662 ArgOffset += PtrByteSize;
4663 continue;
4664 }
4665
4666 // The value of the object is its address, which is the address of
4667 // its first stack doubleword.
4668 InVals.push_back(FIN);
4669
4670 // Store whatever pieces of the object are in registers to memory.
4671 for (unsigned j = 0; j < ArgSize; j += PtrByteSize) {
4672 if (GPR_idx == Num_GPR_Regs)
4673 break;
4674
4675 Register VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
4676 FuncInfo->addLiveInAttr(VReg, Flags);
4677 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4678 SDValue Addr = FIN;
4679 if (j) {
4680 SDValue Off = DAG.getConstant(j, dl, PtrVT);
4681 Addr = DAG.getNode(ISD::ADD, dl, Off.getValueType(), Addr, Off);
4682 }
4683 unsigned StoreSizeInBits = std::min(PtrByteSize, (ObjSize - j)) * 8;
4684 EVT ObjType = EVT::getIntegerVT(*DAG.getContext(), StoreSizeInBits);
4685 SDValue Store =
4686 DAG.getTruncStore(Val.getValue(1), dl, Val, Addr,
4687 MachinePointerInfo(&*FuncArg, j), ObjType);
4688 MemOps.push_back(Store);
4689 ++GPR_idx;
4690 }
4691 ArgOffset += ArgSize;
4692 continue;
4693 }
4694
4695 switch (ObjectVT.getSimpleVT().SimpleTy) {
4696 default: llvm_unreachable("Unhandled argument type!");
4697 case MVT::i1:
4698 case MVT::i32:
4699 case MVT::i64:
4700 if (Flags.isNest()) {
4701 // The 'nest' parameter, if any, is passed in R11.
4702 Register VReg = MF.addLiveIn(PPC::X11, &PPC::G8RCRegClass);
4703 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
4704
4705 if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
4706 ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
4707
4708 break;
4709 }
4710
4711 // These can be scalar arguments or elements of an integer array type
4712 // passed directly. Clang may use those instead of "byval" aggregate
4713 // types to avoid forcing arguments to memory unnecessarily.
4714 if (GPR_idx != Num_GPR_Regs) {
4715 Register VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
4716 FuncInfo->addLiveInAttr(VReg, Flags);
4717 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
4718
4719 if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
4720 // PPC64 passes i8, i16, and i32 values in i64 registers. Promote
4721 // value to MVT::i64 and then truncate to the correct register size.
4722 ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
4723 } else {
4724 if (CallConv == CallingConv::Fast)
4725 ComputeArgOffset();
4726
4727 needsLoad = true;
4728 ArgSize = PtrByteSize;
4729 }
4730 if (CallConv != CallingConv::Fast || needsLoad)
4731 ArgOffset += 8;
4732 break;
4733
4734 case MVT::f32:
4735 case MVT::f64:
4736 // These can be scalar arguments or elements of a float array type
4737 // passed directly. The latter are used to implement ELFv2 homogenous
4738 // float aggregates.
4739 if (FPR_idx != Num_FPR_Regs) {
4740 unsigned VReg;
4741
4742 if (ObjectVT == MVT::f32)
4743 VReg = MF.addLiveIn(FPR[FPR_idx],
4744 Subtarget.hasP8Vector()
4745 ? &PPC::VSSRCRegClass
4746 : &PPC::F4RCRegClass);
4747 else
4748 VReg = MF.addLiveIn(FPR[FPR_idx], Subtarget.hasVSX()
4749 ? &PPC::VSFRCRegClass
4750 : &PPC::F8RCRegClass);
4751
4752 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
4753 ++FPR_idx;
4754 } else if (GPR_idx != Num_GPR_Regs && CallConv != CallingConv::Fast) {
4755 // FIXME: We may want to re-enable this for CallingConv::Fast on the P8
4756 // once we support fp <-> gpr moves.
4757
4758 // This can only ever happen in the presence of f32 array types,
4759 // since otherwise we never run out of FPRs before running out
4760 // of GPRs.
4761 Register VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
4762 FuncInfo->addLiveInAttr(VReg, Flags);
4763 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
4764
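// A lone f32 array element occupies only one 32-bit half of the doubleword-
// sized GPR; when it sits in the high half (which half depends on endianness
// and offset), shift it down before truncating to 32 bits.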
4765 if (ObjectVT == MVT::f32) {
4766 if ((ArgOffset % PtrByteSize) == (isLittleEndian ? 4 : 0))
4767 ArgVal = DAG.getNode(ISD::SRL, dl, MVT::i64, ArgVal,
4768 DAG.getConstant(32, dl, MVT::i32));
4769 ArgVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, ArgVal);
4770 }
4771
4772 ArgVal = DAG.getNode(ISD::BITCAST, dl, ObjectVT, ArgVal);
4773 } else {
4774 if (CallConv == CallingConv::Fast)
4775 ComputeArgOffset();
4776
4777 needsLoad = true;
4778 }
4779
4780 // When passing an array of floats, the array occupies consecutive
4781 // space in the argument area; only round up to the next doubleword
4782 // at the end of the array. Otherwise, each float takes 8 bytes.
4783 if (CallConv != CallingConv::Fast || needsLoad) {
4784 ArgSize = Flags.isInConsecutiveRegs() ? ObjSize : PtrByteSize;
4785 ArgOffset += ArgSize;
4786 if (Flags.isInConsecutiveRegsLast())
4787 ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
4788 }
4789 break;
4790 case MVT::v4f32:
4791 case MVT::v4i32:
4792 case MVT::v8i16:
4793 case MVT::v16i8:
4794 case MVT::v2f64:
4795 case MVT::v2i64:
4796 case MVT::v1i128:
4797 case MVT::f128:
4798 // These can be scalar arguments or elements of a vector array type
4799 // passed directly. The latter are used to implement ELFv2 homogenous
4800 // vector aggregates.
4801 if (VR_idx != Num_VR_Regs) {
4802 Register VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass);
4803 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
4804 ++VR_idx;
4805 } else {
4806 if (CallConv == CallingConv::Fast)
4807 ComputeArgOffset();
4808 needsLoad = true;
4809 }
4810 if (CallConv != CallingConv::Fast || needsLoad)
4811 ArgOffset += 16;
4812 break;
4813 }
4814
4815 // We need to load the argument to a virtual register if we determined
4816 // above that we ran out of physical registers of the appropriate type.
4817 if (needsLoad) {
4818 if (ObjSize < ArgSize && !isLittleEndian)
4819 CurArgOffset += ArgSize - ObjSize;
4820 int FI = MFI.CreateFixedObject(ObjSize, CurArgOffset, isImmutable);
4821 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4822 ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo());
4823 }
4824
4825 InVals.push_back(ArgVal);
4826 }
4827
4828 // Area that is at least reserved in the caller of this function.
4829 unsigned MinReservedArea;
4830 if (HasParameterArea)
4831 MinReservedArea = std::max(ArgOffset, LinkageSize + 8 * PtrByteSize);
4832 else
4833 MinReservedArea = LinkageSize;
4834
4835 // Set the size that is at least reserved in the caller of this function. Tail
4836 // call optimized functions' reserved stack space needs to be aligned so that
4837 // taking the difference between two stack areas will result in an aligned
4838 // stack.
4839 MinReservedArea =
4840 EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
4841 FuncInfo->setMinReservedArea(MinReservedArea);
4842
4843 // If the function takes variable number of arguments, make a frame index for
4844 // the start of the first vararg value... for expansion of llvm.va_start.
4845 // The ELFv2 ABI spec states:
4846 // C programs that are intended to be *portable* across different compilers
4847 // and architectures must use the header file <stdarg.h> to deal with variable
4848 // argument lists.
4849 if (isVarArg && MFI.hasVAStart()) {
4850 int Depth = ArgOffset;
4851
4852 FuncInfo->setVarArgsFrameIndex(
4853 MFI.CreateFixedObject(PtrByteSize, Depth, true));
4854 SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
4855
4856 // If this function is vararg, store any remaining integer argument regs
4857 // to their spots on the stack so that they may be loaded by dereferencing
4858 // the result of va_next.
4859 for (GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
4860 GPR_idx < Num_GPR_Regs; ++GPR_idx) {
4861 Register VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
4862 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4863 SDValue Store =
4864 DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
4865 MemOps.push_back(Store);
4866 // Increment the address by four for the next argument to store
4867 SDValue PtrOff = DAG.getConstant(PtrByteSize, dl, PtrVT);
4868 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
4869 }
4870 }
4871
4872 if (!MemOps.empty())
4873 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
4874
4875 return Chain;
4876}
4877
4878/// CalculateTailCallSPDiff - Get the amount the stack pointer has to be
4879/// adjusted to accommodate the arguments for the tailcall.
4880static int CalculateTailCallSPDiff(SelectionDAG& DAG, bool isTailCall,
4881 unsigned ParamSize) {
4882
4883 if (!isTailCall) return 0;
4884
4885 PPCFunctionInfo *FI = DAG.getMachineFunction().getInfo<PPCFunctionInfo>();
4886 unsigned CallerMinReservedArea = FI->getMinReservedArea();
4887 int SPDiff = (int)CallerMinReservedArea - (int)ParamSize;
4888 // Remember only if the new adjustment is bigger (i.e. SPDiff is more negative).
4889 if (SPDiff < FI->getTailCallSPDelta())
4890 FI->setTailCallSPDelta(SPDiff);
4891
4892 return SPDiff;
4893}
4894
4895static bool isFunctionGlobalAddress(const GlobalValue *CalleeGV);
4896
4897static bool callsShareTOCBase(const Function *Caller,
4898 const GlobalValue *CalleeGV,
4899 const TargetMachine &TM) {
4900 // It does not make sense to call callsShareTOCBase() with a caller that
4901 // is PC Relative since PC Relative callers do not have a TOC.
4902#ifndef NDEBUG
4903 const PPCSubtarget *STICaller = &TM.getSubtarget<PPCSubtarget>(*Caller);
4904 assert(!STICaller->isUsingPCRelativeCalls() &&
4905 "PC Relative callers do not have a TOC and cannot share a TOC Base");
4906#endif
4907
4908 // Callee is either a GlobalAddress or an ExternalSymbol. ExternalSymbols
4909 // don't have enough information to determine if the caller and callee share
4910 // the same TOC base, so we have to pessimistically assume they don't for
4911 // correctness.
4912 if (!CalleeGV)
4913 return false;
4914
4915 // If the callee is preemptable, then the static linker will use a plt-stub
4916 // which saves the toc to the stack, and needs a nop after the call
4917 // instruction to convert to a toc-restore.
4918 if (!TM.shouldAssumeDSOLocal(CalleeGV))
4919 return false;
4920
4921 // Functions with PC Relative enabled may clobber the TOC in the same DSO.
4922 // We may need a TOC restore in the situation where the caller requires a
4923 // valid TOC but the callee is PC Relative and does not.
4924 const Function *F = dyn_cast<Function>(CalleeGV);
4925 const GlobalAlias *Alias = dyn_cast<GlobalAlias>(CalleeGV);
4926
4927 // If we have an Alias we can try to get the function from there.
4928 if (Alias) {
4929 const GlobalObject *GlobalObj = Alias->getAliaseeObject();
4930 F = dyn_cast<Function>(GlobalObj);
4931 }
4932
4933 // If we still have no valid function pointer we do not have enough
4934 // information to determine if the callee uses PC Relative calls so we must
4935 // assume that it does.
4936 if (!F)
4937 return false;
4938
4939 // If the callee uses PC Relative we cannot guarantee that the callee won't
4940 // clobber the TOC of the caller and so we must assume that the two
4941 // functions do not share a TOC base.
4942 const PPCSubtarget *STICallee = &TM.getSubtarget<PPCSubtarget>(*F);
4943 if (STICallee->isUsingPCRelativeCalls())
4944 return false;
4945
4946 // If the GV is not a strong definition then we need to assume it can be
4947 // replaced by another function at link time. The function that replaces
4948 // it may not share the same TOC as the caller since the callee may be
4949 // replaced by a PC Relative version of the same function.
4950 if (!CalleeGV->isStrongDefinitionForLinker())
4951 return false;
4952
4953 // The medium and large code models are expected to provide a sufficiently
4954 // large TOC to provide all data addressing needs of a module with a
4955 // single TOC.
4956 if (CodeModel::Medium == TM.getCodeModel() ||
4957 CodeModel::Large == TM.getCodeModel())
4958 return true;
4959
4960 // Any explicitly-specified sections and section prefixes must also match.
4961 // Also, if we're using -ffunction-sections, then each function is always in
4962 // a different section (the same is true for COMDAT functions).
4963 if (TM.getFunctionSections() || CalleeGV->hasComdat() ||
4964 Caller->hasComdat() || CalleeGV->getSection() != Caller->getSection())
4965 return false;
4966 if (const auto *F = dyn_cast<Function>(CalleeGV)) {
4967 if (F->getSectionPrefix() != Caller->getSectionPrefix())
4968 return false;
4969 }
4970
4971 return true;
4972}
4973
4974 static bool
4975 needStackSlotPassParameters(const PPCSubtarget &Subtarget,
4976 const SmallVectorImpl<ISD::OutputArg> &Outs) {
4977 assert(Subtarget.is64BitELFABI());
4978
4979 const unsigned PtrByteSize = 8;
4980 const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
4981
4982 static const MCPhysReg GPR[] = {
4983 PPC::X3, PPC::X4, PPC::X5, PPC::X6,
4984 PPC::X7, PPC::X8, PPC::X9, PPC::X10,
4985 };
4986 static const MCPhysReg VR[] = {
4987 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
4988 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
4989 };
4990
4991 const unsigned NumGPRs = std::size(GPR);
4992 const unsigned NumFPRs = 13;
4993 const unsigned NumVRs = std::size(VR);
4994 const unsigned ParamAreaSize = NumGPRs * PtrByteSize;
4995
4996 unsigned NumBytes = LinkageSize;
4997 unsigned AvailableFPRs = NumFPRs;
4998 unsigned AvailableVRs = NumVRs;
4999
5000 for (const ISD::OutputArg& Param : Outs) {
5001 if (Param.Flags.isNest()) continue;
5002
5003 if (CalculateStackSlotUsed(Param.VT, Param.ArgVT, Param.Flags, PtrByteSize,
5004 LinkageSize, ParamAreaSize, NumBytes,
5005 AvailableFPRs, AvailableVRs))
5006 return true;
5007 }
5008 return false;
5009}
5010
5011static bool hasSameArgumentList(const Function *CallerFn, const CallBase &CB) {
5012 if (CB.arg_size() != CallerFn->arg_size())
5013 return false;
5014
5015 auto CalleeArgIter = CB.arg_begin();
5016 auto CalleeArgEnd = CB.arg_end();
5017 Function::const_arg_iterator CallerArgIter = CallerFn->arg_begin();
5018
5019 for (; CalleeArgIter != CalleeArgEnd; ++CalleeArgIter, ++CallerArgIter) {
5020 const Value* CalleeArg = *CalleeArgIter;
5021 const Value* CallerArg = &(*CallerArgIter);
5022 if (CalleeArg == CallerArg)
5023 continue;
5024
5025 // e.g. @caller([4 x i64] %a, [4 x i64] %b) {
5026 // tail call @callee([4 x i64] undef, [4 x i64] %b)
5027 // }
5028 // 1st argument of callee is undef and has the same type as caller.
5029 if (CalleeArg->getType() == CallerArg->getType() &&
5030 isa<UndefValue>(CalleeArg))
5031 continue;
5032
5033 return false;
5034 }
5035
5036 return true;
5037}
5038
5039// Returns true if TCO is possible between the callers and callees
5040// calling conventions.
5041 static bool
5042 areCallingConvEligibleForTCO_64SVR4(CallingConv::ID CallerCC,
5043 CallingConv::ID CalleeCC) {
5044 // Tail calls are possible with fastcc and ccc.
5045 auto isTailCallableCC = [] (CallingConv::ID CC){
5046 return CC == CallingConv::C || CC == CallingConv::Fast;
5047 };
5048 if (!isTailCallableCC(CallerCC) || !isTailCallableCC(CalleeCC))
5049 return false;
5050
5051 // We can safely tail call both fastcc and ccc callees from a c calling
5052 // convention caller. If the caller is fastcc, we may have less stack space
5053 // than a non-fastcc caller with the same signature so disable tail-calls in
5054 // that case.
5055 return CallerCC == CallingConv::C || CallerCC == CalleeCC;
5056}
5057
5058bool PPCTargetLowering::IsEligibleForTailCallOptimization_64SVR4(
5059 const GlobalValue *CalleeGV, CallingConv::ID CalleeCC,
5060 CallingConv::ID CallerCC, const CallBase *CB, bool isVarArg,
5061 const SmallVectorImpl<ISD::OutputArg> &Outs,
5062 const SmallVectorImpl<ISD::InputArg> &Ins, const Function *CallerFunc,
5063 bool isCalleeExternalSymbol) const {
5064 bool TailCallOpt = getTargetMachine().Options.GuaranteedTailCallOpt;
5065
5066 if (DisableSCO && !TailCallOpt) return false;
5067
5068 // Variadic argument functions are not supported.
5069 if (isVarArg) return false;
5070
5071 // Check that the calling conventions are compatible for tco.
5072 if (!areCallingConvEligibleForTCO_64SVR4(CallerCC, CalleeCC))
5073 return false;
5074
5075 // A caller containing any byval parameter is not supported.
5076 if (any_of(Ins, [](const ISD::InputArg &IA) { return IA.Flags.isByVal(); }))
5077 return false;
5078
5079 // A callee containing any byval parameter is not supported either.
5080 // Note: This is a quick workaround, because in some cases, e.g.
5081 // caller's stack size > callee's stack size, we are still able to apply
5082 // sibling call optimization. For example, gcc is able to do SCO for caller1
5083 // in the following example, but not for caller2.
5084 // struct test {
5085 // long int a;
5086 // char ary[56];
5087 // } gTest;
5088 // __attribute__((noinline)) int callee(struct test v, struct test *b) {
5089 // b->a = v.a;
5090 // return 0;
5091 // }
5092 // void caller1(struct test a, struct test c, struct test *b) {
5093 // callee(gTest, b); }
5094 // void caller2(struct test *b) { callee(gTest, b); }
5095 if (any_of(Outs, [](const ISD::OutputArg& OA) { return OA.Flags.isByVal(); }))
5096 return false;
5097
5098 // If callee and caller use different calling conventions, we cannot pass
5099 // parameters on stack since offsets for the parameter area may be different.
5100 if (CallerCC != CalleeCC && needStackSlotPassParameters(Subtarget, Outs))
5101 return false;
5102
5103 // All variants of 64-bit ELF ABIs without PC-Relative addressing require that
5104 // the caller and callee share the same TOC for TCO/SCO. If the caller and
5105 // callee potentially have different TOC bases then we cannot tail call since
5106 // we need to restore the TOC pointer after the call.
5107 // ref: https://bugzilla.mozilla.org/show_bug.cgi?id=973977
5108 // We cannot guarantee this for indirect calls or calls to external functions.
5109 // When PC-Relative addressing is used, the concept of the TOC is no longer
5110 // applicable so this check is not required.
5111 // Check first for indirect calls.
5112 if (!Subtarget.isUsingPCRelativeCalls() &&
5113 !isFunctionGlobalAddress(CalleeGV) && !isCalleeExternalSymbol)
5114 return false;
5115
5116 // Check if we share the TOC base.
5117 if (!Subtarget.isUsingPCRelativeCalls() &&
5118 !callsShareTOCBase(CallerFunc, CalleeGV, getTargetMachine()))
5119 return false;
5120
5121 // TCO allows altering callee ABI, so we don't have to check further.
5122 if (CalleeCC == CallingConv::Fast && TailCallOpt)
5123 return true;
5124
5125 if (DisableSCO) return false;
5126
5127 // If the callee uses the same argument list as the caller, then we can
5128 // apply SCO in this case. If not, then we need to check if the callee needs
5129 // stack for passing arguments.
5130 // PC Relative tail calls may not have a CallBase.
5131 // If there is no CallBase we cannot verify if we have the same argument
5132 // list so assume that we don't have the same argument list.
5133 if (CB && !hasSameArgumentList(CallerFunc, *CB) &&
5134 needStackSlotPassParameters(Subtarget, Outs))
5135 return false;
5136 else if (!CB && needStackSlotPassParameters(Subtarget, Outs))
5137 return false;
5138
5139 return true;
5140}
5141
5142/// IsEligibleForTailCallOptimization - Check whether the call is eligible
5143/// for tail call optimization. Targets which want to do tail call
5144/// optimization should implement this function.
5145bool PPCTargetLowering::IsEligibleForTailCallOptimization(
5146 const GlobalValue *CalleeGV, CallingConv::ID CalleeCC,
5147 CallingConv::ID CallerCC, bool isVarArg,
5148 const SmallVectorImpl<ISD::InputArg> &Ins) const {
5149 if (!getTargetMachine().Options.GuaranteedTailCallOpt)
5150 return false;
5151
5152 // Variable argument functions are not supported.
5153 if (isVarArg)
5154 return false;
5155
5156 if (CalleeCC == CallingConv::Fast && CallerCC == CalleeCC) {
5157 // Functions containing by val parameters are not supported.
5158 if (any_of(Ins, [](const ISD::InputArg &IA) { return IA.Flags.isByVal(); }))
5159 return false;
5160
5161 // Non-PIC/GOT tail calls are supported.
5162 if (getTargetMachine().getRelocationModel() != Reloc::PIC_)
5163 return true;
5164
5165 // At the moment we can only do local tail calls (in same module, hidden
5166 // or protected) if we are generating PIC.
5167 if (CalleeGV)
5168 return CalleeGV->hasHiddenVisibility() ||
5169 CalleeGV->hasProtectedVisibility();
5170 }
5171
5172 return false;
5173}
5174
5175/// isBLACompatibleAddress - Return the immediate to use if the specified
5176/// 32-bit value is representable in the immediate field of a BxA instruction.
5177static SDNode *isBLACompatibleAddress(SDValue Op, SelectionDAG &DAG) {
5178 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
5179 if (!C) return nullptr;
5180
5181 int Addr = C->getZExtValue();
5182 if ((Addr & 3) != 0 || // Low 2 bits are implicitly zero.
5183 SignExtend32<26>(Addr) != Addr)
5184 return nullptr; // Top 6 bits have to be sext of immediate.
5185
5186 return DAG
5187 .getConstant(
5188 (int)C->getZExtValue() >> 2, SDLoc(Op),
5189 DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()))
5190 .getNode();
5191}
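// For illustration only: an absolute target such as 0x2000 passes the checks
// in isBLACompatibleAddress above (word aligned and representable as a
// sign-extended 26-bit value) and is returned as the word offset 0x800
// (0x2000 >> 2), which fits the LI field of a BLA/BA instruction.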
5192
5193namespace {
5194
5195struct TailCallArgumentInfo {
5196 SDValue Arg;
5197 SDValue FrameIdxOp;
5198 int FrameIdx = 0;
5199
5200 TailCallArgumentInfo() = default;
5201};
5202
5203} // end anonymous namespace
5204
5205/// StoreTailCallArgumentsToStackSlot - Stores arguments to their stack slot.
5206static void StoreTailCallArgumentsToStackSlot(
5207 SelectionDAG &DAG, SDValue Chain,
5208 const SmallVectorImpl<TailCallArgumentInfo> &TailCallArgs,
5209 SmallVectorImpl<SDValue> &MemOpChains, const SDLoc &dl) {
5210 for (unsigned i = 0, e = TailCallArgs.size(); i != e; ++i) {
5211 SDValue Arg = TailCallArgs[i].Arg;
5212 SDValue FIN = TailCallArgs[i].FrameIdxOp;
5213 int FI = TailCallArgs[i].FrameIdx;
5214 // Store relative to framepointer.
5215 MemOpChains.push_back(DAG.getStore(
5216 Chain, dl, Arg, FIN,
5218 }
5219}
5220
5221/// EmitTailCallStoreFPAndRetAddr - Move the frame pointer and return address to
5222/// the appropriate stack slot for the tail call optimized function call.
5223static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG, SDValue Chain,
5224 SDValue OldRetAddr, SDValue OldFP,
5225 int SPDiff, const SDLoc &dl) {
5226 if (SPDiff) {
5227 // Calculate the new stack slot for the return address.
5228 MachineFunction &MF = DAG.getMachineFunction();
5229 const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
5230 const PPCFrameLowering *FL = Subtarget.getFrameLowering();
5231 bool isPPC64 = Subtarget.isPPC64();
5232 int SlotSize = isPPC64 ? 8 : 4;
5233 int NewRetAddrLoc = SPDiff + FL->getReturnSaveOffset();
5234 int NewRetAddr = MF.getFrameInfo().CreateFixedObject(SlotSize,
5235 NewRetAddrLoc, true);
5236 EVT VT = isPPC64 ? MVT::i64 : MVT::i32;
5237 SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewRetAddr, VT);
5238 Chain = DAG.getStore(Chain, dl, OldRetAddr, NewRetAddrFrIdx,
5239 MachinePointerInfo::getFixedStack(MF, NewRetAddr));
5240 }
5241 return Chain;
5242}
5243
5244/// CalculateTailCallArgDest - Remember Argument for later processing. Calculate
5245/// the position of the argument.
5246static void
5247CalculateTailCallArgDest(SelectionDAG &DAG, MachineFunction &MF, bool isPPC64,
5248 SDValue Arg, int SPDiff, unsigned ArgOffset,
5249 SmallVectorImpl<TailCallArgumentInfo>& TailCallArguments) {
5250 int Offset = ArgOffset + SPDiff;
5251 uint32_t OpSize = (Arg.getValueSizeInBits() + 7) / 8;
5252 int FI = MF.getFrameInfo().CreateFixedObject(OpSize, Offset, true);
5253 EVT VT = isPPC64 ? MVT::i64 : MVT::i32;
5254 SDValue FIN = DAG.getFrameIndex(FI, VT);
5255 TailCallArgumentInfo Info;
5256 Info.Arg = Arg;
5257 Info.FrameIdxOp = FIN;
5258 Info.FrameIdx = FI;
5259 TailCallArguments.push_back(Info);
5260}
5261
5262/// EmitTailCallLoadFPAndRetAddr - Emit load from frame pointer and return
5263/// address stack slot. Returns the chain as result and the loaded frame
5264/// pointers in LROpOut/FPOpOut. Used when tail calling.
5265SDValue PPCTargetLowering::EmitTailCallLoadFPAndRetAddr(
5266 SelectionDAG &DAG, int SPDiff, SDValue Chain, SDValue &LROpOut,
5267 SDValue &FPOpOut, const SDLoc &dl) const {
5268 if (SPDiff) {
5269 // Load the LR and FP stack slot for later adjusting.
5270 EVT VT = Subtarget.isPPC64() ? MVT::i64 : MVT::i32;
5271 LROpOut = getReturnAddrFrameIndex(DAG);
5272 LROpOut = DAG.getLoad(VT, dl, Chain, LROpOut, MachinePointerInfo());
5273 Chain = SDValue(LROpOut.getNode(), 1);
5274 }
5275 return Chain;
5276}
5277
5278/// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified
5279/// by "Src" to address "Dst" of size "Size". Alignment information is
5280/// specified by the specific parameter attribute. The copy will be passed as
5281/// a byval function parameter.
5282/// Sometimes what we are copying is the end of a larger object, the part that
5283/// does not fit in registers.
5284static SDValue CreateCopyOfByValArgument(SDValue Arg, SDValue Dst,
5285 SDValue Chain, ISD::ArgFlagsTy Flags,
5286 SelectionDAG &DAG, const SDLoc &dl) {
5287 SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), dl, MVT::i32);
5288 return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode,
5289 Flags.getNonZeroByValAlign(), false, false, false,
5291}
5292
5293/// LowerMemOpCallTo - Store the argument to the stack or remember it in case of
5294/// tail calls.
5295static void LowerMemOpCallTo(
5296 SelectionDAG &DAG, MachineFunction &MF, SDValue Chain, SDValue Arg,
5297 SDValue PtrOff, int SPDiff, unsigned ArgOffset, bool isPPC64,
5298 bool isTailCall, bool isVector, SmallVectorImpl<SDValue> &MemOpChains,
5299 SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments, const SDLoc &dl) {
5301 if (!isTailCall) {
5302 if (isVector) {
5303 SDValue StackPtr;
5304 if (isPPC64)
5305 StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
5306 else
5307 StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
5308 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr,
5309 DAG.getConstant(ArgOffset, dl, PtrVT));
5310 }
5311 MemOpChains.push_back(
5312 DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));
5313 // Calculate and remember argument location.
5314 } else CalculateTailCallArgDest(DAG, MF, isPPC64, Arg, SPDiff, ArgOffset,
5315 TailCallArguments);
5316}
5317
5318static void
5319PrepareTailCall(SelectionDAG &DAG, SDValue &InGlue, SDValue &Chain,
5320 const SDLoc &dl, int SPDiff, unsigned NumBytes, SDValue LROp,
5321 SDValue FPOp,
5322 SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments) {
5323 // Emit a sequence of copyto/copyfrom virtual registers for arguments that
5324 // might overwrite each other in case of tail call optimization.
5325 SmallVector<SDValue, 8> MemOpChains2;
5326 // Do not flag preceding copytoreg stuff together with the following stuff.
5327 InGlue = SDValue();
5328 StoreTailCallArgumentsToStackSlot(DAG, Chain, TailCallArguments,
5329 MemOpChains2, dl);
5330 if (!MemOpChains2.empty())
5331 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains2);
5332
5333 // Store the return address to the appropriate stack slot.
5334 Chain = EmitTailCallStoreFPAndRetAddr(DAG, Chain, LROp, FPOp, SPDiff, dl);
5335
5336 // Emit callseq_end just before tailcall node.
5337 Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, InGlue, dl);
5338 InGlue = Chain.getValue(1);
5339}
5340
5341// Is this global address that of a function that can be called by name? (as
5342// opposed to something that must hold a descriptor for an indirect call).
5343static bool isFunctionGlobalAddress(const GlobalValue *GV) {
5344 if (GV) {
5345 if (GV->isThreadLocal())
5346 return false;
5347
5348 return GV->getValueType()->isFunctionTy();
5349 }
5350
5351 return false;
5352}
5353
5354SDValue PPCTargetLowering::LowerCallResult(
5355 SDValue Chain, SDValue InGlue, CallingConv::ID CallConv, bool isVarArg,
5356 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
5357 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
5359 CCState CCRetInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
5360 *DAG.getContext());
5361
5362 CCRetInfo.AnalyzeCallResult(
5363 Ins, (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold)
5365 : RetCC_PPC);
5366
5367 // Copy all of the result registers out of their specified physreg.
5368 for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
5369 CCValAssign &VA = RVLocs[i];
5370 assert(VA.isRegLoc() && "Can only return in registers!");
5371
5372 SDValue Val;
5373
5374 if (Subtarget.hasSPE() && VA.getLocVT() == MVT::f64) {
5375 SDValue Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
5376 InGlue);
5377 Chain = Lo.getValue(1);
5378 InGlue = Lo.getValue(2);
5379 VA = RVLocs[++i]; // skip ahead to next loc
5380 SDValue Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
5381 InGlue);
5382 Chain = Hi.getValue(1);
5383 InGlue = Hi.getValue(2);
5384 if (!Subtarget.isLittleEndian())
5385 std::swap (Lo, Hi);
5386 Val = DAG.getNode(PPCISD::BUILD_SPE64, dl, MVT::f64, Lo, Hi);
5387 } else {
5388 Val = DAG.getCopyFromReg(Chain, dl,
5389 VA.getLocReg(), VA.getLocVT(), InGlue);
5390 Chain = Val.getValue(1);
5391 InGlue = Val.getValue(2);
5392 }
5393
5394 switch (VA.getLocInfo()) {
5395 default: llvm_unreachable("Unknown loc info!");
5396 case CCValAssign::Full: break;
5397 case CCValAssign::AExt:
5398 Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
5399 break;
5400 case CCValAssign::ZExt:
5401 Val = DAG.getNode(ISD::AssertZext, dl, VA.getLocVT(), Val,
5402 DAG.getValueType(VA.getValVT()));
5403 Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
5404 break;
5405 case CCValAssign::SExt:
5406 Val = DAG.getNode(ISD::AssertSext, dl, VA.getLocVT(), Val,
5407 DAG.getValueType(VA.getValVT()));
5408 Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
5409 break;
5410 }
5411
5412 InVals.push_back(Val);
5413 }
5414
5415 return Chain;
5416}
5417
5418static bool isIndirectCall(const SDValue &Callee, SelectionDAG &DAG,
5419 const PPCSubtarget &Subtarget, bool isPatchPoint) {
5420 auto *G = dyn_cast<GlobalAddressSDNode>(Callee);
5421 const GlobalValue *GV = G ? G->getGlobal() : nullptr;
5422
5423 // PatchPoint calls are not indirect.
5424 if (isPatchPoint)
5425 return false;
5426
5427 if (isFunctionGlobalAddress(GV) || isa<ExternalSymbolSDNode>(Callee))
5428 return false;
5429
5430 // Darwin and 32-bit ELF can use a BLA. The descriptor-based ABIs cannot
5431 // because the immediate function pointer points to a descriptor instead of
5432 // a function entry point. The ELFv2 ABI cannot use a BLA because the function
5433 // pointer immediate points to the global entry point, while the BLA would
5434 // need to jump to the local entry point (see rL211174).
5435 if (!Subtarget.usesFunctionDescriptors() && !Subtarget.isELFv2ABI() &&
5436 isBLACompatibleAddress(Callee, DAG))
5437 return false;
5438
5439 return true;
5440}
5441
5442// AIX and 64-bit ELF ABIs w/o PCRel require a TOC save/restore around calls.
5443static inline bool isTOCSaveRestoreRequired(const PPCSubtarget &Subtarget) {
5444 return Subtarget.isAIXABI() ||
5445 (Subtarget.is64BitELFABI() && !Subtarget.isUsingPCRelativeCalls());
5446}
5447
5449 const Function &Caller, const SDValue &Callee,
5450 const PPCSubtarget &Subtarget,
5451 const TargetMachine &TM,
5452 bool IsStrictFPCall = false) {
5453 if (CFlags.IsTailCall)
5454 return PPCISD::TC_RETURN;
5455
5456 unsigned RetOpc = 0;
5457 // This is a call through a function pointer.
5458 if (CFlags.IsIndirect) {
5459 // AIX and the 64-bit ELF ABIs need to maintain the TOC pointer across
5460 // indirect calls. The save of the caller's TOC pointer to the stack will be
5461 // inserted into the DAG as part of call lowering. The restore of the TOC
5462 // pointer is modeled by using a pseudo instruction for the call opcode that
5463 // represents the 2 instruction sequence of an indirect branch and link,
5464 // immediately followed by a load of the TOC pointer from the stack save
5465 // slot into gpr2. For 64-bit ELFv2 ABI with PCRel, do not restore the TOC
5466 // as it is not saved or used.
5468 : PPCISD::BCTRL;
5469 } else if (Subtarget.isUsingPCRelativeCalls()) {
5470 assert(Subtarget.is64BitELFABI() && "PC Relative is only on ELF ABI.");
5471 RetOpc = PPCISD::CALL_NOTOC;
5472 } else if (Subtarget.isAIXABI() || Subtarget.is64BitELFABI()) {
5473 // The ABIs that maintain a TOC pointer across calls need to have a nop
5474 // immediately following the call instruction if the caller and callee may
5475 // have different TOC bases. At link time, if the linker determines the calls
5476 // may not share a TOC base, the call is redirected to a trampoline inserted
5477 // by the linker. The trampoline will (among other things) save the caller's
5478 // TOC pointer at an ABI-designated offset in the linkage area and the
5479 // linker will rewrite the nop to be a load of the TOC pointer from the
5480 // linkage area into gpr2.
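 // Illustrative linked-code sequence for such a call (sketch only; the
 // offset shown is the ELFv2 TOC save slot):
 //   bl callee    // call, possibly redirected through a linker trampoline
 //   nop          // may be rewritten by the linker to "ld r2, 24(r1)"
 //                // to reload the caller's TOC pointer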
5481 auto *G = dyn_cast<GlobalAddressSDNode>(Callee);
5482 const GlobalValue *GV = G ? G->getGlobal() : nullptr;
5483 RetOpc =
5485 } else
5486 RetOpc = PPCISD::CALL;
5487 if (IsStrictFPCall) {
5488 switch (RetOpc) {
5489 default:
5490 llvm_unreachable("Unknown call opcode");
5493 break;
5494 case PPCISD::BCTRL:
5495 RetOpc = PPCISD::BCTRL_RM;
5496 break;
5497 case PPCISD::CALL_NOTOC:
5498 RetOpc = PPCISD::CALL_NOTOC_RM;
5499 break;
5500 case PPCISD::CALL:
5501 RetOpc = PPCISD::CALL_RM;
5502 break;
5503 case PPCISD::CALL_NOP:
5504 RetOpc = PPCISD::CALL_NOP_RM;
5505 break;
5506 }
5507 }
5508 return RetOpc;
5509}
5510
5511static SDValue transformCallee(const SDValue &Callee, SelectionDAG &DAG,
5512 const SDLoc &dl, const PPCSubtarget &Subtarget) {
5513 if (!Subtarget.usesFunctionDescriptors() && !Subtarget.isELFv2ABI())
5514 if (SDNode *Dest = isBLACompatibleAddress(Callee, DAG))
5515 return SDValue(Dest, 0);
5516
5517 // Returns true if the callee is local, and false otherwise.
5518 auto isLocalCallee = [&]() {
5519 const GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
5520 const GlobalValue *GV = G ? G->getGlobal() : nullptr;
5521
5522 return DAG.getTarget().shouldAssumeDSOLocal(GV) &&
5523 !isa_and_nonnull<GlobalIFunc>(GV);
5524 };
5525
5526 // The PLT is only used in 32-bit ELF PIC mode. Attempting to use the PLT in
5527 // a static relocation model causes some versions of GNU LD (2.17.50, at
5528 // least) to force BSS-PLT, instead of secure-PLT, even if all objects are
5529 // built with secure-PLT.
5530 bool UsePlt =
5531 Subtarget.is32BitELFABI() && !isLocalCallee() &&
5533
5534 const auto getAIXFuncEntryPointSymbolSDNode = [&](const GlobalValue *GV) {
5535 const TargetMachine &TM = Subtarget.getTargetMachine();
5536 const TargetLoweringObjectFile *TLOF = TM.getObjFileLowering();
5537 MCSymbolXCOFF *S =
5538 cast<MCSymbolXCOFF>(TLOF->getFunctionEntryPointSymbol(GV, TM));
5539
5541 return DAG.getMCSymbol(S, PtrVT);
5542 };
5543
5544 auto *G = dyn_cast<GlobalAddressSDNode>(Callee);
5545 const GlobalValue *GV = G ? G->getGlobal() : nullptr;
5546 if (isFunctionGlobalAddress(GV)) {
5547 const GlobalValue *GV = cast<GlobalAddressSDNode>(Callee)->getGlobal();
5548
5549 if (Subtarget.isAIXABI()) {
5550 assert(!isa<GlobalIFunc>(GV) && "IFunc is not supported on AIX.");
5551 return getAIXFuncEntryPointSymbolSDNode(GV);
5552 }
5553 return DAG.getTargetGlobalAddress(GV, dl, Callee.getValueType(), 0,
5554 UsePlt ? PPCII::MO_PLT : 0);
5555 }
5556
5557 if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
5558 const char *SymName = S->getSymbol();
5559 if (Subtarget.isAIXABI()) {
5560 // If there exists a user-declared function whose name is the same as the
5561 // ExternalSymbol's, then we pick up the user-declared version.
5563 if (const Function *F =
5564 dyn_cast_or_null<Function>(Mod->getNamedValue(SymName)))
5565 return getAIXFuncEntryPointSymbolSDNode(F);
5566
5567 // On AIX, direct function calls reference the symbol for the function's
5568 // entry point, which is named by prepending a "." before the function's
5569 // C-linkage name. A Qualname is returned here because an external
5570 // function entry point is a csect with XTY_ER property.
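 // For example, an external call to "foo" is emitted against the
 // entry-point symbol ".foo".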
5571 const auto getExternalFunctionEntryPointSymbol = [&](StringRef SymName) {
5572 auto &Context = DAG.getMachineFunction().getMMI().getContext();
5573 MCSectionXCOFF *Sec = Context.getXCOFFSection(
5574 (Twine(".") + Twine(SymName)).str(), SectionKind::getMetadata(),
5576 return Sec->getQualNameSymbol();
5577 };
5578
5579 SymName = getExternalFunctionEntryPointSymbol(SymName)->getName().data();
5580 }
5581 return DAG.getTargetExternalSymbol(SymName, Callee.getValueType(),
5582 UsePlt ? PPCII::MO_PLT : 0);
5583 }
5584
5585 // No transformation needed.
5586 assert(Callee.getNode() && "What no callee?");
5587 return Callee;
5588}
5589
5591 assert(CallSeqStart.getOpcode() == ISD::CALLSEQ_START &&
5592 "Expected a CALLSEQ_STARTSDNode.");
5593
5594 // The last value is the chain, except when the node has glue. If the node
5595 // has glue, then the last value is the glue, and the chain is the
5596 // second-to-last value.
5597 SDValue LastValue = CallSeqStart.getValue(CallSeqStart->getNumValues() - 1);
5598 if (LastValue.getValueType() != MVT::Glue)
5599 return LastValue;
5600
5601 return CallSeqStart.getValue(CallSeqStart->getNumValues() - 2);
5602}
5603
5604// Creates the node that moves a function's address into the count register
5605// to prepare for an indirect call instruction.
5606static void prepareIndirectCall(SelectionDAG &DAG, SDValue &Callee,
5607 SDValue &Glue, SDValue &Chain,
5608 const SDLoc &dl) {
5609 SDValue MTCTROps[] = {Chain, Callee, Glue};
5610 EVT ReturnTypes[] = {MVT::Other, MVT::Glue};
5611 Chain = DAG.getNode(PPCISD::MTCTR, dl, ReturnTypes,
5612 ArrayRef(MTCTROps, Glue.getNode() ? 3 : 2));
5613 // The glue is the second value produced.
5614 Glue = Chain.getValue(1);
5615}
5616
5618 SDValue &Glue, SDValue &Chain,
5619 SDValue CallSeqStart,
5620 const CallBase *CB, const SDLoc &dl,
5621 bool hasNest,
5622 const PPCSubtarget &Subtarget) {
5623 // Function pointers in the 64-bit SVR4 ABI do not point to the function
5624 // entry point, but to the function descriptor (the function entry point
5625 // address is part of the function descriptor though).
5626 // The function descriptor is a three doubleword structure with the
5627 // following fields: function entry point, TOC base address and
5628 // environment pointer.
5629 // Thus for a call through a function pointer, the following actions need
5630 // to be performed:
5631 // 1. Save the TOC of the caller in the TOC save area of its stack
5632 // frame (this is done in LowerCall_Darwin() or LowerCall_64SVR4()).
5633 // 2. Load the address of the function entry point from the function
5634 // descriptor.
5635 // 3. Load the TOC of the callee from the function descriptor into r2.
5636 // 4. Load the environment pointer from the function descriptor into
5637 // r11.
5638 // 5. Branch to the function entry point address.
5639 // 6. On return of the callee, the TOC of the caller needs to be
5640 // restored (this is done in FinishCall()).
5641 //
5642 // The loads are scheduled at the beginning of the call sequence, and the
5643 // register copies are flagged together to ensure that no other
5644 // operations can be scheduled in between. E.g. without flagging the
5645 // copies together, a TOC access in the caller could be scheduled between
5646 // the assignment of the callee TOC and the branch to the callee, which leads
5647 // to incorrect code.
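 // For illustration, the 64-bit ELFv1 function descriptor layout assumed
 // here is (sketch):
 //   +0   function entry point address
 //   +8   TOC base address      (copied into r2 below)
 //   +16  environment pointer   (copied into r11 below)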
5648
5649 // Start by loading the function address from the descriptor.
5650 SDValue LDChain = getOutputChainFromCallSeq(CallSeqStart);
5651 auto MMOFlags = Subtarget.hasInvariantFunctionDescriptors()
5655
5656 MachinePointerInfo MPI(CB ? CB->getCalledOperand() : nullptr);
5657
5658 // Registers used in building the DAG.
5659 const MCRegister EnvPtrReg = Subtarget.getEnvironmentPointerRegister();
5660 const MCRegister TOCReg = Subtarget.getTOCPointerRegister();
5661
5662 // Offsets of descriptor members.
5663 const unsigned TOCAnchorOffset = Subtarget.descriptorTOCAnchorOffset();
5664 const unsigned EnvPtrOffset = Subtarget.descriptorEnvironmentPointerOffset();
5665
5666 const MVT RegVT = Subtarget.isPPC64() ? MVT::i64 : MVT::i32;
5667 const Align Alignment = Subtarget.isPPC64() ? Align(8) : Align(4);
5668
5669 // One load for the function's entry point address.
5670 SDValue LoadFuncPtr = DAG.getLoad(RegVT, dl, LDChain, Callee, MPI,
5671 Alignment, MMOFlags);
5672
5673 // One for loading the TOC anchor for the module that contains the called
5674 // function.
5675 SDValue TOCOff = DAG.getIntPtrConstant(TOCAnchorOffset, dl);
5676 SDValue AddTOC = DAG.getNode(ISD::ADD, dl, RegVT, Callee, TOCOff);
5677 SDValue TOCPtr =
5678 DAG.getLoad(RegVT, dl, LDChain, AddTOC,
5679 MPI.getWithOffset(TOCAnchorOffset), Alignment, MMOFlags);
5680
5681 // One for loading the environment pointer.
5682 SDValue PtrOff = DAG.getIntPtrConstant(EnvPtrOffset, dl);
5683 SDValue AddPtr = DAG.getNode(ISD::ADD, dl, RegVT, Callee, PtrOff);
5684 SDValue LoadEnvPtr =
5685 DAG.getLoad(RegVT, dl, LDChain, AddPtr,
5686 MPI.getWithOffset(EnvPtrOffset), Alignment, MMOFlags);
5687
5688
5689 // Then copy the newly loaded TOC anchor to the TOC pointer.
5690 SDValue TOCVal = DAG.getCopyToReg(Chain, dl, TOCReg, TOCPtr, Glue);
5691 Chain = TOCVal.getValue(0);
5692 Glue = TOCVal.getValue(1);
5693
5694 // If the function call has an explicit 'nest' parameter, it takes the
5695 // place of the environment pointer.
5696 assert((!hasNest || !Subtarget.isAIXABI()) &&
5697 "Nest parameter is not supported on AIX.");
5698 if (!hasNest) {
5699 SDValue EnvVal = DAG.getCopyToReg(Chain, dl, EnvPtrReg, LoadEnvPtr, Glue);
5700 Chain = EnvVal.getValue(0);
5701 Glue = EnvVal.getValue(1);
5702 }
5703
5704 // The rest of the indirect call sequence is the same as the non-descriptor
5705 // DAG.
5706 prepareIndirectCall(DAG, LoadFuncPtr, Glue, Chain, dl);
5707}
5708
5709static void
5711 PPCTargetLowering::CallFlags CFlags, const SDLoc &dl,
5712 SelectionDAG &DAG,
5713 SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass,
5714 SDValue Glue, SDValue Chain, SDValue &Callee, int SPDiff,
5715 const PPCSubtarget &Subtarget) {
5716 const bool IsPPC64 = Subtarget.isPPC64();
5717 // MVT for a general purpose register.
5718 const MVT RegVT = IsPPC64 ? MVT::i64 : MVT::i32;
5719
5720 // First operand is always the chain.
5721 Ops.push_back(Chain);
5722
5723 // If it's a direct call pass the callee as the second operand.
5724 if (!CFlags.IsIndirect)
5725 Ops.push_back(Callee);
5726 else {
5727 assert(!CFlags.IsPatchPoint && "Patch point calls are not indirect.");
5728
5729 // For the TOC based ABIs, we have saved the TOC pointer to the linkage area
5730 // on the stack (this would have been done in `LowerCall_64SVR4` or
5731 // `LowerCall_AIX`). The call instruction is a pseudo instruction that
5732 // represents both the indirect branch and a load that restores the TOC
5733 // pointer from the linkage area. The operand for the TOC restore is an add
5734 // of the TOC save offset to the stack pointer. This must be the second
5735 // operand: after the chain input but before any other variadic arguments.
5736 // For 64-bit ELFv2 ABI with PCRel, do not restore the TOC as it is not
5737 // saved or used.
5738 if (isTOCSaveRestoreRequired(Subtarget)) {
5739 const MCRegister StackPtrReg = Subtarget.getStackPointerRegister();
5740
5741 SDValue StackPtr = DAG.getRegister(StackPtrReg, RegVT);
5742 unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
5743 SDValue TOCOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
5744 SDValue AddTOC = DAG.getNode(ISD::ADD, dl, RegVT, StackPtr, TOCOff);
5745 Ops.push_back(AddTOC);
5746 }
5747
5748 // Add the register used for the environment pointer.
5749 if (Subtarget.usesFunctionDescriptors() && !CFlags.HasNest)
5751 RegVT));
5752
5753
5754 // Add CTR register as callee so a bctr can be emitted later.
5755 if (CFlags.IsTailCall)
5756 Ops.push_back(DAG.getRegister(IsPPC64 ? PPC::CTR8 : PPC::CTR, RegVT));
5757 }
5758
5759 // If this is a tail call add stack pointer delta.
5760 if (CFlags.IsTailCall)
5761 Ops.push_back(DAG.getConstant(SPDiff, dl, MVT::i32));
5762
5763 // Add argument registers to the end of the list so that they are known live
5764 // into the call.
5765 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
5766 Ops.push_back(DAG.getRegister(RegsToPass[i].first,
5767 RegsToPass[i].second.getValueType()));
5768
5769 // We cannot add R2/X2 as an operand here for PATCHPOINT, because there is
5770 // no way to mark dependencies as implicit here.
5771 // We will add the R2/X2 dependency in EmitInstrWithCustomInserter.
5772 if ((Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) &&
5773 !CFlags.IsPatchPoint && !Subtarget.isUsingPCRelativeCalls())
5774 Ops.push_back(DAG.getRegister(Subtarget.getTOCPointerRegister(), RegVT));
5775
5776 // Add implicit use of CR bit 6 for 32-bit SVR4 vararg calls
5777 if (CFlags.IsVarArg && Subtarget.is32BitELFABI())
5778 Ops.push_back(DAG.getRegister(PPC::CR1EQ, MVT::i32));
5779
5780 // Add a register mask operand representing the call-preserved registers.
5781 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
5782 const uint32_t *Mask =
5783 TRI->getCallPreservedMask(DAG.getMachineFunction(), CFlags.CallConv);
5784 assert(Mask && "Missing call preserved mask for calling convention");
5785 Ops.push_back(DAG.getRegisterMask(Mask));
5786
5787 // If the glue is valid, it is the last operand.
5788 if (Glue.getNode())
5789 Ops.push_back(Glue);
5790}
5791
5792SDValue PPCTargetLowering::FinishCall(
5793 CallFlags CFlags, const SDLoc &dl, SelectionDAG &DAG,
5794 SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass, SDValue Glue,
5795 SDValue Chain, SDValue CallSeqStart, SDValue &Callee, int SPDiff,
5796 unsigned NumBytes, const SmallVectorImpl<ISD::InputArg> &Ins,
5797 SmallVectorImpl<SDValue> &InVals, const CallBase *CB) const {
5798
5799 if ((Subtarget.is64BitELFABI() && !Subtarget.isUsingPCRelativeCalls()) ||
5800 Subtarget.isAIXABI())
5801 setUsesTOCBasePtr(DAG);
5802
5803 unsigned CallOpc =
5804 getCallOpcode(CFlags, DAG.getMachineFunction().getFunction(), Callee,
5805 Subtarget, DAG.getTarget(), CB ? CB->isStrictFP() : false);
5806
5807 if (!CFlags.IsIndirect)
5808 Callee = transformCallee(Callee, DAG, dl, Subtarget);
5809 else if (Subtarget.usesFunctionDescriptors())
5810 prepareDescriptorIndirectCall(DAG, Callee, Glue, Chain, CallSeqStart, CB,
5811 dl, CFlags.HasNest, Subtarget);
5812 else
5813 prepareIndirectCall(DAG, Callee, Glue, Chain, dl);
5814
5815 // Build the operand list for the call instruction.
5817 buildCallOperands(Ops, CFlags, dl, DAG, RegsToPass, Glue, Chain, Callee,
5818 SPDiff, Subtarget);
5819
5820 // Emit tail call.
5821 if (CFlags.IsTailCall) {
5822 // Indirect tail call when using PC Relative calls do not have the same
5823 // constraints.
5824 assert(((Callee.getOpcode() == ISD::Register &&
5825 cast<RegisterSDNode>(Callee)->getReg() == PPC::CTR) ||
5826 Callee.getOpcode() == ISD::TargetExternalSymbol ||
5827 Callee.getOpcode() == ISD::TargetGlobalAddress ||
5828 isa<ConstantSDNode>(Callee) ||
5829 (CFlags.IsIndirect && Subtarget.isUsingPCRelativeCalls())) &&
5830 "Expecting a global address, external symbol, absolute value, "
5831 "register or an indirect tail call when PC Relative calls are "
5832 "used.");
5833 // PC Relative calls also use TC_RETURN as the way to mark tail calls.
5834 assert(CallOpc == PPCISD::TC_RETURN &&
5835 "Unexpected call opcode for a tail call.");
5837 SDValue Ret = DAG.getNode(CallOpc, dl, MVT::Other, Ops);
5838 DAG.addNoMergeSiteInfo(Ret.getNode(), CFlags.NoMerge);
5839 return Ret;
5840 }
5841
5842 std::array<EVT, 2> ReturnTypes = {{MVT::Other, MVT::Glue}};
5843 Chain = DAG.getNode(CallOpc, dl, ReturnTypes, Ops);
5844 DAG.addNoMergeSiteInfo(Chain.getNode(), CFlags.NoMerge);
5845 Glue = Chain.getValue(1);
5846
5847 // When performing tail call optimization the callee pops its arguments off
5848 // the stack. Account for this here so these bytes can be pushed back on in
5849 // PPCFrameLowering::eliminateCallFramePseudoInstr.
5850 int BytesCalleePops = (CFlags.CallConv == CallingConv::Fast &&
5852 ? NumBytes
5853 : 0;
5854
5855 Chain = DAG.getCALLSEQ_END(Chain, NumBytes, BytesCalleePops, Glue, dl);
5856 Glue = Chain.getValue(1);
5857
5858 return LowerCallResult(Chain, Glue, CFlags.CallConv, CFlags.IsVarArg, Ins, dl,
5859 DAG, InVals);
5860}
5861
5863 CallingConv::ID CalleeCC = CB->getCallingConv();
5864 const Function *CallerFunc = CB->getCaller();
5865 CallingConv::ID CallerCC = CallerFunc->getCallingConv();
5866 const Function *CalleeFunc = CB->getCalledFunction();
5867 if (!CalleeFunc)
5868 return false;
5869 const GlobalValue *CalleeGV = dyn_cast<GlobalValue>(CalleeFunc);
5870
5873
5874 GetReturnInfo(CalleeCC, CalleeFunc->getReturnType(),
5875 CalleeFunc->getAttributes(), Outs, *this,
5876 CalleeFunc->getDataLayout());
5877
5878 return isEligibleForTCO(CalleeGV, CalleeCC, CallerCC, CB,
5879 CalleeFunc->isVarArg(), Outs, Ins, CallerFunc,
5880 false /*isCalleeExternalSymbol*/);
5881}
5882
5883bool PPCTargetLowering::isEligibleForTCO(
5884 const GlobalValue *CalleeGV, CallingConv::ID CalleeCC,
5885 CallingConv::ID CallerCC, const CallBase *CB, bool isVarArg,
5887 const SmallVectorImpl<ISD::InputArg> &Ins, const Function *CallerFunc,
5888 bool isCalleeExternalSymbol) const {
5889 if (Subtarget.useLongCalls() && !(CB && CB->isMustTailCall()))
5890 return false;
5891
5892 if (Subtarget.isSVR4ABI() && Subtarget.isPPC64())
5893 return IsEligibleForTailCallOptimization_64SVR4(
5894 CalleeGV, CalleeCC, CallerCC, CB, isVarArg, Outs, Ins, CallerFunc,
5895 isCalleeExternalSymbol);
5896 else
5897 return IsEligibleForTailCallOptimization(CalleeGV, CalleeCC, CallerCC,
5898 isVarArg, Ins);
5899}
5900
5901SDValue
5902PPCTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
5903 SmallVectorImpl<SDValue> &InVals) const {
5904 SelectionDAG &DAG = CLI.DAG;
5905 SDLoc &dl = CLI.DL;
5907 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
5909 SDValue Chain = CLI.Chain;
5910 SDValue Callee = CLI.Callee;
5911 bool &isTailCall = CLI.IsTailCall;
5912 CallingConv::ID CallConv = CLI.CallConv;
5913 bool isVarArg = CLI.IsVarArg;
5914 bool isPatchPoint = CLI.IsPatchPoint;
5915 const CallBase *CB = CLI.CB;
5916
5917 if (isTailCall) {
5919 CallingConv::ID CallerCC = MF.getFunction().getCallingConv();
5920 auto *G = dyn_cast<GlobalAddressSDNode>(Callee);
5921 const GlobalValue *GV = G ? G->getGlobal() : nullptr;
5922 bool IsCalleeExternalSymbol = isa<ExternalSymbolSDNode>(Callee);
5923
5924 isTailCall =
5925 isEligibleForTCO(GV, CallConv, CallerCC, CB, isVarArg, Outs, Ins,
5926 &(MF.getFunction()), IsCalleeExternalSymbol);
5927 if (isTailCall) {
5928 ++NumTailCalls;
5929 if (!getTargetMachine().Options.GuaranteedTailCallOpt)
5930 ++NumSiblingCalls;
5931
5932 // PC Relative calls no longer guarantee that the callee is a Global
5933 // Address Node. The callee could be an indirect tail call in which
5934 // case the SDValue for the callee could be a load (to load the address
5935 // of a function pointer) or it may be a register copy (to move the
5936 // address of the callee from a function parameter into a virtual
5937 // register). It may also be an ExternalSymbolSDNode (e.g. memcpy).
5938 assert((Subtarget.isUsingPCRelativeCalls() ||
5939 isa<GlobalAddressSDNode>(Callee)) &&
5940 "Callee should be an llvm::Function object.");
5941
5942 LLVM_DEBUG(dbgs() << "TCO caller: " << DAG.getMachineFunction().getName()
5943 << "\nTCO callee: ");
5944 LLVM_DEBUG(Callee.dump());
5945 }
5946 }
5947
5948 if (!isTailCall && CB && CB->isMustTailCall())
5949 report_fatal_error("failed to perform tail call elimination on a call "
5950 "site marked musttail");
5951
5952 // When long calls (i.e. indirect calls) are always used, calls are always
5953 // made via function pointer. If we have a function name, first translate it
5954 // into a pointer.
5955 if (Subtarget.useLongCalls() && isa<GlobalAddressSDNode>(Callee) &&
5956 !isTailCall)
5957 Callee = LowerGlobalAddress(Callee, DAG);
5958
5959 CallFlags CFlags(
5960 CallConv, isTailCall, isVarArg, isPatchPoint,
5961 isIndirectCall(Callee, DAG, Subtarget, isPatchPoint),
5962 // hasNest
5963 Subtarget.is64BitELFABI() &&
5964 any_of(Outs, [](ISD::OutputArg Arg) { return Arg.Flags.isNest(); }),
5965 CLI.NoMerge);
5966
5967 if (Subtarget.isAIXABI())
5968 return LowerCall_AIX(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
5969 InVals, CB);
5970
5971 assert(Subtarget.isSVR4ABI());
5972 if (Subtarget.isPPC64())
5973 return LowerCall_64SVR4(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
5974 InVals, CB);
5975 return LowerCall_32SVR4(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
5976 InVals, CB);
5977}
5978
5979SDValue PPCTargetLowering::LowerCall_32SVR4(
5980 SDValue Chain, SDValue Callee, CallFlags CFlags,
5982 const SmallVectorImpl<SDValue> &OutVals,
5983 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
5985 const CallBase *CB) const {
5986 // See PPCTargetLowering::LowerFormalArguments_32SVR4() for a description
5987 // of the 32-bit SVR4 ABI stack frame layout.
5988
5989 const CallingConv::ID CallConv = CFlags.CallConv;
5990 const bool IsVarArg = CFlags.IsVarArg;
5991 const bool IsTailCall = CFlags.IsTailCall;
5992
5993 assert((CallConv == CallingConv::C ||
5994 CallConv == CallingConv::Cold ||
5995 CallConv == CallingConv::Fast) && "Unknown calling convention!");
5996
5997 const Align PtrAlign(4);
5998
6000
6001 // Mark this function as potentially containing a tail call. As a
6002 // consequence the frame pointer will be used for dynamic stack allocation
6003 // and for restoring the caller's stack pointer in this function's epilog.
6004 // This is done because the tail-called function might overwrite the value
6005 // in this function's (MF) stack pointer stack slot 0(SP).
6006 if (getTargetMachine().Options.GuaranteedTailCallOpt &&
6007 CallConv == CallingConv::Fast)
6008 MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
6009
6010 // Count how many bytes are to be pushed on the stack, including the linkage
6011 // area, parameter list area and the part of the local variable space which
6012 // contains copies of aggregates which are passed by value.
6013
6014 // Assign locations to all of the outgoing arguments.
6016 PPCCCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
6017
6018 // Reserve space for the linkage area on the stack.
6019 CCInfo.AllocateStack(Subtarget.getFrameLowering()->getLinkageSize(),
6020 PtrAlign);
6021 if (useSoftFloat())
6022 CCInfo.PreAnalyzeCallOperands(Outs);
6023
6024 if (IsVarArg) {
6025 // Handle fixed and variable vector arguments differently.
6026 // Fixed vector arguments go into registers as long as registers are
6027 // available. Variable vector arguments always go into memory.
6028 unsigned NumArgs = Outs.size();
6029
6030 for (unsigned i = 0; i != NumArgs; ++i) {
6031 MVT ArgVT = Outs[i].VT;
6032 ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
6033 bool Result;
6034
6035 if (Outs[i].IsFixed) {
6036 Result = CC_PPC32_SVR4(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags,
6037 CCInfo);
6038 } else {
6040 ArgFlags, CCInfo);
6041 }
6042
6043 if (Result) {
6044#ifndef NDEBUG
6045 errs() << "Call operand #" << i << " has unhandled type "
6046 << ArgVT << "\n";
6047#endif
6048 llvm_unreachable(nullptr);
6049 }
6050 }
6051 } else {
6052 // All arguments are treated the same.
6053 CCInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4);
6054 }
6055 CCInfo.clearWasPPCF128();
6056
6057 // Assign locations to all of the outgoing aggregate by value arguments.
6058 SmallVector<CCValAssign, 16> ByValArgLocs;
6059 CCState CCByValInfo(CallConv, IsVarArg, MF, ByValArgLocs, *DAG.getContext());
6060
6061 // Reserve stack space for the allocations in CCInfo.
6062 CCByValInfo.AllocateStack(CCInfo.getStackSize(), PtrAlign);
6063
6064 CCByValInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4_ByVal);
6065
6066 // Size of the linkage area, parameter list area and the part of the local
6067 // variable space where copies of aggregates which are passed by value are
6068 // stored.
6069 unsigned NumBytes = CCByValInfo.getStackSize();
6070
6071 // Calculate by how many bytes the stack has to be adjusted in case of tail
6072 // call optimization.
6073 int SPDiff = CalculateTailCallSPDiff(DAG, IsTailCall, NumBytes);
6074
6075 // Adjust the stack pointer for the new arguments...
6076 // These operations are automatically eliminated by the prolog/epilog pass
6077 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
6078 SDValue CallSeqStart = Chain;
6079
6080 // Load the return address and frame pointer so they can be moved somewhere
6081 // else later.
6082 SDValue LROp, FPOp;
6083 Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl);
6084
6085 // Set up a copy of the stack pointer for use loading and storing any
6086 // arguments that may not fit in the registers available for argument
6087 // passing.
6088 SDValue StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
6089
6091 SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
6092 SmallVector<SDValue, 8> MemOpChains;
6093
6094 bool seenFloatArg = false;
6095 // Walk the register/memloc assignments, inserting copies/loads.
6096 // i - Tracks the index into the list of registers allocated for the call
6097 // RealArgIdx - Tracks the index into the list of actual function arguments
6098 // j - Tracks the index into the list of byval arguments
6099 for (unsigned i = 0, RealArgIdx = 0, j = 0, e = ArgLocs.size();
6100 i != e;
6101 ++i, ++RealArgIdx) {
6102 CCValAssign &VA = ArgLocs[i];
6103 SDValue Arg = OutVals[RealArgIdx];
6104 ISD::ArgFlagsTy Flags = Outs[RealArgIdx].Flags;
6105
6106 if (Flags.isByVal()) {
6107 // Argument is an aggregate which is passed by value, thus we need to
6108 // create a copy of it in the local variable space of the current stack
6109 // frame (which is the stack frame of the caller) and pass the address of
6110 // this copy to the callee.
6111 assert((j < ByValArgLocs.size()) && "Index out of bounds!");
6112 CCValAssign &ByValVA = ByValArgLocs[j++];
6113 assert((VA.getValNo() == ByValVA.getValNo()) && "ValNo mismatch!");
6114
6115 // Memory reserved in the local variable space of the caller's stack frame.
6116 unsigned LocMemOffset = ByValVA.getLocMemOffset();
6117
6118 SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
6119 PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(MF.getDataLayout()),
6120 StackPtr, PtrOff);
6121
6122 // Create a copy of the argument in the local area of the current
6123 // stack frame.
6124 SDValue MemcpyCall =
6125 CreateCopyOfByValArgument(Arg, PtrOff,
6126 CallSeqStart.getNode()->getOperand(0),
6127 Flags, DAG, dl);
6128
6129 // This must go outside the CALLSEQ_START..END.
6130 SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall, NumBytes, 0,
6131 SDLoc(MemcpyCall));
6132 DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
6133 NewCallSeqStart.getNode());
6134 Chain = CallSeqStart = NewCallSeqStart;
6135
6136 // Pass the address of the aggregate copy on the stack either in a
6137 // physical register or in the parameter list area of the current stack
6138 // frame to the callee.
6139 Arg = PtrOff;
6140 }
6141
6142 // When useCRBits() is true, there can be i1 arguments.
6143 // It is because getRegisterType(MVT::i1) => MVT::i1,
6144 // and for other integer types getRegisterType() => MVT::i32.
6145 // Extend i1 and ensure callee will get i32.
6146 if (Arg.getValueType() == MVT::i1)
6147 Arg = DAG.getNode(Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND,
6148 dl, MVT::i32, Arg);
6149
6150 if (VA.isRegLoc()) {
6151 seenFloatArg |= VA.getLocVT().isFloatingPoint();
6152 // Put argument in a physical register.
6153 if (Subtarget.hasSPE() && Arg.getValueType() == MVT::f64) {
6154 bool IsLE = Subtarget.isLittleEndian();
6155 SDValue SVal = DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
6156 DAG.getIntPtrConstant(IsLE ? 0 : 1, dl));
6157 RegsToPass.push_back(std::make_pair(VA.getLocReg(), SVal.getValue(0)));
6158 SVal = DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
6159 DAG.getIntPtrConstant(IsLE ? 1 : 0, dl));
6160 RegsToPass.push_back(std::make_pair(ArgLocs[++i].getLocReg(),
6161 SVal.getValue(0)));
6162 } else
6163 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
6164 } else {
6165 // Put argument in the parameter list area of the current stack frame.
6166 assert(VA.isMemLoc());
6167 unsigned LocMemOffset = VA.getLocMemOffset();
6168
6169 if (!IsTailCall) {
6170 SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
6171 PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(MF.getDataLayout()),
6172 StackPtr, PtrOff);
6173
6174 MemOpChains.push_back(
6175 DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));
6176 } else {
6177 // Calculate and remember argument location.
6178 CalculateTailCallArgDest(DAG, MF, false, Arg, SPDiff, LocMemOffset,
6179 TailCallArguments);
6180 }
6181 }
6182 }
6183
6184 if (!MemOpChains.empty())
6185 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
6186
6187 // Build a sequence of copy-to-reg nodes chained together with token chain
6188 // and flag operands which copy the outgoing args into the appropriate regs.
6189 SDValue InGlue;
6190 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
6191 Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
6192 RegsToPass[i].second, InGlue);
6193 InGlue = Chain.getValue(1);
6194 }
6195
6196 // Set CR bit 6 to true if this is a vararg call with floating args passed in
6197 // registers.
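 // (Per the 32-bit SVR4 ABI, CR bit 6 tells a vararg callee whether any
 // floating-point arguments were passed in FPRs, so it can decide whether
 // its prologue needs to save the FP argument registers.)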
6198 if (IsVarArg) {
6199 SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
6200 SDValue Ops[] = { Chain, InGlue };
6201
6202 Chain = DAG.getNode(seenFloatArg ? PPCISD::CR6SET : PPCISD::CR6UNSET, dl,
6203 VTs, ArrayRef(Ops, InGlue.getNode() ? 2 : 1));
6204
6205 InGlue = Chain.getValue(1);
6206 }
6207
6208 if (IsTailCall)
6209 PrepareTailCall(DAG, InGlue, Chain, dl, SPDiff, NumBytes, LROp, FPOp,
6210 TailCallArguments);
6211
6212 return FinishCall(CFlags, dl, DAG, RegsToPass, InGlue, Chain, CallSeqStart,
6213 Callee, SPDiff, NumBytes, Ins, InVals, CB);
6214}
6215
6216// Copy an argument into memory, being careful to do this outside the
6217// call sequence for the call to which the argument belongs.
6218SDValue PPCTargetLowering::createMemcpyOutsideCallSeq(
6219 SDValue Arg, SDValue PtrOff, SDValue CallSeqStart, ISD::ArgFlagsTy Flags,
6220 SelectionDAG &DAG, const SDLoc &dl) const {
6221 SDValue MemcpyCall = CreateCopyOfByValArgument(Arg, PtrOff,
6222 CallSeqStart.getNode()->getOperand(0),
6223 Flags, DAG, dl);
6224 // The MEMCPY must go outside the CALLSEQ_START..END.
6225 int64_t FrameSize = CallSeqStart.getConstantOperandVal(1);
6226 SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall, FrameSize, 0,
6227 SDLoc(MemcpyCall));
6228 DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
6229 NewCallSeqStart.getNode());
6230 return NewCallSeqStart;
6231}
6232
6233SDValue PPCTargetLowering::LowerCall_64SVR4(
6234 SDValue Chain, SDValue Callee, CallFlags CFlags,
6236 const SmallVectorImpl<SDValue> &OutVals,
6237 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
6239 const CallBase *CB) const {
6240 bool isELFv2ABI = Subtarget.isELFv2ABI();
6241 bool isLittleEndian = Subtarget.isLittleEndian();
6242 unsigned NumOps = Outs.size();
6243 bool IsSibCall = false;
6244 bool IsFastCall = CFlags.CallConv == CallingConv::Fast;
6245
6246 EVT PtrVT = getPointerTy(DAG.getDataLayout());
6247 unsigned PtrByteSize = 8;
6248
6250
6251 if (CFlags.IsTailCall && !getTargetMachine().Options.GuaranteedTailCallOpt)
6252 IsSibCall = true;
6253
6254 // Mark this function as potentially containing a tail call. As a
6255 // consequence the frame pointer will be used for dynamic stack allocation
6256 // and for restoring the caller's stack pointer in this function's epilog.
6257 // This is done because the tail-called function might overwrite the value
6258 // in this function's (MF) stack pointer stack slot 0(SP).
6259 if (getTargetMachine().Options.GuaranteedTailCallOpt && IsFastCall)
6260 MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
6261
6262 assert(!(IsFastCall && CFlags.IsVarArg) &&
6263 "fastcc not supported on varargs functions");
6264
6265 // Count how many bytes are to be pushed on the stack, including the linkage
6266 // area, and parameter passing area. On ELFv1, the linkage area is 48 bytes
6267 // reserved space for [SP][CR][LR][2 x unused][TOC]; on ELFv2, the linkage
6268 // area is 32 bytes reserved space for [SP][CR][LR][TOC].
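 // For illustration, the ELFv2 linkage area referred to above is laid out as
 // (sketch):
 //   0(r1)   back chain (SP)
 //   8(r1)   CR save word
 //   16(r1)  LR save doubleword
 //   24(r1)  TOC pointer save doubleword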
6269 unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
6270 unsigned NumBytes = LinkageSize;
6271 unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
6272
6273 static const MCPhysReg GPR[] = {
6274 PPC::X3, PPC::X4, PPC::X5, PPC::X6,
6275 PPC::X7, PPC::X8, PPC::X9, PPC::X10,
6276 };
6277 static const MCPhysReg VR[] = {
6278 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
6279 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
6280 };
6281
6282 const unsigned NumGPRs = std::size(GPR);
6283 const unsigned NumFPRs = useSoftFloat() ? 0 : 13;
6284 const unsigned NumVRs = std::size(VR);
6285
6286 // On ELFv2, we can avoid allocating the parameter area if all the arguments
6287 // can be passed to the callee in registers.
6288 // For the fast calling convention, there is another check below.
6289 // Note: We should keep consistent with LowerFormalArguments_64SVR4()
6290 bool HasParameterArea = !isELFv2ABI || CFlags.IsVarArg || IsFastCall;
6291 if (!HasParameterArea) {
6292 unsigned ParamAreaSize = NumGPRs * PtrByteSize;
6293 unsigned AvailableFPRs = NumFPRs;
6294 unsigned AvailableVRs = NumVRs;
6295 unsigned NumBytesTmp = NumBytes;
6296 for (unsigned i = 0; i != NumOps; ++i) {
6297 if (Outs[i].Flags.isNest()) continue;
6298 if (CalculateStackSlotUsed(Outs[i].VT, Outs[i].ArgVT, Outs[i].Flags,
6299 PtrByteSize, LinkageSize, ParamAreaSize,
6300 NumBytesTmp, AvailableFPRs, AvailableVRs))
6301 HasParameterArea = true;
6302 }
6303 }
6304
6305 // When using the fast calling convention, we don't provide backing for
6306 // arguments that will be in registers.
6307 unsigned NumGPRsUsed = 0, NumFPRsUsed = 0, NumVRsUsed = 0;
6308
6309 // Avoid allocating parameter area for fastcc functions if all the arguments
6310 // can be passed in the registers.
6311 if (IsFastCall)
6312 HasParameterArea = false;
6313
6314 // Add up all the space actually used.
6315 for (unsigned i = 0; i != NumOps; ++i) {
6316 ISD::ArgFlagsTy Flags = Outs[i].Flags;
6317 EVT ArgVT = Outs[i].VT;
6318 EVT OrigVT = Outs[i].ArgVT;
6319
6320 if (Flags.isNest())
6321 continue;
6322
6323 if (IsFastCall) {
6324 if (Flags.isByVal()) {
6325 NumGPRsUsed += (Flags.getByValSize()+7)/8;
6326 if (NumGPRsUsed > NumGPRs)
6327 HasParameterArea = true;
6328 } else {
6329 switch (ArgVT.getSimpleVT().SimpleTy) {
6330 default: llvm_unreachable("Unexpected ValueType for argument!");
6331 case MVT::i1:
6332 case MVT::i32:
6333 case MVT::i64:
6334 if (++NumGPRsUsed <= NumGPRs)
6335 continue;
6336 break;
6337 case MVT::v4i32:
6338 case MVT::v8i16:
6339 case MVT::v16i8:
6340 case MVT::v2f64:
6341 case MVT::v2i64:
6342 case MVT::v1i128:
6343 case MVT::f128:
6344 if (++NumVRsUsed <= NumVRs)
6345 continue;
6346 break;
6347 case MVT::v4f32:
6348 if (++NumVRsUsed <= NumVRs)
6349 continue;
6350 break;
6351 case MVT::f32:
6352 case MVT::f64:
6353 if (++NumFPRsUsed <= NumFPRs)
6354 continue;
6355 break;
6356 }
6357 HasParameterArea = true;
6358 }
6359 }
6360
6361 /* Respect alignment of argument on the stack. */
6362 auto Alignment =
6363 CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
6364 NumBytes = alignTo(NumBytes, Alignment);
6365
6366 NumBytes += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
6367 if (Flags.isInConsecutiveRegsLast())
6368 NumBytes = ((NumBytes + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
6369 }
6370
6371 unsigned NumBytesActuallyUsed = NumBytes;
6372
6373 // In the old ELFv1 ABI, the prolog code of the callee may store up to 8 GPR
6374 // argument registers to the stack, allowing va_start to index over them in
6375 // memory if it is varargs.
6376 // Because we cannot tell if this is needed on the caller side, we have to
6377 // conservatively assume that it is needed. As such, make sure we have at
6378 // least enough stack space for the caller to store the 8 GPRs.
6379 // In the ELFv2 ABI, we allocate the parameter area iff a callee
6380 // really requires memory operands, e.g. a vararg function.
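 // E.g. on ELFv1 this reserves at least 48 + 8*8 = 112 bytes whenever a
 // parameter save area is required.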
6381 if (HasParameterArea)
6382 NumBytes = std::max(NumBytes, LinkageSize + 8 * PtrByteSize);
6383 else
6384 NumBytes = LinkageSize;
6385
6386 // Tail call needs the stack to be aligned.
6387 if (getTargetMachine().Options.GuaranteedTailCallOpt && IsFastCall)
6388 NumBytes = EnsureStackAlignment(Subtarget.getFrameLowering(), NumBytes);
6389
6390 int SPDiff = 0;
6391
6392 // Calculate by how many bytes the stack has to be adjusted in case of tail
6393 // call optimization.
6394 if (!IsSibCall)
6395 SPDiff = CalculateTailCallSPDiff(DAG, CFlags.IsTailCall, NumBytes);
6396
6397 // To protect arguments on the stack from being clobbered in a tail call,
6398 // force all the loads to happen before doing any other lowering.
6399 if (CFlags.IsTailCall)
6400 Chain = DAG.getStackArgumentTokenFactor(Chain);
6401
6402 // Adjust the stack pointer for the new arguments...
6403 // These operations are automatically eliminated by the prolog/epilog pass
6404 if (!IsSibCall)
6405 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
6406 SDValue CallSeqStart = Chain;
6407
6408 // Load the return address and frame pointer so they can be moved somewhere
6409 // else later.
6410 SDValue LROp, FPOp;
6411 Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl);
6412
6413 // Set up a copy of the stack pointer for use loading and storing any
6414 // arguments that may not fit in the registers available for argument
6415 // passing.
6416 SDValue StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
6417
6418 // Figure out which arguments are going to go in registers, and which in
6419 // memory. Also, if this is a vararg function, floating point operations
6420 // must be stored to our stack, and loaded into integer regs as well, if
6421 // any integer regs are available for argument passing.
6422 unsigned ArgOffset = LinkageSize;
6423
6425 SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
6426
6427 SmallVector<SDValue, 8> MemOpChains;
6428 for (unsigned i = 0; i != NumOps; ++i) {
6429 SDValue Arg = OutVals[i];
6430 ISD::ArgFlagsTy Flags = Outs[i].Flags;
6431 EVT ArgVT = Outs[i].VT;
6432 EVT OrigVT = Outs[i].ArgVT;
6433
6434 // PtrOff will be used to store the current argument to the stack if a
6435 // register cannot be found for it.
6436 SDValue PtrOff;
6437
6438 // We re-align the argument offset for each argument, except when using the
6439 // fast calling convention, when we need to make sure we do that only when
6440 // we'll actually use a stack slot.
6441 auto ComputePtrOff = [&]() {
6442 /* Respect alignment of argument on the stack. */
6443 auto Alignment =
6444 CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
6445 ArgOffset = alignTo(ArgOffset, Alignment);
6446
6447 PtrOff = DAG.getConstant(ArgOffset, dl, StackPtr.getValueType());
6448
6449 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
6450 };
6451
6452 if (!IsFastCall) {
6453 ComputePtrOff();
6454
6455 /* Compute GPR index associated with argument offset. */
6456 GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
6457 GPR_idx = std::min(GPR_idx, NumGPRs);
6458 }
6459
6460 // Promote integers to 64-bit values.
6461 if (Arg.getValueType() == MVT::i32 || Arg.getValueType() == MVT::i1) {
6462 // FIXME: Should this use ANY_EXTEND if neither sext nor zext?
6463 unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
6464 Arg = DAG.getNode(ExtOp, dl, MVT::i64, Arg);
6465 }
6466
6467 // FIXME memcpy is used way more than necessary. Correctness first.
6468 // Note: "by value" is code for passing a structure by value, not
6469 // basic types.
6470 if (Flags.isByVal()) {
6471 // Note: Size includes alignment padding, so
6472 // struct x { short a; char b; }
6473 // will have Size = 4. With #pragma pack(1), it will have Size = 3.
6474 // These are the proper values we need for right-justifying the
6475 // aggregate in a parameter register.
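 // For illustration: on a big-endian target a 3-byte aggregate is copied into
 // the rightmost (highest-addressed) bytes of its doubleword slot, so loading
 // the whole slot leaves the value right-justified in the GPR.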
6476 unsigned Size = Flags.getByValSize();
6477
6478 // An empty aggregate parameter takes up no storage and no
6479 // registers.
6480 if (Size == 0)
6481 continue;
6482
6483 if (IsFastCall)
6484 ComputePtrOff();
6485
6486 // All aggregates smaller than 8 bytes must be passed right-justified.
6487 if (Size==1 || Size==2 || Size==4) {
6488 EVT VT = (Size==1) ? MVT::i8 : ((Size==2) ? MVT::i16 : MVT::i32);
6489 if (GPR_idx != NumGPRs) {
6490 SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg,
6491 MachinePointerInfo(), VT);
6492 MemOpChains.push_back(Load.getValue(1));
6493 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6494
6495 ArgOffset += PtrByteSize;
6496 continue;
6497 }
6498 }
6499
6500 if (GPR_idx == NumGPRs && Size < 8) {
6501 SDValue AddPtr = PtrOff;
6502 if (!isLittleEndian) {
6503 SDValue Const = DAG.getConstant(PtrByteSize - Size, dl,
6504 PtrOff.getValueType());
6505 AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
6506 }
6507 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
6508 CallSeqStart,
6509 Flags, DAG, dl);
6510 ArgOffset += PtrByteSize;
6511 continue;
6512 }
6513 // Copy the object to the parameter save area if it cannot be entirely passed
6514 // in registers.
6515 // FIXME: we only need to copy the parts which need to be passed in the
6516 // parameter save area. For the parts passed in registers, we don't need
6517 // to copy them to the stack although we need to allocate space for them
6518 // in the parameter save area.
6519 if ((NumGPRs - GPR_idx) * PtrByteSize < Size)
6520 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, PtrOff,
6521 CallSeqStart,
6522 Flags, DAG, dl);
6523
6524 // When a register is available, pass a small aggregate right-justified.
6525 if (Size < 8 && GPR_idx != NumGPRs) {
6526 // The easiest way to get this right-justified in a register
6527 // is to copy the structure into the rightmost portion of a
6528 // local variable slot, then load the whole slot into the
6529 // register.
6530 // FIXME: The memcpy seems to produce pretty awful code for
6531 // small aggregates, particularly for packed ones.
6532 // FIXME: It would be preferable to use the slot in the
6533 // parameter save area instead of a new local variable.
6534 SDValue AddPtr = PtrOff;
6535 if (!isLittleEndian) {
6536 SDValue Const = DAG.getConstant(8 - Size, dl, PtrOff.getValueType());
6537 AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
6538 }
6539 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
6540 CallSeqStart,
6541 Flags, DAG, dl);
6542
6543 // Load the slot into the register.
6544 SDValue Load =
6545 DAG.getLoad(PtrVT, dl, Chain, PtrOff, MachinePointerInfo());
6546 MemOpChains.push_back(Load.getValue(1));
6547 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6548
6549 // Done with this argument.
6550 ArgOffset += PtrByteSize;
6551 continue;
6552 }
6553
6554 // For aggregates larger than PtrByteSize, copy the pieces of the
6555 // object that fit into registers from the parameter save area.
6556 for (unsigned j=0; j<Size; j+=PtrByteSize) {
6557 SDValue Const = DAG.getConstant(j, dl, PtrOff.getValueType());
6558 SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
6559 if (GPR_idx != NumGPRs) {
6560 unsigned LoadSizeInBits = std::min(PtrByteSize, (Size - j)) * 8;
6561 EVT ObjType = EVT::getIntegerVT(*DAG.getContext(), LoadSizeInBits);
6562 SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, AddArg,
6563 MachinePointerInfo(), ObjType);
6564
6565 MemOpChains.push_back(Load.getValue(1));
6566 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6567 ArgOffset += PtrByteSize;
6568 } else {
6569 ArgOffset += ((Size - j + PtrByteSize-1)/PtrByteSize)*PtrByteSize;
6570 break;
6571 }
6572 }
6573 continue;
6574 }
6575
6576 switch (Arg.getSimpleValueType().SimpleTy) {
6577 default: llvm_unreachable("Unexpected ValueType for argument!");
6578 case MVT::i1:
6579 case MVT::i32:
6580 case MVT::i64:
6581 if (Flags.isNest()) {
6582 // The 'nest' parameter, if any, is passed in R11.
6583 RegsToPass.push_back(std::make_pair(PPC::X11, Arg));
6584 break;
6585 }
6586
6587 // These can be scalar arguments or elements of an integer array type
6588 // passed directly. Clang may use those instead of "byval" aggregate
6589 // types to avoid forcing arguments to memory unnecessarily.
6590 if (GPR_idx != NumGPRs) {
6591 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg));
6592 } else {
6593 if (IsFastCall)
6594 ComputePtrOff();
6595
6596 assert(HasParameterArea &&
6597 "Parameter area must exist to pass an argument in memory.");
6598 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6599 true, CFlags.IsTailCall, false, MemOpChains,
6600 TailCallArguments, dl);
6601 if (IsFastCall)
6602 ArgOffset += PtrByteSize;
6603 }
6604 if (!IsFastCall)
6605 ArgOffset += PtrByteSize;
6606 break;
6607 case MVT::f32:
6608 case MVT::f64: {
6609 // These can be scalar arguments or elements of a float array type
6610 // passed directly. The latter are used to implement ELFv2 homogeneous
6611 // float aggregates.
6612
6613 // Named arguments go into FPRs first, and once they overflow, the
6614 // remaining arguments go into GPRs and then the parameter save area.
6615 // Unnamed arguments for vararg functions always go to GPRs and
6616 // then the parameter save area. For now, put all arguments to vararg
6617 // routines always in both locations (FPR *and* GPR or stack slot).
6618 bool NeedGPROrStack = CFlags.IsVarArg || FPR_idx == NumFPRs;
6619 bool NeededLoad = false;
6620
6621 // First load the argument into the next available FPR.
6622 if (FPR_idx != NumFPRs)
6623 RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg));
6624
6625 // Next, load the argument into GPR or stack slot if needed.
6626 if (!NeedGPROrStack)
6627 ;
6628 else if (GPR_idx != NumGPRs && !IsFastCall) {
6629 // FIXME: We may want to re-enable this for CallingConv::Fast on the P8
6630 // once we support fp <-> gpr moves.
6631
6632 // In the non-vararg case, this can only ever happen in the
6633 // presence of f32 array types, since otherwise we never run
6634 // out of FPRs before running out of GPRs.
6635 SDValue ArgVal;
6636
6637 // Double values are always passed in a single GPR.
6638 if (Arg.getValueType() != MVT::f32) {
6639 ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i64, Arg);
6640
6641 // Non-array float values are extended and passed in a GPR.
6642 } else if (!Flags.isInConsecutiveRegs()) {
6643 ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
6644 ArgVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, ArgVal);
6645
6646 // If we have an array of floats, we collect every odd element
6647 // together with its predecessor into one GPR.
6648 } else if (ArgOffset % PtrByteSize != 0) {
6649 SDValue Lo, Hi;
6650 Lo = DAG.getNode(ISD::BITCAST, dl, MVT::i32, OutVals[i - 1]);
6651 Hi = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
6652 if (!isLittleEndian)
6653 std::swap(Lo, Hi);
6654 ArgVal = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
6655
6656 // The final element, if even, goes into the first half of a GPR.
6657 } else if (Flags.isInConsecutiveRegsLast()) {
6658 ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
6659 ArgVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, ArgVal);
6660 if (!isLittleEndian)
6661 ArgVal = DAG.getNode(ISD::SHL, dl, MVT::i64, ArgVal,
6662 DAG.getConstant(32, dl, MVT::i32));
6663
6664 // Non-final even elements are skipped; they will be handled
6665          // together with the subsequent argument on the next go-around.
6666 } else
6667 ArgVal = SDValue();
6668
6669 if (ArgVal.getNode())
6670 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], ArgVal));
6671 } else {
6672 if (IsFastCall)
6673 ComputePtrOff();
6674
6675 // Single-precision floating-point values are mapped to the
6676 // second (rightmost) word of the stack doubleword.
6677 if (Arg.getValueType() == MVT::f32 &&
6678 !isLittleEndian && !Flags.isInConsecutiveRegs()) {
6679 SDValue ConstFour = DAG.getConstant(4, dl, PtrOff.getValueType());
6680 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour);
6681 }
6682
6683 assert(HasParameterArea &&
6684 "Parameter area must exist to pass an argument in memory.");
6685 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6686 true, CFlags.IsTailCall, false, MemOpChains,
6687 TailCallArguments, dl);
6688
6689 NeededLoad = true;
6690 }
6691 // When passing an array of floats, the array occupies consecutive
6692 // space in the argument area; only round up to the next doubleword
6693 // at the end of the array. Otherwise, each float takes 8 bytes.
6694 if (!IsFastCall || NeededLoad) {
6695 ArgOffset += (Arg.getValueType() == MVT::f32 &&
6696 Flags.isInConsecutiveRegs()) ? 4 : 8;
6697 if (Flags.isInConsecutiveRegsLast())
6698 ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
6699 }
6700 break;
6701 }
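    // Annotation (not part of the original source): a sketch of how an ELFv2
    // homogeneous f32 array ends up in GPRs once FPRs run out (or for vararg
    // calls). Two consecutive floats share one 64-bit GPR: on big-endian the
    // earlier element of the pair lands in the high 32 bits and the later one
    // in the low 32 bits (the BUILD_PAIR above), and a lone trailing element
    // is any-extended and shifted left by 32 so it still occupies the high
    // half.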
6702 case MVT::v4f32:
6703 case MVT::v4i32:
6704 case MVT::v8i16:
6705 case MVT::v16i8:
6706 case MVT::v2f64:
6707 case MVT::v2i64:
6708 case MVT::v1i128:
6709 case MVT::f128:
6710 // These can be scalar arguments or elements of a vector array type
6711 // passed directly. The latter are used to implement ELFv2 homogenous
6712 // vector aggregates.
6713
6714 // For a varargs call, named arguments go into VRs or on the stack as
6715 // usual; unnamed arguments always go to the stack or the corresponding
6716 // GPRs when within range. For now, we always put the value in both
6717 // locations (or even all three).
6718 if (CFlags.IsVarArg) {
6719 assert(HasParameterArea &&
6720 "Parameter area must exist if we have a varargs call.");
6721 // We could elide this store in the case where the object fits
6722 // entirely in R registers. Maybe later.
6723 SDValue Store =
6724 DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo());
6725 MemOpChains.push_back(Store);
6726 if (VR_idx != NumVRs) {
6727 SDValue Load =
6728 DAG.getLoad(MVT::v4f32, dl, Store, PtrOff, MachinePointerInfo());
6729 MemOpChains.push_back(Load.getValue(1));
6730 RegsToPass.push_back(std::make_pair(VR[VR_idx++], Load));
6731 }
6732 ArgOffset += 16;
6733 for (unsigned i=0; i<16; i+=PtrByteSize) {
6734 if (GPR_idx == NumGPRs)
6735 break;
6736 SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
6737 DAG.getConstant(i, dl, PtrVT));
6738 SDValue Load =
6739 DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo());
6740 MemOpChains.push_back(Load.getValue(1));
6741 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6742 }
6743 break;
6744 }
6745
6746 // Non-varargs Altivec params go into VRs or on the stack.
6747 if (VR_idx != NumVRs) {
6748 RegsToPass.push_back(std::make_pair(VR[VR_idx++], Arg));
6749 } else {
6750 if (IsFastCall)
6751 ComputePtrOff();
6752
6753 assert(HasParameterArea &&
6754 "Parameter area must exist to pass an argument in memory.");
6755 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6756 true, CFlags.IsTailCall, true, MemOpChains,
6757 TailCallArguments, dl);
6758 if (IsFastCall)
6759 ArgOffset += 16;
6760 }
6761
6762 if (!IsFastCall)
6763 ArgOffset += 16;
6764 break;
6765 }
6766 }
6767
6768 assert((!HasParameterArea || NumBytesActuallyUsed == ArgOffset) &&
6769 "mismatch in size of parameter area");
6770 (void)NumBytesActuallyUsed;
6771
6772 if (!MemOpChains.empty())
6773 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
6774
6775 // Check if this is an indirect call (MTCTR/BCTRL).
6776 // See prepareDescriptorIndirectCall and buildCallOperands for more
6777 // information about calls through function pointers in the 64-bit SVR4 ABI.
6778 if (CFlags.IsIndirect) {
6779 // For 64-bit ELFv2 ABI with PCRel, do not save the TOC of the
6780 // caller in the TOC save area.
6781 if (isTOCSaveRestoreRequired(Subtarget)) {
6782       assert(!CFlags.IsTailCall && "Indirect tail calls not supported");
6783 // Load r2 into a virtual register and store it to the TOC save area.
6784 setUsesTOCBasePtr(DAG);
6785 SDValue Val = DAG.getCopyFromReg(Chain, dl, PPC::X2, MVT::i64);
6786 // TOC save area offset.
6787 unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
6788 SDValue PtrOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
6789 SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
6790 Chain = DAG.getStore(Val.getValue(1), dl, Val, AddPtr,
6791                            MachinePointerInfo::getStack(
6792                                DAG.getMachineFunction(), TOCSaveOffset));
6793 }
6794 // In the ELFv2 ABI, R12 must contain the address of an indirect callee.
6795 // This does not mean the MTCTR instruction must use R12; it's easier
6796 // to model this as an extra parameter, so do that.
6797 if (isELFv2ABI && !CFlags.IsPatchPoint)
6798 RegsToPass.push_back(std::make_pair((unsigned)PPC::X12, Callee));
6799 }
6800
6801 // Build a sequence of copy-to-reg nodes chained together with token chain
6802 // and flag operands which copy the outgoing args into the appropriate regs.
6803 SDValue InGlue;
6804 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
6805 Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
6806 RegsToPass[i].second, InGlue);
6807 InGlue = Chain.getValue(1);
6808 }
6809
6810 if (CFlags.IsTailCall && !IsSibCall)
6811 PrepareTailCall(DAG, InGlue, Chain, dl, SPDiff, NumBytes, LROp, FPOp,
6812 TailCallArguments);
6813
6814 return FinishCall(CFlags, dl, DAG, RegsToPass, InGlue, Chain, CallSeqStart,
6815 Callee, SPDiff, NumBytes, Ins, InVals, CB);
6816}
6817
6818// Returns true when the shadow of a general purpose argument register
6819// in the parameter save area is aligned to at least 'RequiredAlign'.
6820static bool isGPRShadowAligned(MCPhysReg Reg, Align RequiredAlign) {
6821 assert(RequiredAlign.value() <= 16 &&
6822 "Required alignment greater than stack alignment.");
6823 switch (Reg) {
6824 default:
6825 report_fatal_error("called on invalid register.");
6826 case PPC::R5:
6827 case PPC::R9:
6828 case PPC::X3:
6829 case PPC::X5:
6830 case PPC::X7:
6831 case PPC::X9:
6832     // These registers are 16-byte aligned, which is the strictest alignment
6833     // we can support.
6834 return true;
6835 case PPC::R3:
6836 case PPC::R7:
6837 case PPC::X4:
6838 case PPC::X6:
6839 case PPC::X8:
6840 case PPC::X10:
6841 // The shadow of these registers in the PSA is 8 byte aligned.
6842 return RequiredAlign <= 8;
6843 case PPC::R4:
6844 case PPC::R6:
6845 case PPC::R8:
6846 case PPC::R10:
6847 return RequiredAlign <= 4;
6848 }
6849}
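// Annotation (not part of the original source): a worked example of the
// mapping above. The parameter save area starts right after the linkage area
// (24 bytes on PPC32, 48 bytes on PPC64) on a 16-byte aligned stack pointer,
// so on PPC64 X3 shadows a 16-byte aligned slot, X4 an 8-byte aligned one,
// X5 a 16-byte aligned one, and so on. On PPC32 the 24-byte linkage area
// shifts the slots by 8 modulo 16, which is why R5 and R9 (SP+32 and SP+48)
// are the 16-byte aligned shadows while R3 and R7 are only 8-byte aligned.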
6850
6851static bool CC_AIX(unsigned ValNo, MVT ValVT, MVT LocVT,
6852 CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
6853 CCState &S) {
6854 AIXCCState &State = static_cast<AIXCCState &>(S);
6855   const PPCSubtarget &Subtarget = static_cast<const PPCSubtarget &>(
6856       State.getMachineFunction().getSubtarget());
6857 const bool IsPPC64 = Subtarget.isPPC64();
6858 const unsigned PtrSize = IsPPC64 ? 8 : 4;
6859 const Align PtrAlign(PtrSize);
6860 const Align StackAlign(16);
6861 const MVT RegVT = IsPPC64 ? MVT::i64 : MVT::i32;
6862
6863 if (ValVT == MVT::f128)
6864 report_fatal_error("f128 is unimplemented on AIX.");
6865
6866 if (ArgFlags.isNest())
6867 report_fatal_error("Nest arguments are unimplemented.");
6868
6869 static const MCPhysReg GPR_32[] = {// 32-bit registers.
6870 PPC::R3, PPC::R4, PPC::R5, PPC::R6,
6871 PPC::R7, PPC::R8, PPC::R9, PPC::R10};
6872 static const MCPhysReg GPR_64[] = {// 64-bit registers.
6873 PPC::X3, PPC::X4, PPC::X5, PPC::X6,
6874 PPC::X7, PPC::X8, PPC::X9, PPC::X10};
6875
6876 static const MCPhysReg VR[] = {// Vector registers.
6877 PPC::V2, PPC::V3, PPC::V4, PPC::V5,
6878 PPC::V6, PPC::V7, PPC::V8, PPC::V9,
6879 PPC::V10, PPC::V11, PPC::V12, PPC::V13};
6880
6881 const ArrayRef<MCPhysReg> GPRs = IsPPC64 ? GPR_64 : GPR_32;
6882
6883 if (ArgFlags.isByVal()) {
6884 const Align ByValAlign(ArgFlags.getNonZeroByValAlign());
6885 if (ByValAlign > StackAlign)
6886 report_fatal_error("Pass-by-value arguments with alignment greater than "
6887 "16 are not supported.");
6888
6889 const unsigned ByValSize = ArgFlags.getByValSize();
6890 const Align ObjAlign = ByValAlign > PtrAlign ? ByValAlign : PtrAlign;
6891
6892 // An empty aggregate parameter takes up no storage and no registers,
6893 // but needs a MemLoc for a stack slot for the formal arguments side.
6894 if (ByValSize == 0) {
6896 State.getStackSize(), RegVT, LocInfo));
6897 return false;
6898 }
6899
6900 // Shadow allocate any registers that are not properly aligned.
6901 unsigned NextReg = State.getFirstUnallocated(GPRs);
6902 while (NextReg != GPRs.size() &&
6903 !isGPRShadowAligned(GPRs[NextReg], ObjAlign)) {
6904     // Shadow allocate the next register since its alignment is not strict enough.
6905 unsigned Reg = State.AllocateReg(GPRs);
6906 // Allocate the stack space shadowed by said register.
6907 State.AllocateStack(PtrSize, PtrAlign);
6908     assert(Reg && "Allocating register unexpectedly failed.");
6909 (void)Reg;
6910 NextReg = State.getFirstUnallocated(GPRs);
6911 }
6912
6913 const unsigned StackSize = alignTo(ByValSize, ObjAlign);
6914 unsigned Offset = State.AllocateStack(StackSize, ObjAlign);
6915 for (const unsigned E = Offset + StackSize; Offset < E; Offset += PtrSize) {
6916 if (unsigned Reg = State.AllocateReg(GPRs))
6917 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, RegVT, LocInfo));
6918 else {
6921 LocInfo));
6922 break;
6923 }
6924 }
6925 return false;
6926 }
6927
6928 // Arguments always reserve parameter save area.
6929 switch (ValVT.SimpleTy) {
6930 default:
6931 report_fatal_error("Unhandled value type for argument.");
6932 case MVT::i64:
6933 // i64 arguments should have been split to i32 for PPC32.
6934 assert(IsPPC64 && "PPC32 should have split i64 values.");
6935 [[fallthrough]];
6936 case MVT::i1:
6937 case MVT::i32: {
6938 const unsigned Offset = State.AllocateStack(PtrSize, PtrAlign);
6939 // AIX integer arguments are always passed in register width.
6940 if (ValVT.getFixedSizeInBits() < RegVT.getFixedSizeInBits())
6941       LocInfo = ArgFlags.isSExt() ? CCValAssign::LocInfo::SExt
6942                                   : CCValAssign::LocInfo::ZExt;
6943 if (unsigned Reg = State.AllocateReg(GPRs))
6944 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, RegVT, LocInfo));
6945 else
6946 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, RegVT, LocInfo));
6947
6948 return false;
6949 }
6950 case MVT::f32:
6951 case MVT::f64: {
6952 // Parameter save area (PSA) is reserved even if the float passes in fpr.
6953 const unsigned StoreSize = LocVT.getStoreSize();
6954 // Floats are always 4-byte aligned in the PSA on AIX.
6955 // This includes f64 in 64-bit mode for ABI compatibility.
6956 const unsigned Offset =
6957 State.AllocateStack(IsPPC64 ? 8 : StoreSize, Align(4));
6958 unsigned FReg = State.AllocateReg(FPR);
6959 if (FReg)
6960 State.addLoc(CCValAssign::getReg(ValNo, ValVT, FReg, LocVT, LocInfo));
6961
6962 // Reserve and initialize GPRs or initialize the PSA as required.
6963 for (unsigned I = 0; I < StoreSize; I += PtrSize) {
6964 if (unsigned Reg = State.AllocateReg(GPRs)) {
6965 assert(FReg && "An FPR should be available when a GPR is reserved.");
6966 if (State.isVarArg()) {
6967 // Successfully reserved GPRs are only initialized for vararg calls.
6968 // Custom handling is required for:
6969 // f64 in PPC32 needs to be split into 2 GPRs.
6970 // f32 in PPC64 needs to occupy only lower 32 bits of 64-bit GPR.
6971 State.addLoc(
6972 CCValAssign::getCustomReg(ValNo, ValVT, Reg, RegVT, LocInfo));
6973 }
6974 } else {
6975 // If there are insufficient GPRs, the PSA needs to be initialized.
6976 // Initialization occurs even if an FPR was initialized for
6977 // compatibility with the AIX XL compiler. The full memory for the
6978 // argument will be initialized even if a prior word is saved in GPR.
6979 // A custom memLoc is used when the argument also passes in FPR so
6980 // that the callee handling can skip over it easily.
6981 State.addLoc(
6982 FReg ? CCValAssign::getCustomMem(ValNo, ValVT, Offset, LocVT,
6983 LocInfo)
6984 : CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
6985 break;
6986 }
6987 }
6988
6989 return false;
6990 }
6991 case MVT::v4f32:
6992 case MVT::v4i32:
6993 case MVT::v8i16:
6994 case MVT::v16i8:
6995 case MVT::v2i64:
6996 case MVT::v2f64:
6997 case MVT::v1i128: {
6998 const unsigned VecSize = 16;
6999 const Align VecAlign(VecSize);
7000
7001 if (!State.isVarArg()) {
7002 // If there are vector registers remaining we don't consume any stack
7003 // space.
7004 if (unsigned VReg = State.AllocateReg(VR)) {
7005 State.addLoc(CCValAssign::getReg(ValNo, ValVT, VReg, LocVT, LocInfo));
7006 return false;
7007 }
7008 // Vectors passed on the stack do not shadow GPRs or FPRs even though they
7009 // might be allocated in the portion of the PSA that is shadowed by the
7010 // GPRs.
7011 const unsigned Offset = State.AllocateStack(VecSize, VecAlign);
7012 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
7013 return false;
7014 }
7015
7016 unsigned NextRegIndex = State.getFirstUnallocated(GPRs);
7017 // Burn any underaligned registers and their shadowed stack space until
7018 // we reach the required alignment.
7019 while (NextRegIndex != GPRs.size() &&
7020 !isGPRShadowAligned(GPRs[NextRegIndex], VecAlign)) {
7021 // Shadow allocate register and its stack shadow.
7022 unsigned Reg = State.AllocateReg(GPRs);
7023 State.AllocateStack(PtrSize, PtrAlign);
7024 assert(Reg && "Allocating register unexpectedly failed.");
7025 (void)Reg;
7026 NextRegIndex = State.getFirstUnallocated(GPRs);
7027 }
7028
7029 // Vectors that are passed as fixed arguments are handled differently.
7030 // They are passed in VRs if any are available (unlike arguments passed
7031 // through ellipses) and shadow GPRs (unlike arguments to non-vaarg
7032 // functions)
7033 if (State.isFixed(ValNo)) {
7034 if (unsigned VReg = State.AllocateReg(VR)) {
7035 State.addLoc(CCValAssign::getReg(ValNo, ValVT, VReg, LocVT, LocInfo));
7036 // Shadow allocate GPRs and stack space even though we pass in a VR.
7037 for (unsigned I = 0; I != VecSize; I += PtrSize)
7038 State.AllocateReg(GPRs);
7039 State.AllocateStack(VecSize, VecAlign);
7040 return false;
7041 }
7042 // No vector registers remain so pass on the stack.
7043 const unsigned Offset = State.AllocateStack(VecSize, VecAlign);
7044 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
7045 return false;
7046 }
7047
7048     // If all GPRs are consumed then we pass the argument fully on the stack.
7049 if (NextRegIndex == GPRs.size()) {
7050 const unsigned Offset = State.AllocateStack(VecSize, VecAlign);
7051 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
7052 return false;
7053 }
7054
7055 // Corner case for 32-bit codegen. We have 2 registers to pass the first
7056 // half of the argument, and then need to pass the remaining half on the
7057 // stack.
7058 if (GPRs[NextRegIndex] == PPC::R9) {
7059 const unsigned Offset = State.AllocateStack(VecSize, VecAlign);
7060 State.addLoc(
7061 CCValAssign::getCustomMem(ValNo, ValVT, Offset, LocVT, LocInfo));
7062
7063 const unsigned FirstReg = State.AllocateReg(PPC::R9);
7064 const unsigned SecondReg = State.AllocateReg(PPC::R10);
7065 assert(FirstReg && SecondReg &&
7066 "Allocating R9 or R10 unexpectedly failed.");
7067 State.addLoc(
7068 CCValAssign::getCustomReg(ValNo, ValVT, FirstReg, RegVT, LocInfo));
7069 State.addLoc(
7070 CCValAssign::getCustomReg(ValNo, ValVT, SecondReg, RegVT, LocInfo));
7071 return false;
7072 }
7073
7074 // We have enough GPRs to fully pass the vector argument, and we have
7075 // already consumed any underaligned registers. Start with the custom
7076 // MemLoc and then the custom RegLocs.
7077 const unsigned Offset = State.AllocateStack(VecSize, VecAlign);
7078 State.addLoc(
7079 CCValAssign::getCustomMem(ValNo, ValVT, Offset, LocVT, LocInfo));
7080 for (unsigned I = 0; I != VecSize; I += PtrSize) {
7081 const unsigned Reg = State.AllocateReg(GPRs);
7082       assert(Reg && "Failed to allocate register for vararg vector argument");
7083 State.addLoc(
7084 CCValAssign::getCustomReg(ValNo, ValVT, Reg, RegVT, LocInfo));
7085 }
7086 return false;
7087 }
7088 }
7089 return true;
7090}
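// Annotation (not part of the original source): to summarize the vararg
// vector path above, a 16-byte vector passed through the ellipsis gets one
// custom MemLoc for its parameter-save-area slot plus a custom RegLoc for
// each GPR it occupies (two on PPC64, up to four on PPC32, or R9/R10 plus
// memory in the split corner case). The caller then stores the vector to the
// slot and reloads the words into those GPRs in LowerCall_AIX below.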
7091
7092// So far, this function is only used by LowerFormalArguments_AIX()
7093 static const TargetRegisterClass *getRegClassForSVT(MVT::SimpleValueType SVT,
7094                                                     bool IsPPC64,
7095 bool HasP8Vector,
7096 bool HasVSX) {
7097 assert((IsPPC64 || SVT != MVT::i64) &&
7098 "i64 should have been split for 32-bit codegen.");
7099
7100 switch (SVT) {
7101 default:
7102 report_fatal_error("Unexpected value type for formal argument");
7103 case MVT::i1:
7104 case MVT::i32:
7105 case MVT::i64:
7106 return IsPPC64 ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
7107 case MVT::f32:
7108 return HasP8Vector ? &PPC::VSSRCRegClass : &PPC::F4RCRegClass;
7109 case MVT::f64:
7110 return HasVSX ? &PPC::VSFRCRegClass : &PPC::F8RCRegClass;
7111 case MVT::v4f32:
7112 case MVT::v4i32:
7113 case MVT::v8i16:
7114 case MVT::v16i8:
7115 case MVT::v2i64:
7116 case MVT::v2f64:
7117 case MVT::v1i128:
7118 return &PPC::VRRCRegClass;
7119 }
7120}
7121
7122 static SDValue truncateScalarIntegerArg(ISD::ArgFlagsTy Flags, EVT ValVT,
7123                                         SelectionDAG &DAG, SDValue ArgValue,
7124 MVT LocVT, const SDLoc &dl) {
7125 assert(ValVT.isScalarInteger() && LocVT.isScalarInteger());
7126 assert(ValVT.getFixedSizeInBits() < LocVT.getFixedSizeInBits());
7127
7128 if (Flags.isSExt())
7129 ArgValue = DAG.getNode(ISD::AssertSext, dl, LocVT, ArgValue,
7130 DAG.getValueType(ValVT));
7131 else if (Flags.isZExt())
7132 ArgValue = DAG.getNode(ISD::AssertZext, dl, LocVT, ArgValue,
7133 DAG.getValueType(ValVT));
7134
7135 return DAG.getNode(ISD::TRUNCATE, dl, ValVT, ArgValue);
7136}
7137
7138static unsigned mapArgRegToOffsetAIX(unsigned Reg, const PPCFrameLowering *FL) {
7139 const unsigned LASize = FL->getLinkageSize();
7140
7141 if (PPC::GPRCRegClass.contains(Reg)) {
7142 assert(Reg >= PPC::R3 && Reg <= PPC::R10 &&
7143 "Reg must be a valid argument register!");
7144 return LASize + 4 * (Reg - PPC::R3);
7145 }
7146
7147 if (PPC::G8RCRegClass.contains(Reg)) {
7148 assert(Reg >= PPC::X3 && Reg <= PPC::X10 &&
7149 "Reg must be a valid argument register!");
7150 return LASize + 8 * (Reg - PPC::X3);
7151 }
7152
7153 llvm_unreachable("Only general purpose registers expected.");
7154}
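// Annotation (not part of the original source): the mapping above is simply
// linkage-area size plus register index times register width. For example,
// on 64-bit AIX with a 48-byte linkage area, X5 maps to 48 + 8 * (X5 - X3)
// = 64, i.e. the third doubleword of the parameter save area.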
7155
7156// AIX ABI Stack Frame Layout:
7157//
7158// Low Memory +--------------------------------------------+
7159// SP +---> | Back chain | ---+
7160// | +--------------------------------------------+ |
7161// | | Saved Condition Register | |
7162// | +--------------------------------------------+ |
7163// | | Saved Linkage Register | |
7164// | +--------------------------------------------+ | Linkage Area
7165// | | Reserved for compilers | |
7166// | +--------------------------------------------+ |
7167// | | Reserved for binders | |
7168// | +--------------------------------------------+ |
7169// | | Saved TOC pointer | ---+
7170// | +--------------------------------------------+
7171// | | Parameter save area |
7172// | +--------------------------------------------+
7173// | | Alloca space |
7174// | +--------------------------------------------+
7175// | | Local variable space |
7176// | +--------------------------------------------+
7177// | | Float/int conversion temporary |
7178// | +--------------------------------------------+
7179// | | Save area for AltiVec registers |
7180// | +--------------------------------------------+
7181// | | AltiVec alignment padding |
7182// | +--------------------------------------------+
7183// | | Save area for VRSAVE register |
7184// | +--------------------------------------------+
7185// | | Save area for General Purpose registers |
7186// | +--------------------------------------------+
7187// | | Save area for Floating Point registers |
7188// | +--------------------------------------------+
7189// +---- | Back chain |
7190// High Memory +--------------------------------------------+
7191//
7192// Specifications:
7193// AIX 7.2 Assembler Language Reference
7194// Subroutine linkage convention
7195
7196SDValue PPCTargetLowering::LowerFormalArguments_AIX(
7197 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
7198 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
7199 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
7200
7201 assert((CallConv == CallingConv::C || CallConv == CallingConv::Cold ||
7202 CallConv == CallingConv::Fast) &&
7203 "Unexpected calling convention!");
7204
7205 if (getTargetMachine().Options.GuaranteedTailCallOpt)
7206 report_fatal_error("Tail call support is unimplemented on AIX.");
7207
7208 if (useSoftFloat())
7209 report_fatal_error("Soft float support is unimplemented on AIX.");
7210
7211 const PPCSubtarget &Subtarget = DAG.getSubtarget<PPCSubtarget>();
7212
7213 const bool IsPPC64 = Subtarget.isPPC64();
7214 const unsigned PtrByteSize = IsPPC64 ? 8 : 4;
7215
7216   // Assign locations to all of the incoming arguments.
7217   SmallVector<CCValAssign, 16> ArgLocs;
7218   MachineFunction &MF = DAG.getMachineFunction();
7219 MachineFrameInfo &MFI = MF.getFrameInfo();
7220 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
7221 AIXCCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());
7222
7223 const EVT PtrVT = getPointerTy(MF.getDataLayout());
7224 // Reserve space for the linkage area on the stack.
7225 const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
7226 CCInfo.AllocateStack(LinkageSize, Align(PtrByteSize));
7227 CCInfo.AnalyzeFormalArguments(Ins, CC_AIX);
7228
7229   SmallVector<SDValue, 8> MemOps;
7230 
7231 for (size_t I = 0, End = ArgLocs.size(); I != End; /* No increment here */) {
7232 CCValAssign &VA = ArgLocs[I++];
7233 MVT LocVT = VA.getLocVT();
7234 MVT ValVT = VA.getValVT();
7235 ISD::ArgFlagsTy Flags = Ins[VA.getValNo()].Flags;
7236 // For compatibility with the AIX XL compiler, the float args in the
7237 // parameter save area are initialized even if the argument is available
7238 // in register. The caller is required to initialize both the register
7239     // and memory; however, the callee can choose to expect it in either.
7240 // The memloc is dismissed here because the argument is retrieved from
7241 // the register.
7242 if (VA.isMemLoc() && VA.needsCustom() && ValVT.isFloatingPoint())
7243 continue;
7244
7245 auto HandleMemLoc = [&]() {
7246 const unsigned LocSize = LocVT.getStoreSize();
7247 const unsigned ValSize = ValVT.getStoreSize();
7248 assert((ValSize <= LocSize) &&
7249 "Object size is larger than size of MemLoc");
7250 int CurArgOffset = VA.getLocMemOffset();
7251 // Objects are right-justified because AIX is big-endian.
7252 if (LocSize > ValSize)
7253 CurArgOffset += LocSize - ValSize;
7254 // Potential tail calls could cause overwriting of argument stack slots.
7255 const bool IsImmutable =
7256           !(getTargetMachine().Options.GuaranteedTailCallOpt &&
7257             (CallConv == CallingConv::Fast));
7258 int FI = MFI.CreateFixedObject(ValSize, CurArgOffset, IsImmutable);
7259 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
7260 SDValue ArgValue =
7261 DAG.getLoad(ValVT, dl, Chain, FIN, MachinePointerInfo());
7262 InVals.push_back(ArgValue);
7263 };
7264
7265 // Vector arguments to VaArg functions are passed both on the stack, and
7266 // in any available GPRs. Load the value from the stack and add the GPRs
7267 // as live ins.
7268 if (VA.isMemLoc() && VA.needsCustom()) {
7269 assert(ValVT.isVector() && "Unexpected Custom MemLoc type.");
7270 assert(isVarArg && "Only use custom memloc for vararg.");
7271 // ValNo of the custom MemLoc, so we can compare it to the ValNo of the
7272 // matching custom RegLocs.
7273 const unsigned OriginalValNo = VA.getValNo();
7274 (void)OriginalValNo;
7275
7276 auto HandleCustomVecRegLoc = [&]() {
7277 assert(I != End && ArgLocs[I].isRegLoc() && ArgLocs[I].needsCustom() &&
7278 "Missing custom RegLoc.");
7279 VA = ArgLocs[I++];
7280 assert(VA.getValVT().isVector() &&
7281 "Unexpected Val type for custom RegLoc.");
7282 assert(VA.getValNo() == OriginalValNo &&
7283 "ValNo mismatch between custom MemLoc and RegLoc.");
7284         MVT::SimpleValueType SVT = VA.getValVT().SimpleTy;
7285         MF.addLiveIn(VA.getLocReg(),
7286 getRegClassForSVT(SVT, IsPPC64, Subtarget.hasP8Vector(),
7287 Subtarget.hasVSX()));
7288 };
7289
7290 HandleMemLoc();
7291       // In 64-bit there will be exactly 2 custom RegLocs that follow, and
7292       // in 32-bit there will be 2 custom RegLocs if we are passing in R9 and
7293 // R10.
7294 HandleCustomVecRegLoc();
7295 HandleCustomVecRegLoc();
7296
7297 // If we are targeting 32-bit, there might be 2 extra custom RegLocs if
7298 // we passed the vector in R5, R6, R7 and R8.
7299 if (I != End && ArgLocs[I].isRegLoc() && ArgLocs[I].needsCustom()) {
7300 assert(!IsPPC64 &&
7301 "Only 2 custom RegLocs expected for 64-bit codegen.");
7302 HandleCustomVecRegLoc();
7303 HandleCustomVecRegLoc();
7304 }
7305
7306 continue;
7307 }
7308
7309 if (VA.isRegLoc()) {
7310 if (VA.getValVT().isScalarInteger())
7312 else if (VA.getValVT().isFloatingPoint() && !VA.getValVT().isVector()) {
7313 switch (VA.getValVT().SimpleTy) {
7314 default:
7315 report_fatal_error("Unhandled value type for argument.");
7316 case MVT::f32:
7318 break;
7319 case MVT::f64:
7321 break;
7322 }
7323 } else if (VA.getValVT().isVector()) {
7324 switch (VA.getValVT().SimpleTy) {
7325 default:
7326 report_fatal_error("Unhandled value type for argument.");
7327 case MVT::v16i8:
7329 break;
7330 case MVT::v8i16:
7332 break;
7333 case MVT::v4i32:
7334 case MVT::v2i64:
7335 case MVT::v1i128:
7337 break;
7338 case MVT::v4f32:
7339 case MVT::v2f64:
7341 break;
7342 }
7343 }
7344 }
7345
7346 if (Flags.isByVal() && VA.isMemLoc()) {
7347 const unsigned Size =
7348 alignTo(Flags.getByValSize() ? Flags.getByValSize() : PtrByteSize,
7349 PtrByteSize);
7350 const int FI = MF.getFrameInfo().CreateFixedObject(
7351 Size, VA.getLocMemOffset(), /* IsImmutable */ false,
7352 /* IsAliased */ true);
7353 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
7354 InVals.push_back(FIN);
7355
7356 continue;
7357 }
7358
7359 if (Flags.isByVal()) {
7360 assert(VA.isRegLoc() && "MemLocs should already be handled.");
7361
7362 const MCPhysReg ArgReg = VA.getLocReg();
7363 const PPCFrameLowering *FL = Subtarget.getFrameLowering();
7364
7365 const unsigned StackSize = alignTo(Flags.getByValSize(), PtrByteSize);
7366 const int FI = MF.getFrameInfo().CreateFixedObject(
7367 StackSize, mapArgRegToOffsetAIX(ArgReg, FL), /* IsImmutable */ false,
7368 /* IsAliased */ true);
7369 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
7370 InVals.push_back(FIN);
7371
7372 // Add live ins for all the RegLocs for the same ByVal.
7373 const TargetRegisterClass *RegClass =
7374 IsPPC64 ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
7375
7376 auto HandleRegLoc = [&, RegClass, LocVT](const MCPhysReg PhysReg,
7377 unsigned Offset) {
7378 const Register VReg = MF.addLiveIn(PhysReg, RegClass);
7379         // Since the caller's side has left-justified the aggregate in the
7380 // register, we can simply store the entire register into the stack
7381 // slot.
7382 SDValue CopyFrom = DAG.getCopyFromReg(Chain, dl, VReg, LocVT);
7383         // The store to the fixedstack object is needed because accessing a
7384 // field of the ByVal will use a gep and load. Ideally we will optimize
7385 // to extracting the value from the register directly, and elide the
7386         // stores when the argument's address is not taken, but that will need to
7387 // be future work.
7388 SDValue Store = DAG.getStore(
7389 CopyFrom.getValue(1), dl, CopyFrom,
7390             DAG.getObjectPtrOffset(dl, FIN, TypeSize::getFixed(Offset)),
7391             MachinePointerInfo::getFixedStack(MF, FI, Offset));
7392 
7393 MemOps.push_back(Store);
7394 };
7395
7396 unsigned Offset = 0;
7397 HandleRegLoc(VA.getLocReg(), Offset);
7398 Offset += PtrByteSize;
7399 for (; Offset != StackSize && ArgLocs[I].isRegLoc();
7400 Offset += PtrByteSize) {
7401 assert(ArgLocs[I].getValNo() == VA.getValNo() &&
7402 "RegLocs should be for ByVal argument.");
7403
7404 const CCValAssign RL = ArgLocs[I++];
7405 HandleRegLoc(RL.getLocReg(), Offset);
7407 }
7408
7409 if (Offset != StackSize) {
7410 assert(ArgLocs[I].getValNo() == VA.getValNo() &&
7411 "Expected MemLoc for remaining bytes.");
7412 assert(ArgLocs[I].isMemLoc() && "Expected MemLoc for remaining bytes.");
7413         // Consume the MemLoc. The InVal has already been emitted, so nothing
7414 // more needs to be done.
7415 ++I;
7416 }
7417
7418 continue;
7419 }
7420
7421 if (VA.isRegLoc() && !VA.needsCustom()) {
7422 MVT::SimpleValueType SVT = ValVT.SimpleTy;
7423 Register VReg =
7424 MF.addLiveIn(VA.getLocReg(),
7425 getRegClassForSVT(SVT, IsPPC64, Subtarget.hasP8Vector(),
7426 Subtarget.hasVSX()));
7427 SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, VReg, LocVT);
7428 if (ValVT.isScalarInteger() &&
7429 (ValVT.getFixedSizeInBits() < LocVT.getFixedSizeInBits())) {
7430 ArgValue =
7431 truncateScalarIntegerArg(Flags, ValVT, DAG, ArgValue, LocVT, dl);
7432 }
7433 InVals.push_back(ArgValue);
7434 continue;
7435 }
7436 if (VA.isMemLoc()) {
7437 HandleMemLoc();
7438 continue;
7439 }
7440 }
7441
7442 // On AIX a minimum of 8 words is saved to the parameter save area.
7443 const unsigned MinParameterSaveArea = 8 * PtrByteSize;
7444 // Area that is at least reserved in the caller of this function.
7445 unsigned CallerReservedArea = std::max<unsigned>(
7446 CCInfo.getStackSize(), LinkageSize + MinParameterSaveArea);
7447
7448 // Set the size that is at least reserved in caller of this function. Tail
7449 // call optimized function's reserved stack space needs to be aligned so
7450 // that taking the difference between two stack areas will result in an
7451 // aligned stack.
7452 CallerReservedArea =
7453 EnsureStackAlignment(Subtarget.getFrameLowering(), CallerReservedArea);
7454 FuncInfo->setMinReservedArea(CallerReservedArea);
7455
7456 if (isVarArg) {
7457 FuncInfo->setVarArgsFrameIndex(
7458 MFI.CreateFixedObject(PtrByteSize, CCInfo.getStackSize(), true));
7459 SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
7460
7461 static const MCPhysReg GPR_32[] = {PPC::R3, PPC::R4, PPC::R5, PPC::R6,
7462 PPC::R7, PPC::R8, PPC::R9, PPC::R10};
7463
7464 static const MCPhysReg GPR_64[] = {PPC::X3, PPC::X4, PPC::X5, PPC::X6,
7465 PPC::X7, PPC::X8, PPC::X9, PPC::X10};
7466 const unsigned NumGPArgRegs = std::size(IsPPC64 ? GPR_64 : GPR_32);
7467
7468 // The fixed integer arguments of a variadic function are stored to the
7469 // VarArgsFrameIndex on the stack so that they may be loaded by
7470 // dereferencing the result of va_next.
7471 for (unsigned GPRIndex =
7472 (CCInfo.getStackSize() - LinkageSize) / PtrByteSize;
7473 GPRIndex < NumGPArgRegs; ++GPRIndex) {
7474
7475 const Register VReg =
7476 IsPPC64 ? MF.addLiveIn(GPR_64[GPRIndex], &PPC::G8RCRegClass)
7477 : MF.addLiveIn(GPR_32[GPRIndex], &PPC::GPRCRegClass);
7478
7479 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
7480 SDValue Store =
7481 DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
7482 MemOps.push_back(Store);
7483 // Increment the address for the next argument to store.
7484 SDValue PtrOff = DAG.getConstant(PtrByteSize, dl, PtrVT);
7485 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
7486 }
7487 }
7488
7489 if (!MemOps.empty())
7490 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
7491
7492 return Chain;
7493}
7494
7495SDValue PPCTargetLowering::LowerCall_AIX(
7496 SDValue Chain, SDValue Callee, CallFlags CFlags,
7497     const SmallVectorImpl<ISD::OutputArg> &Outs,
7498     const SmallVectorImpl<SDValue> &OutVals,
7499     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
7500     SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
7501     const CallBase *CB) const {
7502 // See PPCTargetLowering::LowerFormalArguments_AIX() for a description of the
7503 // AIX ABI stack frame layout.
7504
7505 assert((CFlags.CallConv == CallingConv::C ||
7506 CFlags.CallConv == CallingConv::Cold ||
7507 CFlags.CallConv == CallingConv::Fast) &&
7508 "Unexpected calling convention!");
7509
7510 if (CFlags.IsPatchPoint)
7511 report_fatal_error("This call type is unimplemented on AIX.");
7512
7513 const PPCSubtarget &Subtarget = DAG.getSubtarget<PPCSubtarget>();
7514
7515   MachineFunction &MF = DAG.getMachineFunction();
7516   SmallVector<CCValAssign, 16> ArgLocs;
7517   AIXCCState CCInfo(CFlags.CallConv, CFlags.IsVarArg, MF, ArgLocs,
7518 *DAG.getContext());
7519
7520 // Reserve space for the linkage save area (LSA) on the stack.
7521 // In both PPC32 and PPC64 there are 6 reserved slots in the LSA:
7522 // [SP][CR][LR][2 x reserved][TOC].
7523 // The LSA is 24 bytes (6x4) in PPC32 and 48 bytes (6x8) in PPC64.
7524 const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
7525 const bool IsPPC64 = Subtarget.isPPC64();
7526 const EVT PtrVT = getPointerTy(DAG.getDataLayout());
7527 const unsigned PtrByteSize = IsPPC64 ? 8 : 4;
7528 CCInfo.AllocateStack(LinkageSize, Align(PtrByteSize));
7529 CCInfo.AnalyzeCallOperands(Outs, CC_AIX);
7530
7531 // The prolog code of the callee may store up to 8 GPR argument registers to
7532 // the stack, allowing va_start to index over them in memory if the callee
7533 // is variadic.
7534 // Because we cannot tell if this is needed on the caller side, we have to
7535 // conservatively assume that it is needed. As such, make sure we have at
7536 // least enough stack space for the caller to store the 8 GPRs.
7537 const unsigned MinParameterSaveAreaSize = 8 * PtrByteSize;
7538 const unsigned NumBytes = std::max<unsigned>(
7539 LinkageSize + MinParameterSaveAreaSize, CCInfo.getStackSize());
7540
7541 // Adjust the stack pointer for the new arguments...
7542 // These operations are automatically eliminated by the prolog/epilog pass.
7543 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
7544 SDValue CallSeqStart = Chain;
7545
7546   SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
7547   SmallVector<SDValue, 8> MemOpChains;
7548
7549 // Set up a copy of the stack pointer for loading and storing any
7550 // arguments that may not fit in the registers available for argument
7551 // passing.
7552 const SDValue StackPtr = IsPPC64 ? DAG.getRegister(PPC::X1, MVT::i64)
7553 : DAG.getRegister(PPC::R1, MVT::i32);
7554
7555 for (unsigned I = 0, E = ArgLocs.size(); I != E;) {
7556 const unsigned ValNo = ArgLocs[I].getValNo();
7557 SDValue Arg = OutVals[ValNo];
7558 ISD::ArgFlagsTy Flags = Outs[ValNo].Flags;
7559
7560 if (Flags.isByVal()) {
7561 const unsigned ByValSize = Flags.getByValSize();
7562
7563 // Nothing to do for zero-sized ByVals on the caller side.
7564 if (!ByValSize) {
7565 ++I;
7566 continue;
7567 }
7568
7569 auto GetLoad = [&](EVT VT, unsigned LoadOffset) {
7570 return DAG.getExtLoad(ISD::ZEXTLOAD, dl, PtrVT, Chain,
7571 (LoadOffset != 0)
7572 ? DAG.getObjectPtrOffset(
7573 dl, Arg, TypeSize::getFixed(LoadOffset))
7574 : Arg,
7575 MachinePointerInfo(), VT);
7576 };
7577
7578 unsigned LoadOffset = 0;
7579
7580 // Initialize registers, which are fully occupied by the by-val argument.
7581 while (LoadOffset + PtrByteSize <= ByValSize && ArgLocs[I].isRegLoc()) {
7582 SDValue Load = GetLoad(PtrVT, LoadOffset);
7583 MemOpChains.push_back(Load.getValue(1));
7584 LoadOffset += PtrByteSize;
7585 const CCValAssign &ByValVA = ArgLocs[I++];
7586 assert(ByValVA.getValNo() == ValNo &&
7587 "Unexpected location for pass-by-value argument.");
7588 RegsToPass.push_back(std::make_pair(ByValVA.getLocReg(), Load));
7589 }
7590
7591 if (LoadOffset == ByValSize)
7592 continue;
7593
7594 // There must be one more loc to handle the remainder.
7595 assert(ArgLocs[I].getValNo() == ValNo &&
7596 "Expected additional location for by-value argument.");
7597
7598 if (ArgLocs[I].isMemLoc()) {
7599 assert(LoadOffset < ByValSize && "Unexpected memloc for by-val arg.");
7600 const CCValAssign &ByValVA = ArgLocs[I++];
7601 ISD::ArgFlagsTy MemcpyFlags = Flags;
7602 // Only memcpy the bytes that don't pass in register.
7603 MemcpyFlags.setByValSize(ByValSize - LoadOffset);
7604 Chain = CallSeqStart = createMemcpyOutsideCallSeq(
7605 (LoadOffset != 0) ? DAG.getObjectPtrOffset(
7606 dl, Arg, TypeSize::getFixed(LoadOffset))
7607 : Arg,
7608             DAG.getObjectPtrOffset(
7609                 dl, StackPtr, TypeSize::getFixed(ByValVA.getLocMemOffset())),
7610 CallSeqStart, MemcpyFlags, DAG, dl);
7611 continue;
7612 }
7613
7614 // Initialize the final register residue.
7615 // Any residue that occupies the final by-val arg register must be
7616 // left-justified on AIX. Loads must be a power-of-2 size and cannot be
7617 // larger than the ByValSize. For example: a 7 byte by-val arg requires 4,
7618 // 2 and 1 byte loads.
7619 const unsigned ResidueBytes = ByValSize % PtrByteSize;
7620 assert(ResidueBytes != 0 && LoadOffset + PtrByteSize > ByValSize &&
7621 "Unexpected register residue for by-value argument.");
7622 SDValue ResidueVal;
7623 for (unsigned Bytes = 0; Bytes != ResidueBytes;) {
7624 const unsigned N = llvm::bit_floor(ResidueBytes - Bytes);
7625 const MVT VT =
7626 N == 1 ? MVT::i8
7627 : ((N == 2) ? MVT::i16 : (N == 4 ? MVT::i32 : MVT::i64));
7628 SDValue Load = GetLoad(VT, LoadOffset);
7629 MemOpChains.push_back(Load.getValue(1));
7630 LoadOffset += N;
7631 Bytes += N;
7632
7633       // By-val arguments are passed left-justified in register.
7634 // Every load here needs to be shifted, otherwise a full register load
7635 // should have been used.
7636 assert(PtrVT.getSimpleVT().getSizeInBits() > (Bytes * 8) &&
7637 "Unexpected load emitted during handling of pass-by-value "
7638 "argument.");
7639 unsigned NumSHLBits = PtrVT.getSimpleVT().getSizeInBits() - (Bytes * 8);
7640 EVT ShiftAmountTy =
7641 getShiftAmountTy(Load->getValueType(0), DAG.getDataLayout());
7642 SDValue SHLAmt = DAG.getConstant(NumSHLBits, dl, ShiftAmountTy);
7643 SDValue ShiftedLoad =
7644 DAG.getNode(ISD::SHL, dl, Load.getValueType(), Load, SHLAmt);
7645 ResidueVal = ResidueVal ? DAG.getNode(ISD::OR, dl, PtrVT, ResidueVal,
7646 ShiftedLoad)
7647 : ShiftedLoad;
7648 }
7649
7650 const CCValAssign &ByValVA = ArgLocs[I++];
7651 RegsToPass.push_back(std::make_pair(ByValVA.getLocReg(), ResidueVal));
7652 continue;
7653 }
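    // Annotation (not part of the original source): as a worked example of
    // the residue handling above, a 7-byte by-val argument on 64-bit AIX is
    // assembled from a 4-, a 2- and a 1-byte zero-extending load. The running
    // byte counts are 4, 6 and 7, so the shift amounts are 32, 16 and 8 bits;
    // OR-ing the shifted pieces leaves the aggregate left-justified in the
    // GPR with only the lowest byte zero.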
7654
7655 CCValAssign &VA = ArgLocs[I++];
7656 const MVT LocVT = VA.getLocVT();
7657 const MVT ValVT = VA.getValVT();
7658
7659 switch (VA.getLocInfo()) {
7660 default:
7661 report_fatal_error("Unexpected argument extension type.");
7662 case CCValAssign::Full:
7663 break;
7664 case CCValAssign::ZExt:
7665 Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
7666 break;
7667 case CCValAssign::SExt:
7668 Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
7669 break;
7670 }
7671
7672 if (VA.isRegLoc() && !VA.needsCustom()) {
7673 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
7674 continue;
7675 }
7676
7677 // Vector arguments passed to VarArg functions need custom handling when
7678 // they are passed (at least partially) in GPRs.
7679 if (VA.isMemLoc() && VA.needsCustom() && ValVT.isVector()) {
7680 assert(CFlags.IsVarArg && "Custom MemLocs only used for Vector args.");
7681 // Store value to its stack slot.
7682 SDValue PtrOff =
7683 DAG.getConstant(VA.getLocMemOffset(), dl, StackPtr.getValueType());
7684 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
7685 SDValue Store =
7686 DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo());
7687 MemOpChains.push_back(Store);
7688 const unsigned OriginalValNo = VA.getValNo();
7689 // Then load the GPRs from the stack
7690 unsigned LoadOffset = 0;
7691 auto HandleCustomVecRegLoc = [&]() {
7692 assert(I != E && "Unexpected end of CCvalAssigns.");
7693 assert(ArgLocs[I].isRegLoc() && ArgLocs[I].needsCustom() &&
7694 "Expected custom RegLoc.");
7695 CCValAssign RegVA = ArgLocs[I++];
7696 assert(RegVA.getValNo() == OriginalValNo &&
7697 "Custom MemLoc ValNo and custom RegLoc ValNo must match.");
7698 SDValue Add = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
7699 DAG.getConstant(LoadOffset, dl, PtrVT));
7700 SDValue Load = DAG.getLoad(PtrVT, dl, Store, Add, MachinePointerInfo());
7701 MemOpChains.push_back(Load.getValue(1));
7702 RegsToPass.push_back(std::make_pair(RegVA.getLocReg(), Load));
7703 LoadOffset += PtrByteSize;
7704 };
7705
7706       // In 64-bit there will be exactly 2 custom RegLocs that follow, and
7707       // in 32-bit there will be 2 custom RegLocs if we are passing in R9 and
7708 // R10.
7709 HandleCustomVecRegLoc();
7710 HandleCustomVecRegLoc();
7711
7712 if (I != E && ArgLocs[I].isRegLoc() && ArgLocs[I].needsCustom() &&
7713 ArgLocs[I].getValNo() == OriginalValNo) {
7714 assert(!IsPPC64 &&
7715 "Only 2 custom RegLocs expected for 64-bit codegen.");
7716 HandleCustomVecRegLoc();
7717 HandleCustomVecRegLoc();
7718 }
7719
7720 continue;
7721 }
7722
7723 if (VA.isMemLoc()) {
7724 SDValue PtrOff =
7725 DAG.getConstant(VA.getLocMemOffset(), dl, StackPtr.getValueType());
7726 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
7727 MemOpChains.push_back(
7728 DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));
7729
7730 continue;
7731 }
7732
7733 if (!ValVT.isFloatingPoint())
7734       report_fatal_error(
7735           "Unexpected register handling for calling convention.");
7736
7737 // Custom handling is used for GPR initializations for vararg float
7738 // arguments.
7739 assert(VA.isRegLoc() && VA.needsCustom() && CFlags.IsVarArg &&
7740 LocVT.isInteger() &&
7741 "Custom register handling only expected for VarArg.");
7742
7743 SDValue ArgAsInt =
7744 DAG.getBitcast(MVT::getIntegerVT(ValVT.getSizeInBits()), Arg);
7745
7746 if (Arg.getValueType().getStoreSize() == LocVT.getStoreSize())
7747 // f32 in 32-bit GPR
7748 // f64 in 64-bit GPR
7749 RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgAsInt));
7750 else if (Arg.getValueType().getFixedSizeInBits() <
7751 LocVT.getFixedSizeInBits())
7752 // f32 in 64-bit GPR.
7753 RegsToPass.push_back(std::make_pair(
7754 VA.getLocReg(), DAG.getZExtOrTrunc(ArgAsInt, dl, LocVT)));
7755 else {
7756 // f64 in two 32-bit GPRs
7757 // The 2 GPRs are marked custom and expected to be adjacent in ArgLocs.
7758 assert(Arg.getValueType() == MVT::f64 && CFlags.IsVarArg && !IsPPC64 &&
7759 "Unexpected custom register for argument!");
7760 CCValAssign &GPR1 = VA;
7761 SDValue MSWAsI64 = DAG.getNode(ISD::SRL, dl, MVT::i64, ArgAsInt,
7762 DAG.getConstant(32, dl, MVT::i8));
7763 RegsToPass.push_back(std::make_pair(
7764 GPR1.getLocReg(), DAG.getZExtOrTrunc(MSWAsI64, dl, MVT::i32)));
7765
7766 if (I != E) {
7767 // If only 1 GPR was available, there will only be one custom GPR and
7768 // the argument will also pass in memory.
7769 CCValAssign &PeekArg = ArgLocs[I];
7770         if (PeekArg.isRegLoc() && PeekArg.getValNo() == VA.getValNo()) {
7771 assert(PeekArg.needsCustom() && "A second custom GPR is expected.");
7772 CCValAssign &GPR2 = ArgLocs[I++];
7773 RegsToPass.push_back(std::make_pair(
7774 GPR2.getLocReg(), DAG.getZExtOrTrunc(ArgAsInt, dl, MVT::i32)));
7775 }
7776 }
7777 }
7778 }
7779
7780 if (!MemOpChains.empty())
7781 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
7782
7783 // For indirect calls, we need to save the TOC base to the stack for
7784 // restoration after the call.
7785 if (CFlags.IsIndirect) {
7786 assert(!CFlags.IsTailCall && "Indirect tail-calls not supported.");
7787 const MCRegister TOCBaseReg = Subtarget.getTOCPointerRegister();
7788 const MCRegister StackPtrReg = Subtarget.getStackPointerRegister();
7789 const MVT PtrVT = Subtarget.isPPC64() ? MVT::i64 : MVT::i32;
7790 const unsigned TOCSaveOffset =
7791 Subtarget.getFrameLowering()->getTOCSaveOffset();
7792
7793 setUsesTOCBasePtr(DAG);
7794 SDValue Val = DAG.getCopyFromReg(Chain, dl, TOCBaseReg, PtrVT);
7795 SDValue PtrOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
7796 SDValue StackPtr = DAG.getRegister(StackPtrReg, PtrVT);
7797 SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
7798 Chain = DAG.getStore(
7799 Val.getValue(1), dl, Val, AddPtr,
7800 MachinePointerInfo::getStack(DAG.getMachineFunction(), TOCSaveOffset));
7801 }
7802
7803 // Build a sequence of copy-to-reg nodes chained together with token chain
7804 // and flag operands which copy the outgoing args into the appropriate regs.
7805 SDValue InGlue;
7806 for (auto Reg : RegsToPass) {
7807 Chain = DAG.getCopyToReg(Chain, dl, Reg.first, Reg.second, InGlue);
7808 InGlue = Chain.getValue(1);
7809 }
7810
7811 const int SPDiff = 0;
7812 return FinishCall(CFlags, dl, DAG, RegsToPass, InGlue, Chain, CallSeqStart,
7813 Callee, SPDiff, NumBytes, Ins, InVals, CB);
7814}
7815
7816bool
7817PPCTargetLowering::CanLowerReturn(CallingConv::ID CallConv,
7818 MachineFunction &MF, bool isVarArg,
7819                                  const SmallVectorImpl<ISD::OutputArg> &Outs,
7820                                  LLVMContext &Context) const {
7821   SmallVector<CCValAssign, 16> RVLocs;
7822   CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
7823 return CCInfo.CheckReturn(
7824 Outs, (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold)
7825                                      ? RetCC_PPC_Cold
7826                                      : RetCC_PPC);
7827}
7828
7829SDValue
7830PPCTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
7831 bool isVarArg,
7832                                const SmallVectorImpl<ISD::OutputArg> &Outs,
7833                                const SmallVectorImpl<SDValue> &OutVals,
7834 const SDLoc &dl, SelectionDAG &DAG) const {
7835   SmallVector<CCValAssign, 16> RVLocs;
7836   CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
7837 *DAG.getContext());
7838 CCInfo.AnalyzeReturn(Outs,
7839 (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold)
7840                            ? RetCC_PPC_Cold
7841                            : RetCC_PPC);
7842
7843 SDValue Glue;
7844 SmallVector<SDValue, 4> RetOps(1, Chain);
7845
7846 // Copy the result values into the output registers.
7847 for (unsigned i = 0, RealResIdx = 0; i != RVLocs.size(); ++i, ++RealResIdx) {
7848 CCValAssign &VA = RVLocs[i];
7849 assert(VA.isRegLoc() && "Can only return in registers!");
7850
7851 SDValue Arg = OutVals[RealResIdx];
7852
7853 switch (VA.getLocInfo()) {
7854 default: llvm_unreachable("Unknown loc info!");
7855 case CCValAssign::Full: break;
7856 case CCValAssign::AExt:
7857 Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
7858 break;
7859 case CCValAssign::ZExt:
7860 Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
7861 break;
7862 case CCValAssign::SExt:
7863 Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
7864 break;
7865 }
7866 if (Subtarget.hasSPE() && VA.getLocVT() == MVT::f64) {
7867 bool isLittleEndian = Subtarget.isLittleEndian();
7868 // Legalize ret f64 -> ret 2 x i32.
7869 SDValue SVal =
7870 DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
7871 DAG.getIntPtrConstant(isLittleEndian ? 0 : 1, dl));
7872 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), SVal, Glue);
7873 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
7874 SVal = DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
7875 DAG.getIntPtrConstant(isLittleEndian ? 1 : 0, dl));
7876 Glue = Chain.getValue(1);
7877 VA = RVLocs[++i]; // skip ahead to next loc
7878 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), SVal, Glue);
7879 } else
7880 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Glue);
7881 Glue = Chain.getValue(1);
7882 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
7883 }
7884
7885 RetOps[0] = Chain; // Update chain.
7886
7887 // Add the glue if we have it.
7888 if (Glue.getNode())
7889 RetOps.push_back(Glue);
7890
7891 return DAG.getNode(PPCISD::RET_GLUE, dl, MVT::Other, RetOps);
7892}
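// Annotation (not part of the original source): note on the SPE special case
// above. An f64 return value does not fit in a 32-bit GPR, so it is split by
// two PPCISD::EXTRACT_SPE nodes into i32 halves that are copied into two
// consecutive return registers; the endianness check only decides which half
// (index 0 or 1) goes into the first of the two registers.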
7893
7894SDValue
7895PPCTargetLowering::LowerGET_DYNAMIC_AREA_OFFSET(SDValue Op,
7896 SelectionDAG &DAG) const {
7897 SDLoc dl(Op);
7898
7899 // Get the correct type for integers.
7900 EVT IntVT = Op.getValueType();
7901
7902 // Get the inputs.
7903 SDValue Chain = Op.getOperand(0);
7904 SDValue FPSIdx = getFramePointerFrameIndex(DAG);
7905 // Build a DYNAREAOFFSET node.
7906 SDValue Ops[2] = {Chain, FPSIdx};
7907 SDVTList VTs = DAG.getVTList(IntVT);
7908 return DAG.getNode(PPCISD::DYNAREAOFFSET, dl, VTs, Ops);
7909}
7910
7911SDValue PPCTargetLowering::LowerSTACKRESTORE(SDValue Op,
7912 SelectionDAG &DAG) const {
7913 // When we pop the dynamic allocation we need to restore the SP link.
7914 SDLoc dl(Op);
7915
7916 // Get the correct type for pointers.
7917 EVT PtrVT = getPointerTy(DAG.getDataLayout());
7918
7919 // Construct the stack pointer operand.
7920 bool isPPC64 = Subtarget.isPPC64();
7921 unsigned SP = isPPC64 ? PPC::X1 : PPC::R1;
7922 SDValue StackPtr = DAG.getRegister(SP, PtrVT);
7923
7924 // Get the operands for the STACKRESTORE.
7925 SDValue Chain = Op.getOperand(0);
7926 SDValue SaveSP = Op.getOperand(1);
7927
7928 // Load the old link SP.
7929 SDValue LoadLinkSP =
7930 DAG.getLoad(PtrVT, dl, Chain, StackPtr, MachinePointerInfo());
7931
7932 // Restore the stack pointer.
7933 Chain = DAG.getCopyToReg(LoadLinkSP.getValue(1), dl, SP, SaveSP);
7934
7935 // Store the old link SP.
7936 return DAG.getStore(Chain, dl, LoadLinkSP, StackPtr, MachinePointerInfo());
7937}
7938
7939SDValue PPCTargetLowering::getReturnAddrFrameIndex(SelectionDAG &DAG) const {
7940   MachineFunction &MF = DAG.getMachineFunction();
7941   bool isPPC64 = Subtarget.isPPC64();
7942 EVT PtrVT = getPointerTy(MF.getDataLayout());
7943
7944   // Get the current return address save index. The users of this index are
7945   // primarily the RETURNADDR lowering.
7946   PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
7947   int RASI = FI->getReturnAddrSaveIndex();
7948
7949   // If the return address save index hasn't been defined yet.
7950   if (!RASI) {
7951     // Find out the fixed offset of the return address save area.
7952     int LROffset = Subtarget.getFrameLowering()->getReturnSaveOffset();
7953     // Allocate the frame index for the return address save area.
7954 RASI = MF.getFrameInfo().CreateFixedObject(isPPC64? 8 : 4, LROffset, false);
7955 // Save the result.
7956 FI->setReturnAddrSaveIndex(RASI);
7957 }
7958 return DAG.getFrameIndex(RASI, PtrVT);
7959}
7960
7961SDValue
7962PPCTargetLowering::getFramePointerFrameIndex(SelectionDAG & DAG) const {
7963   MachineFunction &MF = DAG.getMachineFunction();
7964   bool isPPC64 = Subtarget.isPPC64();
7965 EVT PtrVT = getPointerTy(MF.getDataLayout());
7966
7967 // Get current frame pointer save index. The users of this index will be
7968 // primarily DYNALLOC instructions.
7969   PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
7970   int FPSI = FI->getFramePointerSaveIndex();
7971
7972 // If the frame pointer save index hasn't been defined yet.
7973 if (!FPSI) {
7974 // Find out what the fix offset of the frame pointer save area.
7975 int FPOffset = Subtarget.getFrameLowering()->getFramePointerSaveOffset();
7976 // Allocate the frame index for frame pointer save area.
7977 FPSI = MF.getFrameInfo().CreateFixedObject(isPPC64? 8 : 4, FPOffset, true);
7978 // Save the result.
7979 FI->setFramePointerSaveIndex(FPSI);
7980 }
7981 return DAG.getFrameIndex(FPSI, PtrVT);
7982}
7983
7984SDValue PPCTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
7985 SelectionDAG &DAG) const {
7986   MachineFunction &MF = DAG.getMachineFunction();
7987   // Get the inputs.
7988 SDValue Chain = Op.getOperand(0);
7989 SDValue Size = Op.getOperand(1);
7990 SDLoc dl(Op);
7991
7992 // Get the correct type for pointers.
7993 EVT PtrVT = getPointerTy(DAG.getDataLayout());
7994 // Negate the size.
7995 SDValue NegSize = DAG.getNode(ISD::SUB, dl, PtrVT,
7996 DAG.getConstant(0, dl, PtrVT), Size);
7997 // Construct a node for the frame pointer save index.
7998 SDValue FPSIdx = getFramePointerFrameIndex(DAG);
7999 SDValue Ops[3] = { Chain, NegSize, FPSIdx };
8000 SDVTList VTs = DAG.getVTList(PtrVT, MVT::Other);
8001 if (hasInlineStackProbe(MF))
8002 return DAG.getNode(PPCISD::PROBED_ALLOCA, dl, VTs, Ops);
8003 return DAG.getNode(PPCISD::DYNALLOC, dl, VTs, Ops);
8004}
8005
8006SDValue PPCTargetLowering::LowerEH_DWARF_CFA(SDValue Op,
8007 SelectionDAG &DAG) const {
8008   MachineFunction &MF = DAG.getMachineFunction();
8009 
8010 bool isPPC64 = Subtarget.isPPC64();
8011 EVT PtrVT = getPointerTy(DAG.getDataLayout());
8012
8013 int FI = MF.getFrameInfo().CreateFixedObject(isPPC64 ? 8 : 4, 0, false);
8014 return DAG.getFrameIndex(FI, PtrVT);
8015}
8016
8017SDValue PPCTargetLowering::lowerEH_SJLJ_SETJMP(SDValue Op,
8018 SelectionDAG &DAG) const {
8019 SDLoc DL(Op);
8020 return DAG.getNode(PPCISD::EH_SJLJ_SETJMP, DL,
8021 DAG.getVTList(MVT::i32, MVT::Other),
8022 Op.getOperand(0), Op.getOperand(1));
8023}
8024
8025SDValue PPCTargetLowering::lowerEH_SJLJ_LONGJMP(SDValue Op,
8026 SelectionDAG &DAG) const {
8027 SDLoc DL(Op);
8028 return DAG.getNode(PPCISD::EH_SJLJ_LONGJMP, DL, MVT::Other,
8029 Op.getOperand(0), Op.getOperand(1));
8030}
8031
8032SDValue PPCTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
8033 if (Op.getValueType().isVector())
8034 return LowerVectorLoad(Op, DAG);
8035
8036 assert(Op.getValueType() == MVT::i1 &&
8037 "Custom lowering only for i1 loads");
8038
8039 // First, load 8 bits into 32 bits, then truncate to 1 bit.
8040
8041 SDLoc dl(Op);
8042 LoadSDNode *LD = cast<LoadSDNode>(Op);
8043
8044 SDValue Chain = LD->getChain();
8045 SDValue BasePtr = LD->getBasePtr();
8046 MachineMemOperand *MMO = LD->getMemOperand();
8047
8048 SDValue NewLD =
8049 DAG.getExtLoad(ISD::EXTLOAD, dl, getPointerTy(DAG.getDataLayout()), Chain,
8050 BasePtr, MVT::i8, MMO);
8051 SDValue Result = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, NewLD);
8052
8053 SDValue Ops[] = { Result, SDValue(NewLD.getNode(), 1) };
8054 return DAG.getMergeValues(Ops, dl);
8055}
8056
8057SDValue PPCTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
8058 if (Op.getOperand(1).getValueType().isVector())
8059 return LowerVectorStore(Op, DAG);
8060
8061 assert(Op.getOperand(1).getValueType() == MVT::i1 &&
8062 "Custom lowering only for i1 stores");
8063
8064 // First, zero extend to 32 bits, then use a truncating store to 8 bits.
8065
8066 SDLoc dl(Op);
8067 StoreSDNode *ST = cast<StoreSDNode>(Op);
8068
8069 SDValue Chain = ST->getChain();
8070 SDValue BasePtr = ST->getBasePtr();
8071 SDValue Value = ST->getValue();
8072 MachineMemOperand *MMO = ST->getMemOperand();
8073
8074   Value = DAG.getNode(ISD::ZERO_EXTEND, dl, getPointerTy(DAG.getDataLayout()),
8075                       Value);
8076 return DAG.getTruncStore(Chain, dl, Value, BasePtr, MVT::i8, MMO);
8077}
8078
8079// FIXME: Remove this once the ANDI glue bug is fixed:
8080SDValue PPCTargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
8081 assert(Op.getValueType() == MVT::i1 &&
8082 "Custom lowering only for i1 results");
8083
8084 SDLoc DL(Op);
8085 return DAG.getNode(PPCISD::ANDI_rec_1_GT_BIT, DL, MVT::i1, Op.getOperand(0));
8086}
8087
8088SDValue PPCTargetLowering::LowerTRUNCATEVector(SDValue Op,
8089 SelectionDAG &DAG) const {
8090
8091 // Implements a vector truncate that fits in a vector register as a shuffle.
8092 // We want to legalize vector truncates down to where the source fits in
8093 // a vector register (and target is therefore smaller than vector register
8094 // size). At that point legalization will try to custom lower the sub-legal
8095 // result and get here - where we can contain the truncate as a single target
8096 // operation.
8097
8098 // For example a trunc <2 x i16> to <2 x i8> could be visualized as follows:
8099 // <MSB1|LSB1, MSB2|LSB2> to <LSB1, LSB2>
8100 //
8101   // We will implement it for big-endian ordering as this (where u denotes
8102   // an undefined lane):
8103 // < MSB1|LSB1, MSB2|LSB2, uu, uu, uu, uu, uu, uu> to
8104 // < LSB1, LSB2, u, u, u, u, u, u, u, u, u, u, u, u, u, u>
8105 //
8106 // The same operation in little-endian ordering will be:
8107 // <uu, uu, uu, uu, uu, uu, LSB2|MSB2, LSB1|MSB1> to
8108 // <u, u, u, u, u, u, u, u, u, u, u, u, u, u, LSB2, LSB1>
8109
8110 EVT TrgVT = Op.getValueType();
8111 assert(TrgVT.isVector() && "Vector type expected.");
8112 unsigned TrgNumElts = TrgVT.getVectorNumElements();
8113 EVT EltVT = TrgVT.getVectorElementType();
8114 if (!isOperationCustom(Op.getOpcode(), TrgVT) ||
8115 TrgVT.getSizeInBits() > 128 || !isPowerOf2_32(TrgNumElts) ||
8116 !llvm::has_single_bit<uint32_t>(EltVT.getSizeInBits()))
8117 return SDValue();
8118
8119 SDValue N1 = Op.getOperand(0);
8120 EVT SrcVT = N1.getValueType();
8121 unsigned SrcSize = SrcVT.getSizeInBits();
8122 if (SrcSize > 256 || !isPowerOf2_32(SrcVT.getVectorNumElements()) ||
8123 !llvm::has_single_bit<uint32_t>(
8124 SrcVT.getVectorElementType().getSizeInBits()))
8125 return SDValue();
8126 if (SrcSize == 256 && SrcVT.getVectorNumElements() < 2)
8127 return SDValue();
8128
8129 unsigned WideNumElts = 128 / EltVT.getSizeInBits();
8130 EVT WideVT = EVT::getVectorVT(*DAG.getContext(), EltVT, WideNumElts);
8131
8132 SDLoc DL(Op);
8133 SDValue Op1, Op2;
8134 if (SrcSize == 256) {
8135 EVT VecIdxTy = getVectorIdxTy(DAG.getDataLayout());
8136 EVT SplitVT =
8137 SrcVT.getHalfNumVectorElementsVT(*DAG.getContext());
8138 unsigned SplitNumElts = SplitVT.getVectorNumElements();
8139 Op1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, N1,
8140 DAG.getConstant(0, DL, VecIdxTy));
8141 Op2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, N1,
8142 DAG.getConstant(SplitNumElts, DL, VecIdxTy));
8143 }
8144 else {
8145 Op1 = SrcSize == 128 ? N1 : widenVec(DAG, N1, DL);
8146 Op2 = DAG.getUNDEF(WideVT);
8147 }
8148
8149 // First list the elements we want to keep.
8150 unsigned SizeMult = SrcSize / TrgVT.getSizeInBits();
8151 SmallVector<int, 16> ShuffV;
8152 if (Subtarget.isLittleEndian())
8153 for (unsigned i = 0; i < TrgNumElts; ++i)
8154 ShuffV.push_back(i * SizeMult);
8155 else
8156 for (unsigned i = 1; i <= TrgNumElts; ++i)
8157 ShuffV.push_back(i * SizeMult - 1);
8158
8159 // Populate the remaining elements with undefs.
8160 for (unsigned i = TrgNumElts; i < WideNumElts; ++i)
8161 // ShuffV.push_back(i + WideNumElts);
8162 ShuffV.push_back(WideNumElts + 1);
8163
8164 Op1 = DAG.getNode(ISD::BITCAST, DL, WideVT, Op1);
8165 Op2 = DAG.getNode(ISD::BITCAST, DL, WideVT, Op2);
8166 return DAG.getVectorShuffle(WideVT, DL, Op1, Op2, ShuffV);
8167}
8168
8169/// LowerSELECT_CC - Lower floating point select_cc's into fsel instruction when
8170/// possible.
8171SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
8172 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
8173 EVT ResVT = Op.getValueType();
8174 EVT CmpVT = Op.getOperand(0).getValueType();
8175 SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
8176 SDValue TV = Op.getOperand(2), FV = Op.getOperand(3);
8177 SDLoc dl(Op);
8178
8179 // Without power9-vector, we don't have a native instruction for f128 comparison.
8180 // Following transformation to libcall is needed for setcc:
8181 // select_cc lhs, rhs, tv, fv, cc -> select_cc (setcc cc, x, y), 0, tv, fv, NE
8182 if (!Subtarget.hasP9Vector() && CmpVT == MVT::f128) {
8183 SDValue Z = DAG.getSetCC(
8184 dl, getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), CmpVT),
8185 LHS, RHS, CC);
8186 SDValue Zero = DAG.getConstant(0, dl, Z.getValueType());
8187 return DAG.getSelectCC(dl, Z, Zero, TV, FV, ISD::SETNE);
8188 }
8189
8190 // Not FP, or using SPE? Not a fsel.
8191 if (!CmpVT.isFloatingPoint() || !TV.getValueType().isFloatingPoint() ||
8192 Subtarget.hasSPE())
8193 return Op;
8194
8195 SDNodeFlags Flags = Op.getNode()->getFlags();
8196
8197 // We have xsmaxc[dq]p/xsminc[dq]p which are OK to emit even in the
8198 // presence of infinities.
8199 if (Subtarget.hasP9Vector() && LHS == TV && RHS == FV) {
8200 switch (CC) {
8201 default:
8202 break;
8203 case ISD::SETOGT:
8204 case ISD::SETGT:
8205 return DAG.getNode(PPCISD::XSMAXC, dl, Op.getValueType(), LHS, RHS);
8206 case ISD::SETOLT:
8207 case ISD::SETLT:
8208 return DAG.getNode(PPCISD::XSMINC, dl, Op.getValueType(), LHS, RHS);
8209 }
8210 }
8211
8212 // We might be able to do better than this under some circumstances, but in
8213 // general, fsel-based lowering of select is a finite-math-only optimization.
8214 // For more information, see section F.3 of the 2.06 ISA specification.
8215 // With ISA 3.0
8216 if ((!DAG.getTarget().Options.NoInfsFPMath && !Flags.hasNoInfs()) ||
8217 (!DAG.getTarget().Options.NoNaNsFPMath && !Flags.hasNoNaNs()) ||
8218 ResVT == MVT::f128)
8219 return Op;
8220
8221 // If the RHS of the comparison is a 0.0, we don't need to do the
8222 // subtraction at all.
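  // Sketch of the mapping used below, assuming fsel-style selection
  // (result = cond >= 0.0 ? tv : fv):
  //   select_cc lhs, 0.0, tv, fv, SETGE  ->  fsel lhs, tv, fv
  //   select_cc lhs, 0.0, tv, fv, SETLE  ->  fsel (fneg lhs), tv, fv
  // With a nonzero RHS, the condition operand becomes (lhs - rhs) instead.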
8223 SDValue Sel1;
8224 if (isFloatingPointZero(RHS))
8225 switch (CC) {
8226 default: break; // SETUO etc aren't handled by fsel.
8227 case ISD::SETNE:
8228 std::swap(TV, FV);
8229 [[fallthrough]];
8230 case ISD::SETEQ:
8231 if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits
8232 LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
8233 Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV);
8234 if (Sel1.getValueType() == MVT::f32) // Comparison is always 64-bits
8235 Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1);
8236 return DAG.getNode(PPCISD::FSEL, dl, ResVT,
8237 DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), Sel1, FV);
8238 case ISD::SETULT:
8239 case ISD::SETLT:
8240 std::swap(TV, FV); // fsel is natively setge, swap operands for setlt
8241 [[fallthrough]];
8242 case ISD::SETOGE:
8243 case ISD::SETGE:
8244 if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits
8245 LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
8246 return DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV);
8247 case ISD::SETUGT:
8248 case ISD::SETGT:
8249 std::swap(TV, FV); // fsel is natively setge, swap operands for setlt
8250 [[fallthrough]];
8251 case ISD::SETOLE:
8252 case ISD::SETLE:
8253 if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits
8254 LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
8255 return DAG.getNode(PPCISD::FSEL, dl, ResVT,
8256 DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), TV, FV);
8257 }
8258
8259 SDValue Cmp;
8260 switch (CC) {
8261 default: break; // SETUO etc aren't handled by fsel.
8262 case ISD::SETNE:
8263 std::swap(TV, FV);
8264 [[fallthrough]];
8265 case ISD::SETEQ:
8266 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags);
8267 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
8268 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
8269 Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
8270 if (Sel1.getValueType() == MVT::f32) // Comparison is always 64-bits
8271 Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1);
8272 return DAG.getNode(PPCISD::FSEL, dl, ResVT,
8273 DAG.getNode(ISD::FNEG, dl, MVT::f64, Cmp), Sel1, FV);
8274 case ISD::SETULT:
8275 case ISD::SETLT:
8276 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags);
8277 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
8278 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
8279 return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
8280 case ISD::SETOGE:
8281 case ISD::SETGE:
8282 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags);
8283 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
8284 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
8285 return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
8286 case ISD::SETUGT:
8287 case ISD::SETGT:
8288 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS, Flags);
8289 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
8290 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
8291 return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
8292 case ISD::SETOLE:
8293 case ISD::SETLE:
8294 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS, Flags);
8295 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
8296 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
8297 return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
8298 }
8299 return Op;
8300}
8301
8302static unsigned getPPCStrictOpcode(unsigned Opc) {
8303 switch (Opc) {
8304 default:
8305 llvm_unreachable("No strict version of this opcode!");
8306 case PPCISD::FCTIDZ:
8307 return PPCISD::STRICT_FCTIDZ;
8308 case PPCISD::FCTIWZ:
8309 return PPCISD::STRICT_FCTIWZ;
8310 case PPCISD::FCTIDUZ:
8311 return PPCISD::STRICT_FCTIDUZ;
8312 case PPCISD::FCTIWUZ:
8313 return PPCISD::STRICT_FCTIWUZ;
8314 case PPCISD::FCFID:
8315 return PPCISD::STRICT_FCFID;
8316 case PPCISD::FCFIDU:
8317 return PPCISD::STRICT_FCFIDU;
8318 case PPCISD::FCFIDS:
8319 return PPCISD::STRICT_FCFIDS;
8320 case PPCISD::FCFIDUS:
8321 return PPCISD::STRICT_FCFIDUS;
8322 }
8323}
8324
8325static SDValue convertFPToInt(SDValue Op, SelectionDAG &DAG,
8326 const PPCSubtarget &Subtarget) {
8327 SDLoc dl(Op);
8328 bool IsStrict = Op->isStrictFPOpcode();
8329 bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT ||
8330 Op.getOpcode() == ISD::STRICT_FP_TO_SINT;
8331
8332 // TODO: Any other flags to propagate?
8333 SDNodeFlags Flags;
8334 Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
8335
8336 // For strict nodes, source is the second operand.
8337 SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
8338 SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
8339 MVT DestTy = Op.getSimpleValueType();
8340 assert(Src.getValueType().isFloatingPoint() &&
8341 (DestTy == MVT::i8 || DestTy == MVT::i16 || DestTy == MVT::i32 ||
8342 DestTy == MVT::i64) &&
8343 "Invalid FP_TO_INT types");
8344 if (Src.getValueType() == MVT::f32) {
8345 if (IsStrict) {
8346 Src =
8347 DAG.getNode(ISD::STRICT_FP_EXTEND, dl,
8348 DAG.getVTList(MVT::f64, MVT::Other), {Chain, Src}, Flags);
8349 Chain = Src.getValue(1);
8350 } else
8351 Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);
8352 }
8353 if ((DestTy == MVT::i8 || DestTy == MVT::i16) && Subtarget.hasP9Vector())
8354 DestTy = Subtarget.isPPC64() ? MVT::i64 : MVT::i32;
8355 unsigned Opc = ISD::DELETED_NODE;
8356 switch (DestTy.SimpleTy) {
8357 default: llvm_unreachable("Unhandled FP_TO_INT type in custom expander!");
8358 case MVT::i32:
8359 Opc = IsSigned ? PPCISD::FCTIWZ
8360 : (Subtarget.hasFPCVT() ? PPCISD::FCTIWUZ : PPCISD::FCTIDZ);
8361 break;
8362 case MVT::i64:
8363 assert((IsSigned || Subtarget.hasFPCVT()) &&
8364 "i64 FP_TO_UINT is supported only with FPCVT");
8365 Opc = IsSigned ? PPCISD::FCTIDZ : PPCISD::FCTIDUZ;
8366 }
8367 EVT ConvTy = Src.getValueType() == MVT::f128 ? MVT::f128 : MVT::f64;
8368 SDValue Conv;
8369 if (IsStrict) {
8370 Opc = getPPCStrictOpcode(Opc);
8371 Conv = DAG.getNode(Opc, dl, DAG.getVTList(ConvTy, MVT::Other), {Chain, Src},
8372 Flags);
8373 } else {
8374 Conv = DAG.getNode(Opc, dl, ConvTy, Src);
8375 }
8376 return Conv;
8377}
8378
8379void PPCTargetLowering::LowerFP_TO_INTForReuse(SDValue Op, ReuseLoadInfo &RLI,
8380 SelectionDAG &DAG,
8381 const SDLoc &dl) const {
8382 SDValue Tmp = convertFPToInt(Op, DAG, Subtarget);
8383 bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT ||
8384 Op.getOpcode() == ISD::STRICT_FP_TO_SINT;
8385 bool IsStrict = Op->isStrictFPOpcode();
8386
8387 // Convert the FP value to an int value through memory.
8388 bool i32Stack = Op.getValueType() == MVT::i32 && Subtarget.hasSTFIWX() &&
8389 (IsSigned || Subtarget.hasFPCVT());
8390 SDValue FIPtr = DAG.CreateStackTemporary(i32Stack ? MVT::i32 : MVT::f64);
8391 int FI = cast<FrameIndexSDNode>(FIPtr)->getIndex();
8392 MachinePointerInfo MPI =
8393 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI);
8394
8395 // Emit a store to the stack slot.
8396 SDValue Chain = IsStrict ? Tmp.getValue(1) : DAG.getEntryNode();
8397 Align Alignment(DAG.getEVTAlign(Tmp.getValueType()));
8398 if (i32Stack) {
8399 MachineFunction &MF = DAG.getMachineFunction();
8400 Alignment = Align(4);
8401 MachineMemOperand *MMO =
8402 MF.getMachineMemOperand(MPI, MachineMemOperand::MOStore, 4, Alignment);
8403 SDValue Ops[] = { Chain, Tmp, FIPtr };
8404 Chain = DAG.getMemIntrinsicNode(PPCISD::STFIWX, dl,
8405 DAG.getVTList(MVT::Other), Ops, MVT::i32, MMO);
8406 } else
8407 Chain = DAG.getStore(Chain, dl, Tmp, FIPtr, MPI, Alignment);
8408
8409 // Result is a load from the stack slot. If loading 4 bytes, make sure to
8410 // add in a bias on big endian.
8411 if (Op.getValueType() == MVT::i32 && !i32Stack) {
8412 FIPtr = DAG.getNode(ISD::ADD, dl, FIPtr.getValueType(), FIPtr,
8413 DAG.getConstant(4, dl, FIPtr.getValueType()));
8414 MPI = MPI.getWithOffset(Subtarget.isLittleEndian() ? 0 : 4);
8415 }
8416
8417 RLI.Chain = Chain;
8418 RLI.Ptr = FIPtr;
8419 RLI.MPI = MPI;
8420 RLI.Alignment = Alignment;
8421}
8422
8423/// Custom lowers floating point to integer conversions to use
8424/// the direct move instructions available in ISA 2.07 to avoid the
8425/// need for load/store combinations.
8426SDValue PPCTargetLowering::LowerFP_TO_INTDirectMove(SDValue Op,
8427 SelectionDAG &DAG,
8428 const SDLoc &dl) const {
8429 SDValue Conv = convertFPToInt(Op, DAG, Subtarget);
8430 SDValue Mov = DAG.getNode(PPCISD::MFVSR, dl, Op.getValueType(), Conv);
8431 if (Op->isStrictFPOpcode())
8432 return DAG.getMergeValues({Mov, Conv.getValue(1)}, dl);
8433 else
8434 return Mov;
8435}
8436
8437SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
8438 const SDLoc &dl) const {
8439 bool IsStrict = Op->isStrictFPOpcode();
8440 bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT ||
8441 Op.getOpcode() == ISD::STRICT_FP_TO_SINT;
8442 SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
8443 EVT SrcVT = Src.getValueType();
8444 EVT DstVT = Op.getValueType();
8445
8446 // FP to INT conversions are legal for f128.
8447 if (SrcVT == MVT::f128)
8448 return Subtarget.hasP9Vector() ? Op : SDValue();
8449
8450 // Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on
8451 // PPC (the libcall is not available).
8452 if (SrcVT == MVT::ppcf128) {
8453 if (DstVT == MVT::i32) {
8454 // TODO: Conservatively pass only nofpexcept flag here. Need to check and
8455 // set other fast-math flags to FP operations in both strict and
8456 // non-strict cases. (FP_TO_SINT, FSUB)
8457 SDNodeFlags Flags;
8458 Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
8459
8460 if (IsSigned) {
8461 SDValue Lo, Hi;
8462 std::tie(Lo, Hi) = DAG.SplitScalar(Src, dl, MVT::f64, MVT::f64);
8463
8464 // Add the two halves of the long double in round-to-zero mode, and use
8465 // a smaller FP_TO_SINT.
8466 if (IsStrict) {
8467 SDValue Res = DAG.getNode(PPCISD::STRICT_FADDRTZ, dl,
8468 DAG.getVTList(MVT::f64, MVT::Other),
8469 {Op.getOperand(0), Lo, Hi}, Flags);
8470 return DAG.getNode(ISD::STRICT_FP_TO_SINT, dl,
8471 DAG.getVTList(MVT::i32, MVT::Other),
8472 {Res.getValue(1), Res}, Flags);
8473 } else {
8474 SDValue Res = DAG.getNode(PPCISD::FADDRTZ, dl, MVT::f64, Lo, Hi);
8475 return DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Res);
8476 }
8477 } else {
8478 const uint64_t TwoE31[] = {0x41e0000000000000LL, 0};
8479 APFloat APF = APFloat(APFloat::PPCDoubleDouble(), APInt(128, TwoE31));
8480 SDValue Cst = DAG.getConstantFP(APF, dl, SrcVT);
8481 SDValue SignMask = DAG.getConstant(0x80000000, dl, DstVT);
8482 if (IsStrict) {
8483 // Sel = Src < 0x80000000
8484 // FltOfs = select Sel, 0.0, 0x80000000
8485 // IntOfs = select Sel, 0, 0x80000000
8486 // Result = fp_to_sint(Src - FltOfs) ^ IntOfs
8487 SDValue Chain = Op.getOperand(0);
8488 EVT SetCCVT =
8489 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
8490 EVT DstSetCCVT =
8491 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), DstVT);
8492 SDValue Sel = DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT,
8493 Chain, true);
8494 Chain = Sel.getValue(1);
8495
8496 SDValue FltOfs = DAG.getSelect(
8497 dl, SrcVT, Sel, DAG.getConstantFP(0.0, dl, SrcVT), Cst);
8498 Sel = DAG.getBoolExtOrTrunc(Sel, dl, DstSetCCVT, DstVT);
8499
8500 SDValue Val = DAG.getNode(ISD::STRICT_FSUB, dl,
8501 DAG.getVTList(SrcVT, MVT::Other),
8502 {Chain, Src, FltOfs}, Flags);
8503 Chain = Val.getValue(1);
8504 SDValue SInt = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl,
8505 DAG.getVTList(DstVT, MVT::Other),
8506 {Chain, Val}, Flags);
8507 Chain = SInt.getValue(1);
8508 SDValue IntOfs = DAG.getSelect(
8509 dl, DstVT, Sel, DAG.getConstant(0, dl, DstVT), SignMask);
8510 SDValue Result = DAG.getNode(ISD::XOR, dl, DstVT, SInt, IntOfs);
8511 return DAG.getMergeValues({Result, Chain}, dl);
8512 } else {
8513 // X>=2^31 ? (int)(X-2^31)+0x80000000 : (int)X
8514 // FIXME: generated code sucks.
8515 SDValue True = DAG.getNode(ISD::FSUB, dl, MVT::ppcf128, Src, Cst);
8516 True = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, True);
8517 True = DAG.getNode(ISD::ADD, dl, MVT::i32, True, SignMask);
8518 SDValue False = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Src);
8519 return DAG.getSelectCC(dl, Src, Cst, True, False, ISD::SETGE);
8520 }
8521 }
8522 }
8523
8524 return SDValue();
8525 }
8526
8527 if (Subtarget.hasDirectMove() && Subtarget.isPPC64())
8528 return LowerFP_TO_INTDirectMove(Op, DAG, dl);
8529
8530 ReuseLoadInfo RLI;
8531 LowerFP_TO_INTForReuse(Op, RLI, DAG, dl);
8532
8533 return DAG.getLoad(Op.getValueType(), dl, RLI.Chain, RLI.Ptr, RLI.MPI,
8534 RLI.Alignment, RLI.MMOFlags(), RLI.AAInfo, RLI.Ranges);
8535}
8536
8537// We're trying to insert a regular store, S, and then a load, L. If the
8538// incoming value, O, is a load, we might just be able to have our load use the
8539// address used by O. However, we don't know if anything else will store to
8540// that address before we can load from it. To prevent this situation, we need
8541// to insert our load, L, into the chain as a peer of O. To do this, we give L
8542// the same chain operand as O, we create a token factor from the chain results
8543// of O and L, and we replace all uses of O's chain result with that token
8544// factor (see spliceIntoChain below for this last part).
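// As a sketch: if O's chain result originally fed users U1 and U2, after the
// splice we have TF = TokenFactor(O.chain, L.chain) and U1/U2 now use TF, so L
// is ordered as a peer of O rather than after O's original users.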
8545bool PPCTargetLowering::canReuseLoadAddress(SDValue Op, EVT MemVT,
8546 ReuseLoadInfo &RLI,
8547 SelectionDAG &DAG,
8548 ISD::LoadExtType ET) const {
8549 // Conservatively skip reusing for constrained FP nodes.
8550 if (Op->isStrictFPOpcode())
8551 return false;
8552
8553 SDLoc dl(Op);
8554 bool ValidFPToUint = Op.getOpcode() == ISD::FP_TO_UINT &&
8555 (Subtarget.hasFPCVT() || Op.getValueType() == MVT::i32);
8556 if (ET == ISD::NON_EXTLOAD &&
8557 (ValidFPToUint || Op.getOpcode() == ISD::FP_TO_SINT) &&
8558 isOperationLegalOrCustom(Op.getOpcode(),
8559 Op.getOperand(0).getValueType())) {
8560
8561 LowerFP_TO_INTForReuse(Op, RLI, DAG, dl);
8562 return true;
8563 }
8564
8565 LoadSDNode *LD = dyn_cast<LoadSDNode>(Op);
8566 if (!LD || LD->getExtensionType() != ET || LD->isVolatile() ||
8567 LD->isNonTemporal())
8568 return false;
8569 if (LD->getMemoryVT() != MemVT)
8570 return false;
8571
8572 // If the result of the load is an illegal type, then we can't build a
8573 // valid chain for reuse since the legalised loads and token factor node that
8574 // ties the legalised loads together uses a different output chain than the
8575 // illegal load.
8576 if (!isTypeLegal(LD->getValueType(0)))
8577 return false;
8578
8579 RLI.Ptr = LD->getBasePtr();
8580 if (LD->isIndexed() && !LD->getOffset().isUndef()) {
8581 assert(LD->getAddressingMode() == ISD::PRE_INC &&
8582 "Non-pre-inc AM on PPC?");
8583 RLI.Ptr = DAG.getNode(ISD::ADD, dl, RLI.Ptr.getValueType(), RLI.Ptr,
8584 LD->getOffset());
8585 }
8586
8587 RLI.Chain = LD->getChain();
8588 RLI.MPI = LD->getPointerInfo();
8589 RLI.IsDereferenceable = LD->isDereferenceable();
8590 RLI.IsInvariant = LD->isInvariant();
8591 RLI.Alignment = LD->getAlign();
8592 RLI.AAInfo = LD->getAAInfo();
8593 RLI.Ranges = LD->getRanges();
8594
8595 RLI.ResChain = SDValue(LD, LD->isIndexed() ? 2 : 1);
8596 return true;
8597}
8598
8599// Given the head of the old chain, ResChain, insert a token factor containing
8600// it and NewResChain, and make users of ResChain now be users of that token
8601// factor.
8602// TODO: Remove and use DAG::makeEquivalentMemoryOrdering() instead.
8603void PPCTargetLowering::spliceIntoChain(SDValue ResChain,
8604 SDValue NewResChain,
8605 SelectionDAG &DAG) const {
8606 if (!ResChain)
8607 return;
8608
8609 SDLoc dl(NewResChain);
8610
8611 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
8612 NewResChain, DAG.getUNDEF(MVT::Other));
8613 assert(TF.getNode() != NewResChain.getNode() &&
8614 "A new TF really is required here");
8615
8616 DAG.ReplaceAllUsesOfValueWith(ResChain, TF);
8617 DAG.UpdateNodeOperands(TF.getNode(), ResChain, NewResChain);
8618}
8619
8620/// Analyze the profitability of a direct move:
8621/// prefer a float load to an int load plus a direct move
8622/// when there is no integer use of the int load.
8623bool PPCTargetLowering::directMoveIsProfitable(const SDValue &Op) const {
8624 SDNode *Origin = Op.getOperand(Op->isStrictFPOpcode() ? 1 : 0).getNode();
8625 if (Origin->getOpcode() != ISD::LOAD)
8626 return true;
8627
8628 // If there is no LXSIBZX/LXSIHZX, like Power8,
8629 // prefer direct move if the memory size is 1 or 2 bytes.
8630 MachineMemOperand *MMO = cast<LoadSDNode>(Origin)->getMemOperand();
8631 if (!Subtarget.hasP9Vector() &&
8632 (!MMO->getSize().hasValue() || MMO->getSize().getValue() <= 2))
8633 return true;
8634
8635 for (SDNode::use_iterator UI = Origin->use_begin(),
8636 UE = Origin->use_end();
8637 UI != UE; ++UI) {
8638
8639 // Only look at the users of the loaded value.
8640 if (UI.getUse().get().getResNo() != 0)
8641 continue;
8642
8643 if (UI->getOpcode() != ISD::SINT_TO_FP &&
8644 UI->getOpcode() != ISD::UINT_TO_FP &&
8645 UI->getOpcode() != ISD::STRICT_SINT_TO_FP &&
8646 UI->getOpcode() != ISD::STRICT_UINT_TO_FP)
8647 return true;
8648 }
8649
8650 return false;
8651}
8652
8653static SDValue convertIntToFP(SDValue Op, SDValue Src, SelectionDAG &DAG,
8654 const PPCSubtarget &Subtarget,
8655 SDValue Chain = SDValue()) {
8656 bool IsSigned = Op.getOpcode() == ISD::SINT_TO_FP ||
8657 Op.getOpcode() == ISD::STRICT_SINT_TO_FP;
8658 SDLoc dl(Op);
8659
8660 // TODO: Any other flags to propagate?
8661 SDNodeFlags Flags;
8662 Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
8663
8664 // If we have FCFIDS, then use it when converting to single-precision.
8665 // Otherwise, convert to double-precision and then round.
8666 bool IsSingle = Op.getValueType() == MVT::f32 && Subtarget.hasFPCVT();
8667 unsigned ConvOpc = IsSingle ? (IsSigned ? PPCISD::FCFIDS : PPCISD::FCFIDUS)
8668 : (IsSigned ? PPCISD::FCFID : PPCISD::FCFIDU);
8669 EVT ConvTy = IsSingle ? MVT::f32 : MVT::f64;
8670 if (Op->isStrictFPOpcode()) {
8671 if (!Chain)
8672 Chain = Op.getOperand(0);
8673 return DAG.getNode(getPPCStrictOpcode(ConvOpc), dl,
8674 DAG.getVTList(ConvTy, MVT::Other), {Chain, Src}, Flags);
8675 } else
8676 return DAG.getNode(ConvOpc, dl, ConvTy, Src);
8677}
8678
8679/// Custom lowers integer to floating point conversions to use
8680/// the direct move instructions available in ISA 2.07 to avoid the
8681/// need for load/store combinations.
8682SDValue PPCTargetLowering::LowerINT_TO_FPDirectMove(SDValue Op,
8683 SelectionDAG &DAG,
8684 const SDLoc &dl) const {
8685 assert((Op.getValueType() == MVT::f32 ||
8686 Op.getValueType() == MVT::f64) &&
8687 "Invalid floating point type as target of conversion");
8688 assert(Subtarget.hasFPCVT() &&
8689 "Int to FP conversions with direct moves require FPCVT");
8690 SDValue Src = Op.getOperand(Op->isStrictFPOpcode() ? 1 : 0);
8691 bool WordInt = Src.getSimpleValueType().SimpleTy == MVT::i32;
8692 bool Signed = Op.getOpcode() == ISD::SINT_TO_FP ||
8693 Op.getOpcode() == ISD::STRICT_SINT_TO_FP;
8694 unsigned MovOpc = (WordInt && !Signed) ? PPCISD::MTVSRZ : PPCISD::MTVSRA;
8695 SDValue Mov = DAG.getNode(MovOpc, dl, MVT::f64, Src);
8696 return convertIntToFP(Op, Mov, DAG, Subtarget);
8697}
8698
8699static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl) {
8700
8701 EVT VecVT = Vec.getValueType();
8702 assert(VecVT.isVector() && "Expected a vector type.");
8703 assert(VecVT.getSizeInBits() < 128 && "Vector is already full width.");
8704
8705 EVT EltVT = VecVT.getVectorElementType();
8706 unsigned WideNumElts = 128 / EltVT.getSizeInBits();
8707 EVT WideVT = EVT::getVectorVT(*DAG.getContext(), EltVT, WideNumElts);
8708
8709 unsigned NumConcat = WideNumElts / VecVT.getVectorNumElements();
8710 SmallVector<SDValue, 16> Ops(NumConcat);
8711 Ops[0] = Vec;
8712 SDValue UndefVec = DAG.getUNDEF(VecVT);
8713 for (unsigned i = 1; i < NumConcat; ++i)
8714 Ops[i] = UndefVec;
8715
8716 return DAG.getNode(ISD::CONCAT_VECTORS, dl, WideVT, Ops);
8717}
8718
8719SDValue PPCTargetLowering::LowerINT_TO_FPVector(SDValue Op, SelectionDAG &DAG,
8720 const SDLoc &dl) const {
8721 bool IsStrict = Op->isStrictFPOpcode();
8722 unsigned Opc = Op.getOpcode();
8723 SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
8724 assert((Opc == ISD::UINT_TO_FP || Opc == ISD::SINT_TO_FP ||
8725 Opc == ISD::STRICT_UINT_TO_FP || Opc == ISD::STRICT_SINT_TO_FP) &&
8726 "Unexpected conversion type");
8727 assert((Op.getValueType() == MVT::v2f64 || Op.getValueType() == MVT::v4f32) &&
8728 "Supports conversions to v2f64/v4f32 only.");
8729
8730 // TODO: Any other flags to propagate?
8731 SDNodeFlags Flags;
8732 Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
8733
8734 bool SignedConv = Opc == ISD::SINT_TO_FP || Opc == ISD::STRICT_SINT_TO_FP;
8735 bool FourEltRes = Op.getValueType() == MVT::v4f32;
8736
8737 SDValue Wide = widenVec(DAG, Src, dl);
8738 EVT WideVT = Wide.getValueType();
8739 unsigned WideNumElts = WideVT.getVectorNumElements();
8740 MVT IntermediateVT = FourEltRes ? MVT::v4i32 : MVT::v2i64;
8741
8742 SmallVector<int, 16> ShuffV;
8743 for (unsigned i = 0; i < WideNumElts; ++i)
8744 ShuffV.push_back(i + WideNumElts);
8745
8746 int Stride = FourEltRes ? WideNumElts / 4 : WideNumElts / 2;
8747 int SaveElts = FourEltRes ? 4 : 2;
8748 if (Subtarget.isLittleEndian())
8749 for (int i = 0; i < SaveElts; i++)
8750 ShuffV[i * Stride] = i;
8751 else
8752 for (int i = 1; i <= SaveElts; i++)
8753 ShuffV[i * Stride - 1] = i - 1;
8754
8755 SDValue ShuffleSrc2 =
8756 SignedConv ? DAG.getUNDEF(WideVT) : DAG.getConstant(0, dl, WideVT);
8757 SDValue Arrange = DAG.getVectorShuffle(WideVT, dl, Wide, ShuffleSrc2, ShuffV);
8758
8759 SDValue Extend;
8760 if (SignedConv) {
8761 Arrange = DAG.getBitcast(IntermediateVT, Arrange);
8762 EVT ExtVT = Src.getValueType();
8763 if (Subtarget.hasP9Altivec())
8764 ExtVT = EVT::getVectorVT(*DAG.getContext(), WideVT.getVectorElementType(),
8765 IntermediateVT.getVectorNumElements());
8766
8767 Extend = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, IntermediateVT, Arrange,
8768 DAG.getValueType(ExtVT));
8769 } else
8770 Extend = DAG.getNode(ISD::BITCAST, dl, IntermediateVT, Arrange);
8771
8772 if (IsStrict)
8773 return DAG.getNode(Opc, dl, DAG.getVTList(Op.getValueType(), MVT::Other),
8774 {Op.getOperand(0), Extend}, Flags);
8775
8776 return DAG.getNode(Opc, dl, Op.getValueType(), Extend);
8777}
8778
8779SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
8780 SelectionDAG &DAG) const {
8781 SDLoc dl(Op);
8782 bool IsSigned = Op.getOpcode() == ISD::SINT_TO_FP ||
8783 Op.getOpcode() == ISD::STRICT_SINT_TO_FP;
8784 bool IsStrict = Op->isStrictFPOpcode();
8785 SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
8786 SDValue Chain = IsStrict ? Op.getOperand(0) : DAG.getEntryNode();
8787
8788 // TODO: Any other flags to propagate?
8789 SDNodeFlags Flags;
8790 Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
8791
8792 EVT InVT = Src.getValueType();
8793 EVT OutVT = Op.getValueType();
8794 if (OutVT.isVector() && OutVT.isFloatingPoint() &&
8795 isOperationCustom(Op.getOpcode(), InVT))
8796 return LowerINT_TO_FPVector(Op, DAG, dl);
8797
8798 // Conversions to f128 are legal.
8799 if (Op.getValueType() == MVT::f128)
8800 return Subtarget.hasP9Vector() ? Op : SDValue();
8801
8802 // Don't handle ppc_fp128 here; let it be lowered to a libcall.
8803 if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
8804 return SDValue();
8805
8806 if (Src.getValueType() == MVT::i1) {
8807 SDValue Sel = DAG.getNode(ISD::SELECT, dl, Op.getValueType(), Src,
8808 DAG.getConstantFP(1.0, dl, Op.getValueType()),
8809 DAG.getConstantFP(0.0, dl, Op.getValueType()));
8810 if (IsStrict)
8811 return DAG.getMergeValues({Sel, Chain}, dl);
8812 else
8813 return Sel;
8814 }
8815
8816 // If we have direct moves, we can do all the conversion, skip the store/load
8817 // however, without FPCVT we can't do most conversions.
8818 if (Subtarget.hasDirectMove() && directMoveIsProfitable(Op) &&
8819 Subtarget.isPPC64() && Subtarget.hasFPCVT())
8820 return LowerINT_TO_FPDirectMove(Op, DAG, dl);
8821
8822 assert((IsSigned || Subtarget.hasFPCVT()) &&
8823 "UINT_TO_FP is supported only with FPCVT");
8824
8825 if (Src.getValueType() == MVT::i64) {
8826 SDValue SINT = Src;
8827 // When converting to single-precision, we actually need to convert
8828 // to double-precision first and then round to single-precision.
8829 // To avoid double-rounding effects during that operation, we have
8830 // to prepare the input operand. Bits that might be truncated when
8831 // converting to double-precision are replaced by a bit that won't
8832 // be lost at this stage, but is below the single-precision rounding
8833 // position.
8834 //
8835 // However, if -enable-unsafe-fp-math is in effect, accept double
8836 // rounding to avoid the extra overhead.
8837 if (Op.getValueType() == MVT::f32 &&
8838 !Subtarget.hasFPCVT() &&
8839 !DAG.getTarget().Options.UnsafeFPMath) {
8840
8841 // Twiddle input to make sure the low 11 bits are zero. (If this
8842 // is the case, we are guaranteed the value will fit into the 53 bit
8843 // mantissa of an IEEE double-precision value without rounding.)
8844 // If any of those low 11 bits were not zero originally, make sure
8845 // bit 12 (value 2048) is set instead, so that the final rounding
8846 // to single-precision gets the correct result.
8847 SDValue Round = DAG.getNode(ISD::AND, dl, MVT::i64,
8848 SINT, DAG.getConstant(2047, dl, MVT::i64));
8849 Round = DAG.getNode(ISD::ADD, dl, MVT::i64,
8850 Round, DAG.getConstant(2047, dl, MVT::i64));
8851 Round = DAG.getNode(ISD::OR, dl, MVT::i64, Round, SINT);
8852 Round = DAG.getNode(ISD::AND, dl, MVT::i64,
8853 Round, DAG.getConstant(-2048, dl, MVT::i64));
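  // For example, SINT = 0x1000000000000FFF becomes 0x1000000000000800 (the
  // low 11 bits collapse into bit 11), while 0x1000000000000000 is unchanged.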
8854
8855 // However, we cannot use that value unconditionally: if the magnitude
8856 // of the input value is small, the bit-twiddling we did above might
8857 // end up visibly changing the output. Fortunately, in that case, we
8858 // don't need to twiddle bits since the original input will convert
8859 // exactly to double-precision floating-point already. Therefore,
8860 // construct a conditional to use the original value if the top 11
8861 // bits are all sign-bit copies, and use the rounded value computed
8862 // above otherwise.
8863 SDValue Cond = DAG.getNode(ISD::SRA, dl, MVT::i64,
8864 SINT, DAG.getConstant(53, dl, MVT::i32));
8865 Cond = DAG.getNode(ISD::ADD, dl, MVT::i64,
8866 Cond, DAG.getConstant(1, dl, MVT::i64));
8867 Cond = DAG.getSetCC(
8868 dl,
8869 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), MVT::i64),
8870 Cond, DAG.getConstant(1, dl, MVT::i64), ISD::SETUGT);
8871
8872 SINT = DAG.getNode(ISD::SELECT, dl, MVT::i64, Cond, Round, SINT);
8873 }
8874
8875 ReuseLoadInfo RLI;
8876 SDValue Bits;
8877
8878 MachineFunction &MF = DAG.getMachineFunction();
8879 if (canReuseLoadAddress(SINT, MVT::i64, RLI, DAG)) {
8880 Bits = DAG.getLoad(MVT::f64, dl, RLI.Chain, RLI.Ptr, RLI.MPI,
8881 RLI.Alignment, RLI.MMOFlags(), RLI.AAInfo, RLI.Ranges);
8882 spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
8883 } else if (Subtarget.hasLFIWAX() &&
8884 canReuseLoadAddress(SINT, MVT::i32, RLI, DAG, ISD::SEXTLOAD)) {
8885 MachineMemOperand *MMO =
8886 MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
8887 RLI.Alignment, RLI.AAInfo, RLI.Ranges);
8888 SDValue Ops[] = { RLI.Chain, RLI.Ptr };
8889 Bits = DAG.getMemIntrinsicNode(PPCISD::LFIWAX, dl,
8890 DAG.getVTList(MVT::f64, MVT::Other),
8891 Ops, MVT::i32, MMO);
8892 spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
8893 } else if (Subtarget.hasFPCVT() &&
8894 canReuseLoadAddress(SINT, MVT::i32, RLI, DAG, ISD::ZEXTLOAD)) {
8895 MachineMemOperand *MMO =
8896 MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
8897 RLI.Alignment, RLI.AAInfo, RLI.Ranges);
8898 SDValue Ops[] = { RLI.Chain, RLI.Ptr };
8899 Bits = DAG.getMemIntrinsicNode(PPCISD::LFIWZX, dl,
8900 DAG.getVTList(MVT::f64, MVT::Other),
8901 Ops, MVT::i32, MMO);
8902 spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
8903 } else if (((Subtarget.hasLFIWAX() &&
8904 SINT.getOpcode() == ISD::SIGN_EXTEND) ||
8905 (Subtarget.hasFPCVT() &&
8906 SINT.getOpcode() == ISD::ZERO_EXTEND)) &&
8907 SINT.getOperand(0).getValueType() == MVT::i32) {
8908 MachineFrameInfo &MFI = MF.getFrameInfo();
8909 EVT PtrVT = getPointerTy(DAG.getDataLayout());
8910
8911 int FrameIdx = MFI.CreateStackObject(4, Align(4), false);
8912 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
8913
8914 SDValue Store = DAG.getStore(Chain, dl, SINT.getOperand(0), FIdx,
8915 MachinePointerInfo::getFixedStack(
8916 DAG.getMachineFunction(), FrameIdx));
8917 Chain = Store;
8918
8919 assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 &&
8920 "Expected an i32 store");
8921
8922 RLI.Ptr = FIdx;
8923 RLI.Chain = Chain;
8924 RLI.MPI =
8925 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
8926 RLI.Alignment = Align(4);
8927
8928 MachineMemOperand *MMO =
8929 MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
8930 RLI.Alignment, RLI.AAInfo, RLI.Ranges);
8931 SDValue Ops[] = { RLI.Chain, RLI.Ptr };
8934 dl, DAG.getVTList(MVT::f64, MVT::Other),
8935 Ops, MVT::i32, MMO);
8936 Chain = Bits.getValue(1);
8937 } else
8938 Bits = DAG.getNode(ISD::BITCAST, dl, MVT::f64, SINT);
8939
8940 SDValue FP = convertIntToFP(Op, Bits, DAG, Subtarget, Chain);
8941 if (IsStrict)
8942 Chain = FP.getValue(1);
8943
8944 if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) {
8945 if (IsStrict)
8946 FP = DAG.getNode(ISD::STRICT_FP_ROUND, dl,
8947 DAG.getVTList(MVT::f32, MVT::Other),
8948 {Chain, FP, DAG.getIntPtrConstant(0, dl)}, Flags);
8949 else
8950 FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP,
8951 DAG.getIntPtrConstant(0, dl, /*isTarget=*/true));
8952 }
8953 return FP;
8954 }
8955
8956 assert(Src.getValueType() == MVT::i32 &&
8957 "Unhandled INT_TO_FP type in custom expander!");
8958 // Since we only generate this in 64-bit mode, we can take advantage of
8959 // 64-bit registers. In particular, sign extend the input value into the
8960 // 64-bit register with extsw, store the WHOLE 64-bit value into the stack
8961 // then lfd it and fcfid it.
8962 MachineFunction &MF = DAG.getMachineFunction();
8963 MachineFrameInfo &MFI = MF.getFrameInfo();
8964 EVT PtrVT = getPointerTy(MF.getDataLayout());
8965
8966 SDValue Ld;
8967 if (Subtarget.hasLFIWAX() || Subtarget.hasFPCVT()) {
8968 ReuseLoadInfo RLI;
8969 bool ReusingLoad;
8970 if (!(ReusingLoad = canReuseLoadAddress(Src, MVT::i32, RLI, DAG))) {
8971 int FrameIdx = MFI.CreateStackObject(4, Align(4), false);
8972 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
8973
8974 SDValue Store = DAG.getStore(Chain, dl, Src, FIdx,
8975 MachinePointerInfo::getFixedStack(
8976 DAG.getMachineFunction(), FrameIdx));
8977 Chain = Store;
8978
8979 assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 &&
8980 "Expected an i32 store");
8981
8982 RLI.Ptr = FIdx;
8983 RLI.Chain = Chain;
8984 RLI.MPI =
8985 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
8986 RLI.Alignment = Align(4);
8987 }
8988
8989 MachineMemOperand *MMO =
8990 MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
8991 RLI.Alignment, RLI.AAInfo, RLI.Ranges);
8992 SDValue Ops[] = { RLI.Chain, RLI.Ptr };
8993 Ld = DAG.getMemIntrinsicNode(IsSigned ? PPCISD::LFIWAX : PPCISD::LFIWZX, dl,
8994 DAG.getVTList(MVT::f64, MVT::Other), Ops,
8995 MVT::i32, MMO);
8996 Chain = Ld.getValue(1);
8997 if (ReusingLoad)
8998 spliceIntoChain(RLI.ResChain, Ld.getValue(1), DAG);
8999 } else {
9000 assert(Subtarget.isPPC64() &&
9001 "i32->FP without LFIWAX supported only on PPC64");
9002
9003 int FrameIdx = MFI.CreateStackObject(8, Align(8), false);
9004 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
9005
9006 SDValue Ext64 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i64, Src);
9007
9008 // STD the extended value into the stack slot.
9009 SDValue Store = DAG.getStore(
9010 Chain, dl, Ext64, FIdx,
9011 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx));
9012 Chain = Store;
9013
9014 // Load the value as a double.
9015 Ld = DAG.getLoad(
9016 MVT::f64, dl, Chain, FIdx,
9017 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx));
9018 Chain = Ld.getValue(1);
9019 }
9020
9021 // FCFID it and return it.
9022 SDValue FP = convertIntToFP(Op, Ld, DAG, Subtarget, Chain);
9023 if (IsStrict)
9024 Chain = FP.getValue(1);
9025 if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) {
9026 if (IsStrict)
9027 FP = DAG.getNode(ISD::STRICT_FP_ROUND, dl,
9028 DAG.getVTList(MVT::f32, MVT::Other),
9029 {Chain, FP, DAG.getIntPtrConstant(0, dl)}, Flags);
9030 else
9031 FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP,
9032 DAG.getIntPtrConstant(0, dl, /*isTarget=*/true));
9033 }
9034 return FP;
9035}
9036
9037SDValue PPCTargetLowering::LowerGET_ROUNDING(SDValue Op,
9038 SelectionDAG &DAG) const {
9039 SDLoc dl(Op);
9040 /*
9041 The rounding mode is in bits 30:31 of FPSR, and has the following
9042 settings:
9043 00 Round to nearest
9044 01 Round to 0
9045 10 Round to +inf
9046 11 Round to -inf
9047
9048 GET_ROUNDING, on the other hand, expects the following:
9049 -1 Undefined
9050 0 Round to 0
9051 1 Round to nearest
9052 2 Round to +inf
9053 3 Round to -inf
9054
9055 To perform the conversion, we do:
9056 ((FPSCR & 0x3) ^ ((~FPSCR & 0x3) >> 1))
9057 */
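  // Worked through the formula: RN=0b00 gives (0 ^ (3 >> 1)) = 1 (nearest),
  // 0b01 gives (1 ^ (2 >> 1)) = 0 (toward zero), 0b10 gives (2 ^ (1 >> 1)) = 2
  // (toward +inf), and 0b11 gives (3 ^ (0 >> 1)) = 3 (toward -inf).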
9058
9059 MachineFunction &MF = DAG.getMachineFunction();
9060 EVT VT = Op.getValueType();
9061 EVT PtrVT = getPointerTy(MF.getDataLayout());
9062
9063 // Save FP Control Word to register
9064 SDValue Chain = Op.getOperand(0);
9065 SDValue MFFS = DAG.getNode(PPCISD::MFFS, dl, {MVT::f64, MVT::Other}, Chain);
9066 Chain = MFFS.getValue(1);
9067
9068 SDValue CWD;
9069 if (isTypeLegal(MVT::i64)) {
9070 CWD = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32,
9071 DAG.getNode(ISD::BITCAST, dl, MVT::i64, MFFS));
9072 } else {
9073 // Save FP register to stack slot
9074 int SSFI = MF.getFrameInfo().CreateStackObject(8, Align(8), false);
9075 SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
9076 Chain = DAG.getStore(Chain, dl, MFFS, StackSlot, MachinePointerInfo());
9077
9078 // Load FP Control Word from low 32 bits of stack slot.
9079 assert(hasBigEndianPartOrdering(MVT::i64, MF.getDataLayout()) &&
9080 "Stack slot adjustment is valid only on big endian subtargets!");
9081 SDValue Four = DAG.getConstant(4, dl, PtrVT);
9082 SDValue Addr = DAG.getNode(ISD::ADD, dl, PtrVT, StackSlot, Four);
9083 CWD = DAG.getLoad(MVT::i32, dl, Chain, Addr, MachinePointerInfo());
9084 Chain = CWD.getValue(1);
9085 }
9086
9087 // Transform as necessary
9088 SDValue CWD1 =
9089 DAG.getNode(ISD::AND, dl, MVT::i32,
9090 CWD, DAG.getConstant(3, dl, MVT::i32));
9091 SDValue CWD2 =
9092 DAG.getNode(ISD::SRL, dl, MVT::i32,
9093 DAG.getNode(ISD::AND, dl, MVT::i32,
9094 DAG.getNode(ISD::XOR, dl, MVT::i32,
9095 CWD, DAG.getConstant(3, dl, MVT::i32)),
9096 DAG.getConstant(3, dl, MVT::i32)),
9097 DAG.getConstant(1, dl, MVT::i32));
9098
9099 SDValue RetVal =
9100 DAG.getNode(ISD::XOR, dl, MVT::i32, CWD1, CWD2);
9101
9102 RetVal =
9103 DAG.getNode((VT.getSizeInBits() < 16 ? ISD::TRUNCATE : ISD::ZERO_EXTEND),
9104 dl, VT, RetVal);
9105
9106 return DAG.getMergeValues({RetVal, Chain}, dl);
9107}
9108
9109SDValue PPCTargetLowering::LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) const {
9110 EVT VT = Op.getValueType();
9111 unsigned BitWidth = VT.getSizeInBits();
9112 SDLoc dl(Op);
9113 assert(Op.getNumOperands() == 3 &&
9114 VT == Op.getOperand(1).getValueType() &&
9115 "Unexpected SHL!");
9116
9117 // Expand into a bunch of logical ops. Note that these ops
9118 // depend on the PPC behavior for oversized shift amounts.
9119 SDValue Lo = Op.getOperand(0);
9120 SDValue Hi = Op.getOperand(1);
9121 SDValue Amt = Op.getOperand(2);
9122 EVT AmtVT = Amt.getValueType();
9123
9124 SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
9125 DAG.getConstant(BitWidth, dl, AmtVT), Amt);
9126 SDValue Tmp2 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Amt);
9127 SDValue Tmp3 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Tmp1);
9128 SDValue Tmp4 = DAG.getNode(ISD::OR , dl, VT, Tmp2, Tmp3);
9129 SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
9130 DAG.getConstant(-BitWidth, dl, AmtVT));
9131 SDValue Tmp6 = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Tmp5);
9132 SDValue OutHi = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6);
9133 SDValue OutLo = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Amt);
9134 SDValue OutOps[] = { OutLo, OutHi };
9135 return DAG.getMergeValues(OutOps, dl);
9136}
9137
9138SDValue PPCTargetLowering::LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) const {
9139 EVT VT = Op.getValueType();
9140 SDLoc dl(Op);
9141 unsigned BitWidth = VT.getSizeInBits();
9142 assert(Op.getNumOperands() == 3 &&
9143 VT == Op.getOperand(1).getValueType() &&
9144 "Unexpected SRL!");
9145
9146 // Expand into a bunch of logical ops. Note that these ops
9147 // depend on the PPC behavior for oversized shift amounts.
9148 SDValue Lo = Op.getOperand(0);
9149 SDValue Hi = Op.getOperand(1);
9150 SDValue Amt = Op.getOperand(2);
9151 EVT AmtVT = Amt.getValueType();
9152
9153 SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
9154 DAG.getConstant(BitWidth, dl, AmtVT), Amt);
9155 SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt);
9156 SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1);
9157 SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
9158 SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
9159 DAG.getConstant(-BitWidth, dl, AmtVT));
9160 SDValue Tmp6 = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Tmp5);
9161 SDValue OutLo = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6);
9162 SDValue OutHi = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Amt);
9163 SDValue OutOps[] = { OutLo, OutHi };
9164 return DAG.getMergeValues(OutOps, dl);
9165}
9166
9167SDValue PPCTargetLowering::LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) const {
9168 SDLoc dl(Op);
9169 EVT VT = Op.getValueType();
9170 unsigned BitWidth = VT.getSizeInBits();
9171 assert(Op.getNumOperands() == 3 &&
9172 VT == Op.getOperand(1).getValueType() &&
9173 "Unexpected SRA!");
9174
9175 // Expand into a bunch of logical ops, followed by a select_cc.
9176 SDValue Lo = Op.getOperand(0);
9177 SDValue Hi = Op.getOperand(1);
9178 SDValue Amt = Op.getOperand(2);
9179 EVT AmtVT = Amt.getValueType();
9180
9181 SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
9182 DAG.getConstant(BitWidth, dl, AmtVT), Amt);
9183 SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt);
9184 SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1);
9185 SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
9186 SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
9187 DAG.getConstant(-BitWidth, dl, AmtVT));
9188 SDValue Tmp6 = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Tmp5);
9189 SDValue OutHi = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Amt);
9190 SDValue OutLo = DAG.getSelectCC(dl, Tmp5, DAG.getConstant(0, dl, AmtVT),
9191 Tmp4, Tmp6, ISD::SETLE);
9192 SDValue OutOps[] = { OutLo, OutHi };
9193 return DAG.getMergeValues(OutOps, dl);
9194}
9195
9196SDValue PPCTargetLowering::LowerFunnelShift(SDValue Op,
9197 SelectionDAG &DAG) const {
9198 SDLoc dl(Op);
9199 EVT VT = Op.getValueType();
9200 unsigned BitWidth = VT.getSizeInBits();
9201
9202 bool IsFSHL = Op.getOpcode() == ISD::FSHL;
9203 SDValue X = Op.getOperand(0);
9204 SDValue Y = Op.getOperand(1);
9205 SDValue Z = Op.getOperand(2);
9206 EVT AmtVT = Z.getValueType();
9207
9208 // fshl: (X << (Z % BW)) | (Y >> (BW - (Z % BW)))
9209 // fshr: (X << (BW - (Z % BW))) | (Y >> (Z % BW))
9210 // This is simpler than TargetLowering::expandFunnelShift because we can rely
9211 // on PowerPC shift by BW being well defined.
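  // For example, fshl i32 X, Y, 40: Z & 31 = 8 and SubZ = 24, so the result is
  // (X << 8) | (Y >> 24); fshr with the same operands gives (X << 24) | (Y >> 8).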
9212 Z = DAG.getNode(ISD::AND, dl, AmtVT, Z,
9213 DAG.getConstant(BitWidth - 1, dl, AmtVT));
9214 SDValue SubZ =
9215 DAG.getNode(ISD::SUB, dl, AmtVT, DAG.getConstant(BitWidth, dl, AmtVT), Z);
9216 X = DAG.getNode(PPCISD::SHL, dl, VT, X, IsFSHL ? Z : SubZ);
9217 Y = DAG.getNode(PPCISD::SRL, dl, VT, Y, IsFSHL ? SubZ : Z);
9218 return DAG.getNode(ISD::OR, dl, VT, X, Y);
9219}
9220
9221//===----------------------------------------------------------------------===//
9222// Vector related lowering.
9223//
9224
9225/// getCanonicalConstSplat - Build a canonical splat immediate of Val with an
9226/// element size of SplatSize. Cast the result to VT.
9227static SDValue getCanonicalConstSplat(uint64_t Val, unsigned SplatSize, EVT VT,
9228 SelectionDAG &DAG, const SDLoc &dl) {
9229 static const MVT VTys[] = { // canonical VT to use for each size.
9230 MVT::v16i8, MVT::v8i16, MVT::Other, MVT::v4i32
9231 };
9232
9233 EVT ReqVT = VT != MVT::Other ? VT : VTys[SplatSize-1];
9234
9235 // For a splat with all ones, turn it to vspltisb 0xFF to canonicalize.
9236 if (Val == ((1LLU << (SplatSize * 8)) - 1)) {
9237 SplatSize = 1;
9238 Val = 0xFF;
9239 }
9240
9241 EVT CanonicalVT = VTys[SplatSize-1];
9242
9243 // Build a canonical splat for this value.
9244 return DAG.getBitcast(ReqVT, DAG.getConstant(Val, dl, CanonicalVT));
9245}
9246
9247/// BuildIntrinsicOp - Return a unary operator intrinsic node with the
9248/// specified intrinsic ID.
9250 const SDLoc &dl, EVT DestVT = MVT::Other) {
9251 if (DestVT == MVT::Other) DestVT = Op.getValueType();
9252 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
9253 DAG.getConstant(IID, dl, MVT::i32), Op);
9254}
9255
9256/// BuildIntrinsicOp - Return a binary operator intrinsic node with the
9257/// specified intrinsic ID.
9258static SDValue BuildIntrinsicOp(unsigned IID, SDValue LHS, SDValue RHS,
9259 SelectionDAG &DAG, const SDLoc &dl,
9260 EVT DestVT = MVT::Other) {
9261 if (DestVT == MVT::Other) DestVT = LHS.getValueType();
9262 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
9263 DAG.getConstant(IID, dl, MVT::i32), LHS, RHS);
9264}
9265
9266/// BuildIntrinsicOp - Return a ternary operator intrinsic node with the
9267/// specified intrinsic ID.
9268static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op0, SDValue Op1,
9269 SDValue Op2, SelectionDAG &DAG, const SDLoc &dl,
9270 EVT DestVT = MVT::Other) {
9271 if (DestVT == MVT::Other) DestVT = Op0.getValueType();
9272 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
9273 DAG.getConstant(IID, dl, MVT::i32), Op0, Op1, Op2);
9274}
9275
9276/// BuildVSLDOI - Return a VECTOR_SHUFFLE that is a vsldoi of the specified
9277/// amount. The result has the specified value type.
9278static SDValue BuildVSLDOI(SDValue LHS, SDValue RHS, unsigned Amt, EVT VT,
9279 SelectionDAG &DAG, const SDLoc &dl) {
9280 // Force LHS/RHS to be the right type.
9281 LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, LHS);
9282 RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, RHS);
9283
9284 int Ops[16];
9285 for (unsigned i = 0; i != 16; ++i)
9286 Ops[i] = i + Amt;
9287 SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, LHS, RHS, Ops);
9288 return DAG.getNode(ISD::BITCAST, dl, VT, T);
9289}
9290
9291/// Do we have an efficient pattern in a .td file for this node?
9292///
9293/// \param V - pointer to the BuildVectorSDNode being matched
9294/// \param HasDirectMove - does this subtarget have VSR <-> GPR direct moves?
9295///
9296/// There are some patterns where it is beneficial to keep a BUILD_VECTOR
9297/// node as a BUILD_VECTOR node rather than expanding it. The patterns where
9298/// the opposite is true (expansion is beneficial) are:
9299/// - The node builds a vector out of integers that are not 32 or 64-bits
9300/// - The node builds a vector out of constants
9301/// - The node is a "load-and-splat"
9302/// In all other cases, we will choose to keep the BUILD_VECTOR.
9304 bool HasDirectMove,
9305 bool HasP8Vector) {
9306 EVT VecVT = V->getValueType(0);
9307 bool RightType = VecVT == MVT::v2f64 ||
9308 (HasP8Vector && VecVT == MVT::v4f32) ||
9309 (HasDirectMove && (VecVT == MVT::v2i64 || VecVT == MVT::v4i32));
9310 if (!RightType)
9311 return false;
9312
9313 bool IsSplat = true;
9314 bool IsLoad = false;
9315 SDValue Op0 = V->getOperand(0);
9316
9317 // This function is called in a block that confirms the node is not a constant
9318 // splat. So a constant BUILD_VECTOR here means the vector is built out of
9319 // different constants.
9320 if (V->isConstant())
9321 return false;
9322 for (int i = 0, e = V->getNumOperands(); i < e; ++i) {
9323 if (V->getOperand(i).isUndef())
9324 return false;
9325 // We want to expand nodes that represent load-and-splat even if the
9326 // loaded value is a floating point truncation or conversion to int.
9327 if (V->getOperand(i).getOpcode() == ISD::LOAD ||
9328 (V->getOperand(i).getOpcode() == ISD::FP_ROUND &&
9329 V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD) ||
9330 (V->getOperand(i).getOpcode() == ISD::FP_TO_SINT &&
9331 V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD) ||
9332 (V->getOperand(i).getOpcode() == ISD::FP_TO_UINT &&
9333 V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD))
9334 IsLoad = true;
9335 // If the operands are different or the input is not a load and has more
9336 // uses than just this BV node, then it isn't a splat.
9337 if (V->getOperand(i) != Op0 ||
9338 (!IsLoad && !V->isOnlyUserOf(V->getOperand(i).getNode())))
9339 IsSplat = false;
9340 }
9341 return !(IsSplat && IsLoad);
9342}
9343
9344// Lower BITCAST(f128, (build_pair i64, i64)) to BUILD_FP128.
9345SDValue PPCTargetLowering::LowerBITCAST(SDValue Op, SelectionDAG &DAG) const {
9346
9347 SDLoc dl(Op);
9348 SDValue Op0 = Op->getOperand(0);
9349
9350 if ((Op.getValueType() != MVT::f128) ||
9351 (Op0.getOpcode() != ISD::BUILD_PAIR) ||
9352 (Op0.getOperand(0).getValueType() != MVT::i64) ||
9353 (Op0.getOperand(1).getValueType() != MVT::i64) || !Subtarget.isPPC64())
9354 return SDValue();
9355
9356 return DAG.getNode(PPCISD::BUILD_FP128, dl, MVT::f128, Op0.getOperand(0),
9357 Op0.getOperand(1));
9358}
9359
9360static const SDValue *getNormalLoadInput(const SDValue &Op, bool &IsPermuted) {
9361 const SDValue *InputLoad = &Op;
9362 while (InputLoad->getOpcode() == ISD::BITCAST)
9363 InputLoad = &InputLoad->getOperand(0);
9364 if (InputLoad->getOpcode() == ISD::SCALAR_TO_VECTOR ||
9366 IsPermuted = InputLoad->getOpcode() == PPCISD::SCALAR_TO_VECTOR_PERMUTED;
9367 InputLoad = &InputLoad->getOperand(0);
9368 }
9369 if (InputLoad->getOpcode() != ISD::LOAD)
9370 return nullptr;
9371 LoadSDNode *LD = cast<LoadSDNode>(*InputLoad);
9372 return ISD::isNormalLoad(LD) ? InputLoad : nullptr;
9373}
9374
9375// Convert the argument APFloat to a single precision APFloat if there is no
9376// loss in information during the conversion to single precision APFloat and the
9377// resulting number is not a denormal number. Return true if successful.
9379 APFloat APFloatToConvert = ArgAPFloat;
9380 bool LosesInfo = true;
9382 &LosesInfo);
9383 bool Success = (!LosesInfo && !APFloatToConvert.isDenormal());
9384 if (Success)
9385 ArgAPFloat = APFloatToConvert;
9386 return Success;
9387}
9388
9389// Bitcast the argument APInt to a double and convert it to a single precision
9390// APFloat, bitcast the APFloat to an APInt and assign it to the original
9391// argument if there is no loss in information during the conversion from
9392// double to single precision APFloat and the resulting number is not a denormal
9393// number. Return true if successful.
9395 double DpValue = ArgAPInt.bitsToDouble();
9396 APFloat APFloatDp(DpValue);
9397 bool Success = convertToNonDenormSingle(APFloatDp);
9398 if (Success)
9399 ArgAPInt = APFloatDp.bitcastToAPInt();
9400 return Success;
9401}
9402
9403// Nondestructive check for convertToNonDenormSingle.
9405 // Only convert if it loses info, since XXSPLTIDP should
9406 // handle the other case.
9407 APFloat APFloatToConvert = ArgAPFloat;
9408 bool LosesInfo = true;
9410 &LosesInfo);
9411
9412 return (!LosesInfo && !APFloatToConvert.isDenormal());
9413}
9414
9415static bool isValidSplatLoad(const PPCSubtarget &Subtarget, const SDValue &Op,
9416 unsigned &Opcode) {
9417 LoadSDNode *InputNode = dyn_cast<LoadSDNode>(Op.getOperand(0));
9418 if (!InputNode || !Subtarget.hasVSX() || !ISD::isUNINDEXEDLoad(InputNode))
9419 return false;
9420
9421 EVT Ty = Op->getValueType(0);
9422 // For v2f64, v4f32 and v4i32 types, we require the load to be non-extending
9423 // as we cannot handle extending loads for these types.
9424 if ((Ty == MVT::v2f64 || Ty == MVT::v4f32 || Ty == MVT::v4i32) &&
9425 ISD::isNON_EXTLoad(InputNode))
9426 return true;
9427
9428 EVT MemVT = InputNode->getMemoryVT();
9429 // For v8i16 and v16i8 types, extending loads can be handled as long as the
9430 // memory VT is the same vector element VT type.
9431 // The loads feeding into the v8i16 and v16i8 types will be extending because
9432 // scalar i8/i16 are not legal types.
9433 if ((Ty == MVT::v8i16 || Ty == MVT::v16i8) && ISD::isEXTLoad(InputNode) &&
9434 (MemVT == Ty.getVectorElementType()))
9435 return true;
9436
9437 if (Ty == MVT::v2i64) {
9438 // Check the extend type, when the input type is i32, and the output vector
9439 // type is v2i64.
9440 if (MemVT == MVT::i32) {
9441 if (ISD::isZEXTLoad(InputNode))
9442 Opcode = PPCISD::ZEXT_LD_SPLAT;
9443 if (ISD::isSEXTLoad(InputNode))
9444 Opcode = PPCISD::SEXT_LD_SPLAT;
9445 }
9446 return true;
9447 }
9448 return false;
9449}
9450
9451// If this is a case we can't handle, return null and let the default
9452// expansion code take care of it. If we CAN select this case, and if it
9453// selects to a single instruction, return Op. Otherwise, if we can codegen
9454// this case more efficiently than a constant pool load, lower it to the
9455// sequence of ops that should be used.
9456SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
9457 SelectionDAG &DAG) const {
9458 SDLoc dl(Op);
9459 BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
9460 assert(BVN && "Expected a BuildVectorSDNode in LowerBUILD_VECTOR");
9461
9462 // Check if this is a splat of a constant value.
9463 APInt APSplatBits, APSplatUndef;
9464 unsigned SplatBitSize;
9465 bool HasAnyUndefs;
9466 bool BVNIsConstantSplat =
9467 BVN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
9468 HasAnyUndefs, 0, !Subtarget.isLittleEndian());
9469
9470 // If it is a splat of a double, check if we can shrink it to a 32 bit
9471 // non-denormal float which when converted back to double gives us the same
9472 // double. This is to exploit the XXSPLTIDP instruction.
9473 // If we lose precision, we use XXSPLTI32DX.
9474 if (BVNIsConstantSplat && (SplatBitSize == 64) &&
9475 Subtarget.hasPrefixInstrs() && Subtarget.hasP10Vector()) {
9476 // Check the type first to short-circuit so we don't modify APSplatBits if
9477 // this block isn't executed.
9478 if ((Op->getValueType(0) == MVT::v2f64) &&
9479 convertToNonDenormSingle(APSplatBits)) {
9480 SDValue SplatNode = DAG.getNode(
9481 PPCISD::XXSPLTI_SP_TO_DP, dl, MVT::v2f64,
9482 DAG.getTargetConstant(APSplatBits.getZExtValue(), dl, MVT::i32));
9483 return DAG.getBitcast(Op.getValueType(), SplatNode);
9484 } else {
9485 // We may lose precision, so we have to use XXSPLTI32DX.
9486
9487 uint32_t Hi =
9488 (uint32_t)((APSplatBits.getZExtValue() & 0xFFFFFFFF00000000LL) >> 32);
9489 uint32_t Lo =
9490 (uint32_t)(APSplatBits.getZExtValue() & 0xFFFFFFFF);
9491 SDValue SplatNode = DAG.getUNDEF(MVT::v2i64);
9492
9493 if (!Hi || !Lo)
9494 // If either load is 0, then we should generate XXLXOR to set to 0.
9495 SplatNode = DAG.getTargetConstant(0, dl, MVT::v2i64);
9496
9497 if (Hi)
9498 SplatNode = DAG.getNode(
9499 PPCISD::XXSPLTI32DX, dl, MVT::v2i64, SplatNode,
9500 DAG.getTargetConstant(0, dl, MVT::i32),
9501 DAG.getTargetConstant(Hi, dl, MVT::i32));
9502
9503 if (Lo)
9504 SplatNode =
9505 DAG.getNode(PPCISD::XXSPLTI32DX, dl, MVT::v2i64, SplatNode,
9506 DAG.getTargetConstant(1, dl, MVT::i32),
9507 DAG.getTargetConstant(Lo, dl, MVT::i32));
9508
9509 return DAG.getBitcast(Op.getValueType(), SplatNode);
9510 }
9511 }
9512
9513 if (!BVNIsConstantSplat || SplatBitSize > 32) {
9514 unsigned NewOpcode = PPCISD::LD_SPLAT;
9515
9516 // Handle load-and-splat patterns as we have instructions that will do this
9517 // in one go.
9518 if (DAG.isSplatValue(Op, true) &&
9519 isValidSplatLoad(Subtarget, Op, NewOpcode)) {
9520 const SDValue *InputLoad = &Op.getOperand(0);
9521 LoadSDNode *LD = cast<LoadSDNode>(*InputLoad);
9522
9523 // If the input load is an extending load, it will be an i32 -> i64
9524 // extending load and isValidSplatLoad() will update NewOpcode.
9525 unsigned MemorySize = LD->getMemoryVT().getScalarSizeInBits();
9526 unsigned ElementSize =
9527 MemorySize * ((NewOpcode == PPCISD::LD_SPLAT) ? 1 : 2);
9528
9529 assert(((ElementSize == 2 * MemorySize)
9530 ? (NewOpcode == PPCISD::ZEXT_LD_SPLAT ||
9531 NewOpcode == PPCISD::SEXT_LD_SPLAT)
9532 : (NewOpcode == PPCISD::LD_SPLAT)) &&
9533 "Unmatched element size and opcode!\n");
9534
9535 // Checking for a single use of this load, we have to check for vector
9536 // width (128 bits) / ElementSize uses (since each operand of the
9537 // BUILD_VECTOR is a separate use of the value).
9538 unsigned NumUsesOfInputLD = 128 / ElementSize;
9539 for (SDValue BVInOp : Op->ops())
9540 if (BVInOp.isUndef())
9541 NumUsesOfInputLD--;
9542
9543 // Exclude some cases where LD_SPLAT is worse than scalar_to_vector:
9544 // Below cases should also happen for "lfiwzx/lfiwax + LE target + index
9545 // 1" and "lxvrhx + BE target + index 7" and "lxvrbx + BE target + index
9546 // 15", but function IsValidSplatLoad() now will only return true when
9547 // the data at index 0 is not nullptr. So we will not get into trouble for
9548 // these cases.
9549 //
9550 // case 1 - lfiwzx/lfiwax
9551 // 1.1: load result is i32 and is sign/zero extend to i64;
9552 // 1.2: build a v2i64 vector type with above loaded value;
9553 // 1.3: the vector has only one value at index 0, others are all undef;
9554 // 1.4: on BE target, so that lfiwzx/lfiwax does not need any permute.
9555 if (NumUsesOfInputLD == 1 &&
9556 (Op->getValueType(0) == MVT::v2i64 && NewOpcode != PPCISD::LD_SPLAT &&
9557 !Subtarget.isLittleEndian() && Subtarget.hasVSX() &&
9558 Subtarget.hasLFIWAX()))
9559 return SDValue();
9560
9561 // case 2 - lxvr[hb]x
9562 // 2.1: load result is at most i16;
9563 // 2.2: build a vector with above loaded value;
9564 // 2.3: the vector has only one value at index 0, others are all undef;
9565 // 2.4: on LE target, so that lxvr[hb]x does not need any permute.
9566 if (NumUsesOfInputLD == 1 && Subtarget.isLittleEndian() &&
9567 Subtarget.isISA3_1() && ElementSize <= 16)
9568 return SDValue();
9569
9570 assert(NumUsesOfInputLD > 0 && "No uses of input LD of a build_vector?");
9571 if (InputLoad->getNode()->hasNUsesOfValue(NumUsesOfInputLD, 0) &&
9572 Subtarget.hasVSX()) {
9573 SDValue Ops[] = {
9574 LD->getChain(), // Chain
9575 LD->getBasePtr(), // Ptr
9576 DAG.getValueType(Op.getValueType()) // VT
9577 };
9578 SDValue LdSplt = DAG.getMemIntrinsicNode(
9579 NewOpcode, dl, DAG.getVTList(Op.getValueType(), MVT::Other), Ops,
9580 LD->getMemoryVT(), LD->getMemOperand());
9581 // Replace all uses of the output chain of the original load with the
9582 // output chain of the new load.
9583 DAG.ReplaceAllUsesOfValueWith(InputLoad->getValue(1),
9584 LdSplt.getValue(1));
9585 return LdSplt;
9586 }
9587 }
9588
9589 // In 64BIT mode BUILD_VECTOR nodes that are not constant splats of up to
9590 // 32-bits can be lowered to VSX instructions under certain conditions.
9591 // Without VSX, there is no pattern more efficient than expanding the node.
9592 if (Subtarget.hasVSX() && Subtarget.isPPC64() &&
9593 haveEfficientBuildVectorPattern(BVN, Subtarget.hasDirectMove(),
9594 Subtarget.hasP8Vector()))
9595 return Op;
9596 return SDValue();
9597 }
9598
9599 uint64_t SplatBits = APSplatBits.getZExtValue();
9600 uint64_t SplatUndef = APSplatUndef.getZExtValue();
9601 unsigned SplatSize = SplatBitSize / 8;
9602
9603 // First, handle single instruction cases.
9604
9605 // All zeros?
9606 if (SplatBits == 0) {
9607 // Canonicalize all zero vectors to be v4i32.
9608 if (Op.getValueType() != MVT::v4i32 || HasAnyUndefs) {
9609 SDValue Z = DAG.getConstant(0, dl, MVT::v4i32);
9610 Op = DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Z);
9611 }
9612 return Op;
9613 }
9614
9615 // We have XXSPLTIW for constant splats four bytes wide.
9616 // Given vector length is a multiple of 4, 2-byte splats can be replaced
9617 // with 4-byte splats. We replicate the SplatBits in case of 2-byte splat to
9618 // make a 4-byte splat element. For example: 2-byte splat of 0xABAB can be
9619 // turned into a 4-byte splat of 0xABABABAB.
9620 if (Subtarget.hasPrefixInstrs() && Subtarget.hasP10Vector() && SplatSize == 2)
9621 return getCanonicalConstSplat(SplatBits | (SplatBits << 16), SplatSize * 2,
9622 Op.getValueType(), DAG, dl);
9623
9624 if (Subtarget.hasPrefixInstrs() && Subtarget.hasP10Vector() && SplatSize == 4)
9625 return getCanonicalConstSplat(SplatBits, SplatSize, Op.getValueType(), DAG,
9626 dl);
9627
9628 // We have XXSPLTIB for constant splats one byte wide.
9629 if (Subtarget.hasP9Vector() && SplatSize == 1)
9630 return getCanonicalConstSplat(SplatBits, SplatSize, Op.getValueType(), DAG,
9631 dl);
9632
9633 // If the sign extended value is in the range [-16,15], use VSPLTI[bhw].
9634 int32_t SextVal= (int32_t(SplatBits << (32-SplatBitSize)) >>
9635 (32-SplatBitSize));
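// For example, a 16-bit splat of 0xFFF0 sign-extends to SextVal == -16, so it
// is still handled by the single-instruction VSPLTI path below.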
9636 if (SextVal >= -16 && SextVal <= 15)
9637 return getCanonicalConstSplat(SextVal, SplatSize, Op.getValueType(), DAG,
9638 dl);
9639
9640 // Two instruction sequences.
9641
9642 // If this value is in the range [-32,30] and is even, use:
9643 // VSPLTI[bhw](val/2) + VSPLTI[bhw](val/2)
9644 // If this value is in the range [17,31] and is odd, use:
9645 // VSPLTI[bhw](val-16) - VSPLTI[bhw](-16)
9646 // If this value is in the range [-31,-17] and is odd, use:
9647 // VSPLTI[bhw](val+16) + VSPLTI[bhw](-16)
9648 // Note the last two are three-instruction sequences.
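// For example, with a 4-byte splat: 20 is even and in [-32,30], so it can be
// built as vspltisw 10; vadduwm (10 + 10). 27 is odd and in [17,31], so it
// becomes vspltisw 11; vspltisw -16; vsubuwm (11 - (-16) == 27).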
9649 if (SextVal >= -32 && SextVal <= 31) {
9650 // To avoid having these optimizations undone by constant folding,
9651 // we convert to a pseudo that will be expanded later into one of
9652 // the above forms.
9653 SDValue Elt = DAG.getConstant(SextVal, dl, MVT::i32);
9654 EVT VT = (SplatSize == 1 ? MVT::v16i8 :
9655 (SplatSize == 2 ? MVT::v8i16 : MVT::v4i32));
9656 SDValue EltSize = DAG.getConstant(SplatSize, dl, MVT::i32);
9657 SDValue RetVal = DAG.getNode(PPCISD::VADD_SPLAT, dl, VT, Elt, EltSize);
9658 if (VT == Op.getValueType())
9659 return RetVal;
9660 else
9661 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), RetVal);
9662 }
9663
9664 // If this is 0x8000_0000 x 4, turn into vspltisw + vslw. If it is
9665 // 0x7FFF_FFFF x 4, turn it into not(0x8000_0000). This is important
9666 // for fneg/fabs.
9667 if (SplatSize == 4 && SplatBits == (0x7FFFFFFF&~SplatUndef)) {
9668 // Make -1 and vspltisw -1:
9669 SDValue OnesV = getCanonicalConstSplat(-1, 4, MVT::v4i32, DAG, dl);
9670
9671 // Make the VSLW intrinsic, computing 0x8000_0000.
9672 SDValue Res = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, OnesV,
9673 OnesV, DAG, dl);
9674
9675 // xor by OnesV to invert it.
9676 Res = DAG.getNode(ISD::XOR, dl, MVT::v4i32, Res, OnesV);
9677 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
9678 }
9679
9680 // Check to see if this is a wide variety of vsplti*, binop self cases.
9681 static const signed char SplatCsts[] = {
9682 -1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7,
9683 -8, 8, -9, 9, -10, 10, -11, 11, -12, 12, -13, 13, 14, -14, 15, -15, -16
9684 };
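// For example, with a one-byte splat (SplatBitSize == 8) and i == 4, the
// "vsplti + shl self" case below matches SextVal == 64 (4 << 4), which is
// then built as vspltisb 4 followed by vslb of the splat by itself.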
9685
9686 for (unsigned idx = 0; idx < std::size(SplatCsts); ++idx) {
9687 // Indirect through the SplatCsts array so that we favor 'vsplti -1' for
9688 // cases which are ambiguous (e.g. formation of 0x8000_0000).
9689 int i = SplatCsts[idx];
9690
9691 // Figure out what shift amount will be used by altivec if shifted by i in
9692 // this splat size.
9693 unsigned TypeShiftAmt = i & (SplatBitSize-1);
9694
9695 // vsplti + shl self.
9696 if (SextVal == (int)((unsigned)i << TypeShiftAmt)) {
9697 SDValue Res = getCanonicalConstSplat(i, SplatSize, MVT::Other, DAG, dl);
9698 static const unsigned IIDs[] = { // Intrinsic to use for each size.
9699 Intrinsic::ppc_altivec_vslb, Intrinsic::ppc_altivec_vslh, 0,
9700 Intrinsic::ppc_altivec_vslw
9701 };
9702 Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
9703 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
9704 }
9705
9706 // vsplti + srl self.
9707 if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) {
9708 SDValue Res = getCanonicalConstSplat(i, SplatSize, MVT::Other, DAG, dl);
9709 static const unsigned IIDs[] = { // Intrinsic to use for each size.
9710 Intrinsic::ppc_altivec_vsrb, Intrinsic::ppc_altivec_vsrh, 0,
9711 Intrinsic::ppc_altivec_vsrw
9712 };
9713 Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
9714 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
9715 }
9716
9717 // vsplti + rol self.
9718 if (SextVal == (int)(((unsigned)i << TypeShiftAmt) |
9719 ((unsigned)i >> (SplatBitSize-TypeShiftAmt)))) {
9720 SDValue Res = getCanonicalConstSplat(i, SplatSize, MVT::Other, DAG, dl);
9721 static const unsigned IIDs[] = { // Intrinsic to use for each size.
9722 Intrinsic::ppc_altivec_vrlb, Intrinsic::ppc_altivec_vrlh, 0,
9723 Intrinsic::ppc_altivec_vrlw
9724 };
9725 Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
9726 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
9727 }
9728
9729 // t = vsplti c, result = vsldoi t, t, 1
9730 if (SextVal == (int)(((unsigned)i << 8) | (i < 0 ? 0xFF : 0))) {
9731 SDValue T = getCanonicalConstSplat(i, SplatSize, MVT::v16i8, DAG, dl);
9732 unsigned Amt = Subtarget.isLittleEndian() ? 15 : 1;
9733 return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
9734 }
9735 // t = vsplti c, result = vsldoi t, t, 2
9736 if (SextVal == (int)(((unsigned)i << 16) | (i < 0 ? 0xFFFF : 0))) {
9737 SDValue T = getCanonicalConstSplat(i, SplatSize, MVT::v16i8, DAG, dl);
9738 unsigned Amt = Subtarget.isLittleEndian() ? 14 : 2;
9739 return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
9740 }
9741 // t = vsplti c, result = vsldoi t, t, 3
9742 if (SextVal == (int)(((unsigned)i << 24) | (i < 0 ? 0xFFFFFF : 0))) {
9743 SDValue T = getCanonicalConstSplat(i, SplatSize, MVT::v16i8, DAG, dl);
9744 unsigned Amt = Subtarget.isLittleEndian() ? 13 : 3;
9745 return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
9746 }
9747 }
9748
9749 return SDValue();
9750}
9751
9752/// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
9753/// the specified operations to build the shuffle.
9754static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
9755 SDValue RHS, SelectionDAG &DAG,
9756 const SDLoc &dl) {
9757 unsigned OpNum = (PFEntry >> 26) & 0x0F;
9758 unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1);
9759 unsigned RHSID = (PFEntry >> 0) & ((1 << 13)-1);
9760
9761 enum {
9762 OP_COPY = 0, // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>
9763 OP_VMRGHW,
9764 OP_VMRGLW,
9765 OP_VSPLTISW0,
9766 OP_VSPLTISW1,
9767 OP_VSPLTISW2,
9768 OP_VSPLTISW3,
9769 OP_VSLDOI4,
9770 OP_VSLDOI8,
9771 OP_VSLDOI12
9772 };
9773
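// A PFEntry packs the cost in bits 31-30, the opcode above in bits 29-26 and
// two 13-bit operand IDs. Each ID encodes four word selectors in base 9,
// where 0-7 pick one of the eight input words and 8 means undef. For example,
// (1*9+2)*9+3 == 102 encodes <0,1,2,3> (a copy of LHS) and
// ((4*9+5)*9+6)*9+7 == 3382 encodes <4,5,6,7> (a copy of RHS), which is what
// the OP_COPY check below relies on.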
9774 if (OpNum == OP_COPY) {
9775 if (LHSID == (1*9+2)*9+3) return LHS;
9776 assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!");
9777 return RHS;
9778 }
9779
9780 SDValue OpLHS, OpRHS;
9781 OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl);
9782 OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl);
9783
9784 int ShufIdxs[16];
9785 switch (OpNum) {
9786 default: llvm_unreachable("Unknown i32 permute!");
9787 case OP_VMRGHW:
9788 ShufIdxs[ 0] = 0; ShufIdxs[ 1] = 1; ShufIdxs[ 2] = 2; ShufIdxs[ 3] = 3;
9789 ShufIdxs[ 4] = 16; ShufIdxs[ 5] = 17; ShufIdxs[ 6] = 18; ShufIdxs[ 7] = 19;
9790 ShufIdxs[ 8] = 4; ShufIdxs[ 9] = 5; ShufIdxs[10] = 6; ShufIdxs[11] = 7;
9791 ShufIdxs[12] = 20; ShufIdxs[13] = 21; ShufIdxs[14] = 22; ShufIdxs[15] = 23;
9792 break;
9793 case OP_VMRGLW:
9794 ShufIdxs[ 0] = 8; ShufIdxs[ 1] = 9; ShufIdxs[ 2] = 10; ShufIdxs[ 3] = 11;
9795 ShufIdxs[ 4] = 24; ShufIdxs[ 5] = 25; ShufIdxs[ 6] = 26; ShufIdxs[ 7] = 27;
9796 ShufIdxs[ 8] = 12; ShufIdxs[ 9] = 13; ShufIdxs[10] = 14; ShufIdxs[11] = 15;
9797 ShufIdxs[12] = 28; ShufIdxs[13] = 29; ShufIdxs[14] = 30; ShufIdxs[15] = 31;
9798 break;
9799 case OP_VSPLTISW0:
9800 for (unsigned i = 0; i != 16; ++i)
9801 ShufIdxs[i] = (i&3)+0;
9802 break;
9803 case OP_VSPLTISW1:
9804 for (unsigned i = 0; i != 16; ++i)
9805 ShufIdxs[i] = (i&3)+4;
9806 break;
9807 case OP_VSPLTISW2:
9808 for (unsigned i = 0; i != 16; ++i)
9809 ShufIdxs[i] = (i&3)+8;
9810 break;
9811 case OP_VSPLTISW3:
9812 for (unsigned i = 0; i != 16; ++i)
9813 ShufIdxs[i] = (i&3)+12;
9814 break;
9815 case OP_VSLDOI4:
9816 return BuildVSLDOI(OpLHS, OpRHS, 4, OpLHS.getValueType(), DAG, dl);
9817 case OP_VSLDOI8:
9818 return BuildVSLDOI(OpLHS, OpRHS, 8, OpLHS.getValueType(), DAG, dl);
9819 case OP_VSLDOI12:
9820 return BuildVSLDOI(OpLHS, OpRHS, 12, OpLHS.getValueType(), DAG, dl);
9821 }
9822 EVT VT = OpLHS.getValueType();
9823 OpLHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpLHS);
9824 OpRHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpRHS);
9825 SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, OpLHS, OpRHS, ShufIdxs);
9826 return DAG.getNode(ISD::BITCAST, dl, VT, T);
9827}
9828
9829/// lowerToVINSERTB - Return the SDValue if this VECTOR_SHUFFLE can be handled
9830/// by the VINSERTB instruction introduced in ISA 3.0, else just return default
9831/// SDValue.
9832SDValue PPCTargetLowering::lowerToVINSERTB(ShuffleVectorSDNode *N,
9833 SelectionDAG &DAG) const {
9834 const unsigned BytesInVector = 16;
9835 bool IsLE = Subtarget.isLittleEndian();
9836 SDLoc dl(N);
9837 SDValue V1 = N->getOperand(0);
9838 SDValue V2 = N->getOperand(1);
9839 unsigned ShiftElts = 0, InsertAtByte = 0;
9840 bool Swap = false;
9841
9842 // Shifts required to get the byte we want at element 7.
9843 unsigned LittleEndianShifts[] = {8, 7, 6, 5, 4, 3, 2, 1,
9844 0, 15, 14, 13, 12, 11, 10, 9};
9845 unsigned BigEndianShifts[] = {9, 10, 11, 12, 13, 14, 15, 0,
9846 1, 2, 3, 4, 5, 6, 7, 8};
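// Note that the tables are built so that the source byte the instruction
// actually reads needs no shift: LittleEndianShifts[8] == 0 and
// BigEndianShifts[7] == 0, matching VINSERTBSrcElem below.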
9847
9848 ArrayRef<int> Mask = N->getMask();
9849 int OriginalOrder[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
9850
9851 // For each mask element, find out if we're just inserting something
9852 // from V2 into V1 or vice versa.
9853 // Possible permutations inserting an element from V2 into V1:
9854 // X, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
9855 // 0, X, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
9856 // ...
9857 // 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, X
9858 // Inserting from V1 into V2 will be similar, except mask range will be
9859 // [16,31].
9860
9861 bool FoundCandidate = false;
9862 // If both vector operands for the shuffle are the same vector, the mask
9863 // will contain only elements from the first one and the second one will be
9864 // undef.
9865 unsigned VINSERTBSrcElem = IsLE ? 8 : 7;
9866 // Go through the mask of half-words to find an element that's being moved
9867 // from one vector to the other.
9868 for (unsigned i = 0; i < BytesInVector; ++i) {
9869 unsigned CurrentElement = Mask[i];
9870 // If 2nd operand is undefined, we should only look for element 7 in the
9871 // Mask.
9872 if (V2.isUndef() && CurrentElement != VINSERTBSrcElem)
9873 continue;
9874
9875 bool OtherElementsInOrder = true;
9876 // Examine the other elements in the Mask to see if they're in original
9877 // order.
9878 for (unsigned j = 0; j < BytesInVector; ++j) {
9879 if (j == i)
9880 continue;
9881 // If CurrentElement is from V1 [0,15], then we expect the rest of the Mask
9882 // to be from V2 [16,31] and vice versa, unless the 2nd operand is undefined,
9883 // in which case we assume we're always picking from the 1st operand.
9884 int MaskOffset =
9885 (!V2.isUndef() && CurrentElement < BytesInVector) ? BytesInVector : 0;
9886 if (Mask[j] != OriginalOrder[j] + MaskOffset) {
9887 OtherElementsInOrder = false;
9888 break;
9889 }
9890 }
9891 // If other elements are in original order, we record the number of shifts
9892 // we need to get the element we want into element 7. Also record which byte
9893 // in the vector we should insert into.
9894 if (OtherElementsInOrder) {
9895 // If 2nd operand is undefined, we assume no shifts and no swapping.
9896 if (V2.isUndef()) {
9897 ShiftElts = 0;
9898 Swap = false;
9899 } else {
9900 // Only need the last 4 bits for the shift because the operands will be swapped if CurrentElement is >= 16.
9901 ShiftElts = IsLE ? LittleEndianShifts[CurrentElement & 0xF]
9902 : BigEndianShifts[CurrentElement & 0xF];
9903 Swap = CurrentElement < BytesInVector;
9904 }
9905 InsertAtByte = IsLE ? BytesInVector - (i + 1) : i;
9906 FoundCandidate = true;
9907 break;
9908 }
9909 }
9910
9911 if (!FoundCandidate)
9912 return SDValue();
9913
9914 // Candidate found, construct the proper SDAG sequence with VINSERTB,
9915 // optionally with VECSHL if shift is required.
9916 if (Swap)
9917 std::swap(V1, V2);
9918 if (V2.isUndef())
9919 V2 = V1;
9920 if (ShiftElts) {
9921 SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v16i8, V2, V2,
9922 DAG.getConstant(ShiftElts, dl, MVT::i32));
9923 return DAG.getNode(PPCISD::VECINSERT, dl, MVT::v16i8, V1, Shl,
9924 DAG.getConstant(InsertAtByte, dl, MVT::i32));
9925 }
9926 return DAG.getNode(PPCISD::VECINSERT, dl, MVT::v16i8, V1, V2,
9927 DAG.getConstant(InsertAtByte, dl, MVT::i32));
9928}
9929
9930/// lowerToVINSERTH - Return the SDValue if this VECTOR_SHUFFLE can be handled
9931/// by the VINSERTH instruction introduced in ISA 3.0, else just return default
9932/// SDValue.
9933SDValue PPCTargetLowering::lowerToVINSERTH(ShuffleVectorSDNode *N,
9934 SelectionDAG &DAG) const {
9935 const unsigned NumHalfWords = 8;
9936 const unsigned BytesInVector = NumHalfWords * 2;
9937 // Check that the shuffle is on half-words.
9938 if (!isNByteElemShuffleMask(N, 2, 1))
9939 return SDValue();
9940
9941 bool IsLE = Subtarget.isLittleEndian();
9942 SDLoc dl(N);
9943 SDValue V1 = N->getOperand(0);
9944 SDValue V2 = N->getOperand(1);
9945 unsigned ShiftElts = 0, InsertAtByte = 0;
9946 bool Swap = false;
9947
9948 // Shifts required to get the half-word we want at element 3.
9949 unsigned LittleEndianShifts[] = {4, 3, 2, 1, 0, 7, 6, 5};
9950 unsigned BigEndianShifts[] = {5, 6, 7, 0, 1, 2, 3, 4};
9951
9952 uint32_t Mask = 0;
9953 uint32_t OriginalOrderLow = 0x1234567;
9954 uint32_t OriginalOrderHigh = 0x89ABCDEF;
9955 // Now we look at mask elements 0,2,4,6,8,10,12,14. Pack the mask into a
9956 // 32-bit space, only need 4-bit nibbles per element.
9957 for (unsigned i = 0; i < NumHalfWords; ++i) {
9958 unsigned MaskShift = (NumHalfWords - 1 - i) * 4;
9959 Mask |= ((uint32_t)(N->getMaskElt(i * 2) / 2) << MaskShift);
9960 }
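// For example, if the even mask elements are the identity <0,2,4,...,14>, the
// packed half-word indices are 0..7 and Mask becomes 0x01234567, which is why
// OriginalOrderLow above is written as 0x1234567 (the leading nibble is 0).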
9961
9962 // For each mask element, find out if we're just inserting something
9963 // from V2 into V1 or vice versa. Possible permutations inserting an element
9964 // from V2 into V1:
9965 // X, 1, 2, 3, 4, 5, 6, 7
9966 // 0, X, 2, 3, 4, 5, 6, 7
9967 // 0, 1, X, 3, 4, 5, 6, 7
9968 // 0, 1, 2, X, 4, 5, 6, 7
9969 // 0, 1, 2, 3, X, 5, 6, 7
9970 // 0, 1, 2, 3, 4, X, 6, 7
9971 // 0, 1, 2, 3, 4, 5, X, 7
9972 // 0, 1, 2, 3, 4, 5, 6, X
9973 // Inserting from V1 into V2 will be similar, except mask range will be [8,15].
9974
9975 bool FoundCandidate = false;
9976 // Go through the mask of half-words to find an element that's being moved
9977 // from one vector to the other.
9978 for (unsigned i = 0; i < NumHalfWords; ++i) {
9979 unsigned MaskShift = (NumHalfWords - 1 - i) * 4;
9980 uint32_t MaskOneElt = (Mask >> MaskShift) & 0xF;
9981 uint32_t MaskOtherElts = ~(0xF << MaskShift);
9982 uint32_t TargetOrder = 0x0;
9983
9984 // If both vector operands for the shuffle are the same vector, the mask
9985 // will contain only elements from the first one and the second one will be
9986 // undef.
9987 if (V2.isUndef()) {
9988 ShiftElts = 0;
9989 unsigned VINSERTHSrcElem = IsLE ? 4 : 3;
9990 TargetOrder = OriginalOrderLow;
9991 Swap = false;
9992 // Skip if not the correct element or mask of other elements don't equal
9993 // to our expected order.
9994 if (MaskOneElt == VINSERTHSrcElem &&
9995 (Mask & MaskOtherElts) == (TargetOrder & MaskOtherElts)) {
9996 InsertAtByte = IsLE ? BytesInVector - (i + 1) * 2 : i * 2;
9997 FoundCandidate = true;
9998 break;
9999 }
10000 } else { // If both operands are defined.
10001 // Target order is [8,15] if the current mask is between [0,7].
10002 TargetOrder =
10003 (MaskOneElt < NumHalfWords) ? OriginalOrderHigh : OriginalOrderLow;
10004 // Skip if mask of other elements don't equal our expected order.
10005 if ((Mask & MaskOtherElts) == (TargetOrder & MaskOtherElts)) {
10006 // We only need the last 3 bits for the number of shifts.
10007 ShiftElts = IsLE ? LittleEndianShifts[MaskOneElt & 0x7]
10008 : BigEndianShifts[MaskOneElt & 0x7];
10009 InsertAtByte = IsLE ? BytesInVector - (i + 1) * 2 : i * 2;
10010 Swap = MaskOneElt < NumHalfWords;
10011 FoundCandidate = true;
10012 break;
10013 }
10014 }
10015 }
10016
10017 if (!FoundCandidate)
10018 return SDValue();
10019
10020 // Candidate found, construct the proper SDAG sequence with VINSERTH,
10021 // optionally with VECSHL if shift is required.
10022 if (Swap)
10023 std::swap(V1, V2);
10024 if (V2.isUndef())
10025 V2 = V1;
10026 SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V1);
10027 if (ShiftElts) {
10028 // Double ShiftElts because we're left shifting on v16i8 type.
10029 SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v16i8, V2, V2,
10030 DAG.getConstant(2 * ShiftElts, dl, MVT::i32));
10031 SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, Shl);
10032 SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v8i16, Conv1, Conv2,
10033 DAG.getConstant(InsertAtByte, dl, MVT::i32));
10034 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
10035 }
10036 SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V2);
10037 SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v8i16, Conv1, Conv2,
10038 DAG.getConstant(InsertAtByte, dl, MVT::i32));
10039 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
10040}
10041
10042/// lowerToXXSPLTI32DX - Return the SDValue if this VECTOR_SHUFFLE can be
10043/// handled by the XXSPLTI32DX instruction introduced in ISA 3.1, otherwise
10044/// return the default SDValue.
10045SDValue PPCTargetLowering::lowerToXXSPLTI32DX(ShuffleVectorSDNode *SVN,
10046 SelectionDAG &DAG) const {
10047 // The LHS and RHS may be bitcasts to v16i8 as we canonicalize shuffles
10048 // to v16i8. Peek through the bitcasts to get the actual operands.
10049 SDValue LHS = peekThroughBitcasts(SVN->getOperand(0));
10050 SDValue RHS = peekThroughBitcasts(SVN->getOperand(1));
10051
10052 auto ShuffleMask = SVN->getMask();
10053 SDValue VecShuffle(SVN, 0);
10054 SDLoc DL(SVN);
10055
10056 // Check that we have a four byte shuffle.
10057 if (!isNByteElemShuffleMask(SVN, 4, 1))
10058 return SDValue();
10059
10060 // Canonicalize the RHS being a BUILD_VECTOR when lowering to xxsplti32dx.
10061 if (RHS->getOpcode() != ISD::BUILD_VECTOR) {
10062 std::swap(LHS, RHS);
10063 VecShuffle = DAG.getCommutedVectorShuffle(*SVN);
10064 ShuffleVectorSDNode *CommutedSV = dyn_cast<ShuffleVectorSDNode>(VecShuffle);
10065 if (!CommutedSV)
10066 return SDValue();
10067 ShuffleMask = CommutedSV->getMask();
10068 }
10069
10070 // Ensure that the RHS is a vector of constants.
10071 BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(RHS.getNode());
10072 if (!BVN)
10073 return SDValue();
10074
10075 // Check if RHS is a splat of 4-bytes (or smaller).
10076 APInt APSplatValue, APSplatUndef;
10077 unsigned SplatBitSize;
10078 bool HasAnyUndefs;
10079 if (!BVN->isConstantSplat(APSplatValue, APSplatUndef, SplatBitSize,
10080 HasAnyUndefs, 0, !Subtarget.isLittleEndian()) ||
10081 SplatBitSize > 32)
10082 return SDValue();
10083
10084 // Check that the shuffle mask matches the semantics of XXSPLTI32DX.
10085 // The instruction splats a constant C into two words of the source vector
10086 // producing { C, Unchanged, C, Unchanged } or { Unchanged, C, Unchanged, C }.
10087 // Thus we check that the shuffle mask is the equivalent of
10088 // <0, [4-7], 2, [4-7]> or <[4-7], 1, [4-7], 3> respectively.
10089 // Note: the check above of isNByteElemShuffleMask() ensures that the bytes
10090 // within each word are consecutive, so we only need to check the first byte.
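// For example, the byte mask <0,1,2,3, 16,17,18,19, 8,9,10,11, 24,25,26,27>
// satisfies the first check below: words 0 and 2 of the result come from the
// LHS while words 1 and 3 come from the constant RHS splat.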
10091 SDValue Index;
10092 bool IsLE = Subtarget.isLittleEndian();
10093 if ((ShuffleMask[0] == 0 && ShuffleMask[8] == 8) &&
10094 (ShuffleMask[4] % 4 == 0 && ShuffleMask[12] % 4 == 0 &&
10095 ShuffleMask[4] > 15 && ShuffleMask[12] > 15))
10096 Index = DAG.getTargetConstant(IsLE ? 0 : 1, DL, MVT::i32);
10097 else if ((ShuffleMask[4] == 4 && ShuffleMask[12] == 12) &&
10098 (ShuffleMask[0] % 4 == 0 && ShuffleMask[8] % 4 == 0 &&
10099 ShuffleMask[0] > 15 && ShuffleMask[8] > 15))
10100 Index = DAG.getTargetConstant(IsLE ? 1 : 0, DL, MVT::i32);
10101 else
10102 return SDValue();
10103
10104 // If the splat is narrower than 32-bits, we need to get the 32-bit value
10105 // for XXSPLTI32DX.
10106 unsigned SplatVal = APSplatValue.getZExtValue();
10107 for (; SplatBitSize < 32; SplatBitSize <<= 1)
10108 SplatVal |= (SplatVal << SplatBitSize);
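// For example, an 8-bit splat of 0xAB becomes 0xABAB after one iteration and
// 0xABABABAB after the second, at which point SplatBitSize reaches 32.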
10109
10110 SDValue SplatNode = DAG.getNode(
10111 PPCISD::XXSPLTI32DX, DL, MVT::v2i64, DAG.getBitcast(MVT::v2i64, LHS),
10112 Index, DAG.getTargetConstant(SplatVal, DL, MVT::i32));
10113 return DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, SplatNode);
10114}
10115
10116/// LowerROTL - Custom lowering for ROTL(v1i128) to vector_shuffle(v16i8).
10117/// We lower ROTL(v1i128) to vector_shuffle(v16i8) only if shift amount is
10118 /// a multiple of 8. Otherwise convert it to a scalar rotation (i128),
10119 /// i.e. (or (shl x, C1), (srl x, 128-C1)).
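/// For example, a rotate amount of 24 yields the byte shuffle mask
/// <3,4,...,15,0,1,2> (iota over 0..15 rotated left by 24 / 8 == 3 positions).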
10120SDValue PPCTargetLowering::LowerROTL(SDValue Op, SelectionDAG &DAG) const {
10121 assert(Op.getOpcode() == ISD::ROTL && "Should only be called for ISD::ROTL");
10122 assert(Op.getValueType() == MVT::v1i128 &&
10123 "Only set v1i128 as custom, other type shouldn't reach here!");
10124 SDLoc dl(Op);
10125 SDValue N0 = peekThroughBitcasts(Op.getOperand(0));
10126 SDValue N1 = peekThroughBitcasts(Op.getOperand(1));
10127 unsigned SHLAmt = N1.getConstantOperandVal(0);
10128 if (SHLAmt % 8 == 0) {
10129 std::array<int, 16> Mask;
10130 std::iota(Mask.begin(), Mask.end(), 0);
10131 std::rotate(Mask.begin(), Mask.begin() + SHLAmt / 8, Mask.end());
10132 if (SDValue Shuffle =
10133 DAG.getVectorShuffle(MVT::v16i8, dl, DAG.getBitcast(MVT::v16i8, N0),
10134 DAG.getUNDEF(MVT::v16i8), Mask))
10135 return DAG.getNode(ISD::BITCAST, dl, MVT::v1i128, Shuffle);
10136 }
10137 SDValue ArgVal = DAG.getBitcast(MVT::i128, N0);
10138 SDValue SHLOp = DAG.getNode(ISD::SHL, dl, MVT::i128, ArgVal,
10139 DAG.getConstant(SHLAmt, dl, MVT::i32));
10140 SDValue SRLOp = DAG.getNode(ISD::SRL, dl, MVT::i128, ArgVal,
10141 DAG.getConstant(128 - SHLAmt, dl, MVT::i32));
10142 SDValue OROp = DAG.getNode(ISD::OR, dl, MVT::i128, SHLOp, SRLOp);
10143 return DAG.getNode(ISD::BITCAST, dl, MVT::v1i128, OROp);
10144}
10145
10146/// LowerVECTOR_SHUFFLE - Return the code we lower for VECTOR_SHUFFLE. If this
10147/// is a shuffle we can handle in a single instruction, return it. Otherwise,
10148/// return the code it can be lowered into. Worst case, it can always be
10149/// lowered into a vperm.
10150SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
10151 SelectionDAG &DAG) const {
10152 SDLoc dl(Op);
10153 SDValue V1 = Op.getOperand(0);
10154 SDValue V2 = Op.getOperand(1);
10155 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
10156
10157 // Any nodes that were combined in the target-independent combiner prior
10158 // to vector legalization will not be sent to the target combine. Try to
10159 // combine it here.
10160 if (SDValue NewShuffle = combineVectorShuffle(SVOp, DAG)) {
10161 if (!isa<ShuffleVectorSDNode>(NewShuffle))
10162 return NewShuffle;
10163 Op = NewShuffle;
10164 SVOp = cast<ShuffleVectorSDNode>(Op);
10165 V1 = Op.getOperand(0);
10166 V2 = Op.getOperand(1);
10167 }
10168 EVT VT = Op.getValueType();
10169 bool isLittleEndian = Subtarget.isLittleEndian();
10170
10171 unsigned ShiftElts, InsertAtByte;
10172 bool Swap = false;
10173
10174 // If this is a load-and-splat, we can do that with a single instruction
10175 // in some cases. However if the load has multiple uses, we don't want to
10176 // combine it because that will just produce multiple loads.
10177 bool IsPermutedLoad = false;
10178 const SDValue *InputLoad = getNormalLoadInput(V1, IsPermutedLoad);
10179 if (InputLoad && Subtarget.hasVSX() && V2.isUndef() &&
10180 (PPC::isSplatShuffleMask(SVOp, 4) || PPC::isSplatShuffleMask(SVOp, 8)) &&
10181 InputLoad->hasOneUse()) {
10182 bool IsFourByte = PPC::isSplatShuffleMask(SVOp, 4);
10183 int SplatIdx =
10184 PPC::getSplatIdxForPPCMnemonics(SVOp, IsFourByte ? 4 : 8, DAG);
10185
10186 // The splat index for permuted loads will be in the left half of the vector
10187 // which is strictly wider than the loaded value by 8 bytes. So we need to
10188 // adjust the splat index to point to the correct address in memory.
10189 if (IsPermutedLoad) {
10190 assert((isLittleEndian || IsFourByte) &&
10191 "Unexpected size for permuted load on big endian target");
10192 SplatIdx += IsFourByte ? 2 : 1;
10193 assert((SplatIdx < (IsFourByte ? 4 : 2)) &&
10194 "Splat of a value outside of the loaded memory");
10195 }
10196
10197 LoadSDNode *LD = cast<LoadSDNode>(*InputLoad);
10198 // For 4-byte load-and-splat, we need Power9.
10199 if ((IsFourByte && Subtarget.hasP9Vector()) || !IsFourByte) {
10200 uint64_t Offset = 0;
10201 if (IsFourByte)
10202 Offset = isLittleEndian ? (3 - SplatIdx) * 4 : SplatIdx * 4;
10203 else
10204 Offset = isLittleEndian ? (1 - SplatIdx) * 8 : SplatIdx * 8;
10205
10206 // If the width of the load is the same as the width of the splat,
10207 // loading with an offset would load the wrong memory.
10208 if (LD->getValueType(0).getSizeInBits() == (IsFourByte ? 32 : 64))
10209 Offset = 0;
10210
10211 SDValue BasePtr = LD->getBasePtr();
10212 if (Offset != 0)
10213 BasePtr = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
10214 BasePtr, DAG.getIntPtrConstant(Offset, dl));
10215 SDValue Ops[] = {
10216 LD->getChain(), // Chain
10217 BasePtr, // BasePtr
10218 DAG.getValueType(Op.getValueType()) // VT
10219 };
10220 SDVTList VTL =
10221 DAG.getVTList(IsFourByte ? MVT::v4i32 : MVT::v2i64, MVT::Other);
10222 SDValue LdSplt =
10223 DAG.getMemIntrinsicNode(PPCISD::LD_SPLAT, dl, VTL,
10224 Ops, LD->getMemoryVT(), LD->getMemOperand());
10225 DAG.ReplaceAllUsesOfValueWith(InputLoad->getValue(1), LdSplt.getValue(1));
10226 if (LdSplt.getValueType() != SVOp->getValueType(0))
10227 LdSplt = DAG.getBitcast(SVOp->getValueType(0), LdSplt);
10228 return LdSplt;
10229 }
10230 }
10231
10232 // All v2i64 and v2f64 shuffles are legal
10233 if (VT == MVT::v2i64 || VT == MVT::v2f64)
10234 return Op;
10235
10236 if (Subtarget.hasP9Vector() &&
10237 PPC::isXXINSERTWMask(SVOp, ShiftElts, InsertAtByte, Swap,
10238 isLittleEndian)) {
10239 if (V2.isUndef())
10240 V2 = V1;
10241 else if (Swap)
10242 std::swap(V1, V2);
10243 SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
10244 SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V2);
10245 if (ShiftElts) {
10246 SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v4i32, Conv2, Conv2,
10247 DAG.getConstant(ShiftElts, dl, MVT::i32));
10248 SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v4i32, Conv1, Shl,
10249 DAG.getConstant(InsertAtByte, dl, MVT::i32));
10250 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
10251 }
10252 SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v4i32, Conv1, Conv2,
10253 DAG.getConstant(InsertAtByte, dl, MVT::i32));
10254 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
10255 }
10256
10257 if (Subtarget.hasPrefixInstrs() && Subtarget.hasP10Vector()) {
10258 SDValue SplatInsertNode;
10259 if ((SplatInsertNode = lowerToXXSPLTI32DX(SVOp, DAG)))
10260 return SplatInsertNode;
10261 }
10262
10263 if (Subtarget.hasP9Altivec()) {
10264 SDValue NewISDNode;
10265 if ((NewISDNode = lowerToVINSERTH(SVOp, DAG)))
10266 return NewISDNode;
10267
10268 if ((NewISDNode = lowerToVINSERTB(SVOp, DAG)))
10269 return NewISDNode;
10270 }
10271
10272 if (Subtarget.hasVSX() &&
10273 PPC::isXXSLDWIShuffleMask(SVOp, ShiftElts, Swap, isLittleEndian)) {
10274 if (Swap)
10275 std::swap(V1, V2);
10276 SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
10277 SDValue Conv2 =
10278 DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V2.isUndef() ? V1 : V2);
10279
10280 SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v4i32, Conv1, Conv2,
10281 DAG.getConstant(ShiftElts, dl, MVT::i32));
10282 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Shl);
10283 }
10284
10285 if (Subtarget.hasVSX() &&
10286 PPC::isXXPERMDIShuffleMask(SVOp, ShiftElts, Swap, isLittleEndian)) {
10287 if (Swap)
10288 std::swap(V1, V2);
10289 SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V1);
10290 SDValue Conv2 =
10291 DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V2.isUndef() ? V1 : V2);
10292
10293 SDValue PermDI = DAG.getNode(PPCISD::XXPERMDI, dl, MVT::v2i64, Conv1, Conv2,
10294 DAG.getConstant(ShiftElts, dl, MVT::i32));
10295 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, PermDI);
10296 }
10297
10298 if (Subtarget.hasP9Vector()) {
10299 if (PPC::isXXBRHShuffleMask(SVOp)) {
10300 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V1);
10301 SDValue ReveHWord = DAG.getNode(ISD::BSWAP, dl, MVT::v8i16, Conv);
10302 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveHWord);
10303 } else if (PPC::isXXBRWShuffleMask(SVOp)) {
10304 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
10305 SDValue ReveWord = DAG.getNode(ISD::BSWAP, dl, MVT::v4i32, Conv);
10306 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveWord);
10307 } else if (PPC::isXXBRDShuffleMask(SVOp)) {
10308 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V1);
10309 SDValue ReveDWord = DAG.getNode(ISD::BSWAP, dl, MVT::v2i64, Conv);
10310 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveDWord);
10311 } else if (PPC::isXXBRQShuffleMask(SVOp)) {
10312 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v1i128, V1);
10313 SDValue ReveQWord = DAG.getNode(ISD::BSWAP, dl, MVT::v1i128, Conv);
10314 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveQWord);
10315 }
10316 }
10317
10318 if (Subtarget.hasVSX()) {
10319 if (V2.isUndef() && PPC::isSplatShuffleMask(SVOp, 4)) {
10320 int SplatIdx = PPC::getSplatIdxForPPCMnemonics(SVOp, 4, DAG);
10321
10322 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
10323 SDValue Splat = DAG.getNode(PPCISD::XXSPLT, dl, MVT::v4i32, Conv,
10324 DAG.getConstant(SplatIdx, dl, MVT::i32));
10325 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Splat);
10326 }
10327
10328 // Left shifts of 8 bytes are actually swaps. Convert accordingly.
10329 if (V2.isUndef() && PPC::isVSLDOIShuffleMask(SVOp, 1, DAG) == 8) {
10330 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, V1);
10331 SDValue Swap = DAG.getNode(PPCISD::SWAP_NO_CHAIN, dl, MVT::v2f64, Conv);
10332 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Swap);
10333 }
10334 }
10335
10336 // Cases that are handled by instructions that take permute immediates
10337 // (such as vsplt*) should be left as VECTOR_SHUFFLE nodes so they can be
10338 // selected by the instruction selector.
10339 if (V2.isUndef()) {
10340 if (PPC::isSplatShuffleMask(SVOp, 1) ||
10341 PPC::isSplatShuffleMask(SVOp, 2) ||
10342 PPC::isSplatShuffleMask(SVOp, 4) ||
10343 PPC::isVPKUWUMShuffleMask(SVOp, 1, DAG) ||
10344 PPC::isVPKUHUMShuffleMask(SVOp, 1, DAG) ||
10345 PPC::isVSLDOIShuffleMask(SVOp, 1, DAG) != -1 ||
10346 PPC::isVMRGLShuffleMask(SVOp, 1, 1, DAG) ||
10347 PPC::isVMRGLShuffleMask(SVOp, 2, 1, DAG) ||
10348 PPC::isVMRGLShuffleMask(SVOp, 4, 1, DAG) ||
10349 PPC::isVMRGHShuffleMask(SVOp, 1, 1, DAG) ||
10350 PPC::isVMRGHShuffleMask(SVOp, 2, 1, DAG) ||
10351 PPC::isVMRGHShuffleMask(SVOp, 4, 1, DAG) ||
10352 (Subtarget.hasP8Altivec() && (
10353 PPC::isVPKUDUMShuffleMask(SVOp, 1, DAG) ||
10354 PPC::isVMRGEOShuffleMask(SVOp, true, 1, DAG) ||
10355 PPC::isVMRGEOShuffleMask(SVOp, false, 1, DAG)))) {
10356 return Op;
10357 }
10358 }
10359
10360 // Altivec has a variety of "shuffle immediates" that take two vector inputs
10361 // and produce a fixed permutation. If any of these match, do not lower to
10362 // VPERM.
10363 unsigned int ShuffleKind = isLittleEndian ? 2 : 0;
10364 if (PPC::isVPKUWUMShuffleMask(SVOp, ShuffleKind, DAG) ||
10365 PPC::isVPKUHUMShuffleMask(SVOp, ShuffleKind, DAG) ||
10366 PPC::isVSLDOIShuffleMask(SVOp, ShuffleKind, DAG) != -1 ||
10367 PPC::isVMRGLShuffleMask(SVOp, 1, ShuffleKind, DAG) ||
10368 PPC::isVMRGLShuffleMask(SVOp, 2, ShuffleKind, DAG) ||
10369 PPC::isVMRGLShuffleMask(SVOp, 4, ShuffleKind, DAG) ||
10370 PPC::isVMRGHShuffleMask(SVOp, 1, ShuffleKind, DAG) ||
10371 PPC::isVMRGHShuffleMask(SVOp, 2, ShuffleKind, DAG) ||
10372 PPC::isVMRGHShuffleMask(SVOp, 4, ShuffleKind, DAG) ||
10373 (Subtarget.hasP8Altivec() && (
10374 PPC::isVPKUDUMShuffleMask(SVOp, ShuffleKind, DAG) ||
10375 PPC::isVMRGEOShuffleMask(SVOp, true, ShuffleKind, DAG) ||
10376 PPC::isVMRGEOShuffleMask(SVOp, false, ShuffleKind, DAG))))
10377 return Op;
10378
10379 // Check to see if this is a shuffle of 4-byte values. If so, we can use our
10380 // perfect shuffle table to emit an optimal matching sequence.
10381 ArrayRef<int> PermMask = SVOp->getMask();
10382
10383 if (!DisablePerfectShuffle && !isLittleEndian) {
10384 unsigned PFIndexes[4];
10385 bool isFourElementShuffle = true;
10386 for (unsigned i = 0; i != 4 && isFourElementShuffle;
10387 ++i) { // Element number
10388 unsigned EltNo = 8; // Start out undef.
10389 for (unsigned j = 0; j != 4; ++j) { // Intra-element byte.
10390 if (PermMask[i * 4 + j] < 0)
10391 continue; // Undef, ignore it.
10392
10393 unsigned ByteSource = PermMask[i * 4 + j];
10394 if ((ByteSource & 3) != j) {
10395 isFourElementShuffle = false;
10396 break;
10397 }
10398
10399 if (EltNo == 8) {
10400 EltNo = ByteSource / 4;
10401 } else if (EltNo != ByteSource / 4) {
10402 isFourElementShuffle = false;
10403 break;
10404 }
10405 }
10406 PFIndexes[i] = EltNo;
10407 }
10408
10409 // If this shuffle can be expressed as a shuffle of 4-byte elements, use the
10410 // perfect shuffle table to determine if it is cost effective to do this as
10411 // discrete instructions, or whether we should use a vperm.
10412 // For now, we skip this for little endian until such time as we have a
10413 // little-endian perfect shuffle table.
10414 if (isFourElementShuffle) {
10415 // Compute the index in the perfect shuffle table.
10416 unsigned PFTableIndex = PFIndexes[0] * 9 * 9 * 9 + PFIndexes[1] * 9 * 9 +
10417 PFIndexes[2] * 9 + PFIndexes[3];
10418
10419 unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
10420 unsigned Cost = (PFEntry >> 30);
10421
10422 // Determining when to avoid vperm is tricky. Many things affect the cost
10423 // of vperm, particularly how many times the perm mask needs to be
10424 // computed. For example, if the perm mask can be hoisted out of a loop or
10425 // is already used (perhaps because there are multiple permutes with the
10426 // same shuffle mask?) the vperm has a cost of 1. OTOH, hoisting the
10427 // permute mask out of the loop requires an extra register.
10428 //
10429 // As a compromise, we only emit discrete instructions if the shuffle can
10430 // be generated in 3 or fewer operations. When we have loop information
10431 // available, if this block is within a loop, we should avoid using vperm
10432 // for 3-operation perms and use a constant pool load instead.
10433 if (Cost < 3)
10434 return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl);
10435 }
10436 }
10437
10438 // Lower this to a VPERM(V1, V2, V3) expression, where V3 is a constant
10439 // vector that will get spilled to the constant pool.
10440 if (V2.isUndef()) V2 = V1;
10441
10442 return LowerVPERM(Op, DAG, PermMask, VT, V1, V2);
10443}
10444
10445SDValue PPCTargetLowering::LowerVPERM(SDValue Op, SelectionDAG &DAG,
10446 ArrayRef<int> PermMask, EVT VT,
10447 SDValue V1, SDValue V2) const {
10448 unsigned Opcode = PPCISD::VPERM;
10449 EVT ValType = V1.getValueType();
10450 SDLoc dl(Op);
10451 bool NeedSwap = false;
10452 bool isLittleEndian = Subtarget.isLittleEndian();
10453 bool isPPC64 = Subtarget.isPPC64();
10454
10455 if (Subtarget.hasVSX() && Subtarget.hasP9Vector() &&
10456 (V1->hasOneUse() || V2->hasOneUse())) {
10457 LLVM_DEBUG(dbgs() << "At least one of two input vectors are dead - using "
10458 "XXPERM instead\n");
10459 Opcode = PPCISD::XXPERM;
10460
10461 // The second input to XXPERM is also an output so if the second input has
10462 // multiple uses then copying is necessary, as a result we want the
10463 // single-use operand to be used as the second input to prevent copying.
10464 if ((!isLittleEndian && !V2->hasOneUse() && V1->hasOneUse()) ||
10465 (isLittleEndian && !V1->hasOneUse() && V2->hasOneUse())) {
10466 std::swap(V1, V2);
10467 NeedSwap = !NeedSwap;
10468 }
10469 }
10470
10471 // The SHUFFLE_VECTOR mask is almost exactly what we want for vperm, except
10472 // that it is in input element units, not in bytes. Convert now.
10473
10474 // For little endian, the order of the input vectors is reversed, and
10475 // the permutation mask is complemented with respect to 31. This is
10476 // necessary to produce proper semantics with the big-endian-based vperm
10477 // instruction.
10478 EVT EltVT = V1.getValueType().getVectorElementType();
10479 unsigned BytesPerElement = EltVT.getSizeInBits() / 8;
10480
10481 bool V1HasXXSWAPD = V1->getOperand(0)->getOpcode() == PPCISD::XXSWAPD;
10482 bool V2HasXXSWAPD = V2->getOperand(0)->getOpcode() == PPCISD::XXSWAPD;
10483
10484 /*
10485 Vectors will be appended like so: [ V1 | V2 ]
10486 XXSWAPD on V1:
10487 [ A | B | C | D ] -> [ C | D | A | B ]
10488 0-3 4-7 8-11 12-15 0-3 4-7 8-11 12-15
10489 i.e. index of A, B += 8, and index of C, D -= 8.
10490 XXSWAPD on V2:
10491 [ E | F | G | H ] -> [ G | H | E | F ]
10492 16-19 20-23 24-27 28-31 16-19 20-23 24-27 28-31
10493 i.e. index of E, F += 8, index of G, H -= 8
10494 Swap V1 and V2:
10495 [ V1 | V2 ] -> [ V2 | V1 ]
10496 0-15 16-31 0-15 16-31
10497 i.e. index of V1 += 16, index of V2 -= 16
10498 */
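// For example, on little endian an entry that selects byte b of the
// concatenated [ V1 | V2 ] pair is emitted as 31 - b, and V1/V2 are swapped
// just before the VPERM node is built, reproducing big-endian vperm semantics.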
10499
10500 SmallVector<SDValue, 16> ResultMask;
10501 for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i) {
10502 unsigned SrcElt = PermMask[i] < 0 ? 0 : PermMask[i];
10503
10504 if (V1HasXXSWAPD) {
10505 if (SrcElt < 8)
10506 SrcElt += 8;
10507 else if (SrcElt < 16)
10508 SrcElt -= 8;
10509 }
10510 if (V2HasXXSWAPD) {
10511 if (SrcElt > 23)
10512 SrcElt -= 8;
10513 else if (SrcElt > 15)
10514 SrcElt += 8;
10515 }
10516 if (NeedSwap) {
10517 if (SrcElt < 16)
10518 SrcElt += 16;
10519 else
10520 SrcElt -= 16;
10521 }
10522 for (unsigned j = 0; j != BytesPerElement; ++j)
10523 if (isLittleEndian)
10524 ResultMask.push_back(
10525 DAG.getConstant(31 - (SrcElt * BytesPerElement + j), dl, MVT::i32));
10526 else
10527 ResultMask.push_back(
10528 DAG.getConstant(SrcElt * BytesPerElement + j, dl, MVT::i32));
10529 }
10530
10531 if (V1HasXXSWAPD) {
10532 dl = SDLoc(V1->getOperand(0));
10533 V1 = V1->getOperand(0)->getOperand(1);
10534 }
10535 if (V2HasXXSWAPD) {
10536 dl = SDLoc(V2->getOperand(0));
10537 V2 = V2->getOperand(0)->getOperand(1);
10538 }
10539
10540 if (isPPC64 && (V1HasXXSWAPD || V2HasXXSWAPD)) {
10541 if (ValType != MVT::v2f64)
10542 V1 = DAG.getBitcast(MVT::v2f64, V1);
10543 if (V2.getValueType() != MVT::v2f64)
10544 V2 = DAG.getBitcast(MVT::v2f64, V2);
10545 }
10546
10547 ShufflesHandledWithVPERM++;
10548 SDValue VPermMask = DAG.getBuildVector(MVT::v16i8, dl, ResultMask);
10549 LLVM_DEBUG({
10550 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
10551 if (Opcode == PPCISD::XXPERM) {
10552 dbgs() << "Emitting a XXPERM for the following shuffle:\n";
10553 } else {
10554 dbgs() << "Emitting a VPERM for the following shuffle:\n";
10555 }
10556 SVOp->dump();
10557 dbgs() << "With the following permute control vector:\n";
10558 VPermMask.dump();
10559 });
10560
10561 if (Opcode == PPCISD::XXPERM)
10562 VPermMask = DAG.getBitcast(MVT::v4i32, VPermMask);
10563
10564 // For little endian we only need to swap the two inputs here;
10565 // the permute mask above was already computed with the swap in mind.
10566 if (isLittleEndian)
10567 std::swap(V1, V2);
10568
10569 SDValue VPERMNode =
10570 DAG.getNode(Opcode, dl, V1.getValueType(), V1, V2, VPermMask);
10571
10572 VPERMNode = DAG.getBitcast(ValType, VPERMNode);
10573 return VPERMNode;
10574}
10575
10576/// getVectorCompareInfo - Given an intrinsic, return false if it is not a
10577/// vector comparison. If it is, return true and fill in Opc/isDot with
10578/// information about the intrinsic.
10579static bool getVectorCompareInfo(SDValue Intrin, int &CompareOpc,
10580 bool &isDot, const PPCSubtarget &Subtarget) {
10581 unsigned IntrinsicID = Intrin.getConstantOperandVal(0);
10582 CompareOpc = -1;
10583 isDot = false;
10584 switch (IntrinsicID) {
10585 default:
10586 return false;
10587 // Comparison predicates.
10588 case Intrinsic::ppc_altivec_vcmpbfp_p:
10589 CompareOpc = 966;
10590 isDot = true;
10591 break;
10592 case Intrinsic::ppc_altivec_vcmpeqfp_p:
10593 CompareOpc = 198;
10594 isDot = true;
10595 break;
10596 case Intrinsic::ppc_altivec_vcmpequb_p:
10597 CompareOpc = 6;
10598 isDot = true;
10599 break;
10600 case Intrinsic::ppc_altivec_vcmpequh_p:
10601 CompareOpc = 70;
10602 isDot = true;
10603 break;
10604 case Intrinsic::ppc_altivec_vcmpequw_p:
10605 CompareOpc = 134;
10606 isDot = true;
10607 break;
10608 case Intrinsic::ppc_altivec_vcmpequd_p:
10609 if (Subtarget.hasVSX() || Subtarget.hasP8Altivec()) {
10610 CompareOpc = 199;
10611 isDot = true;
10612 } else
10613 return false;
10614 break;
10615 case Intrinsic::ppc_altivec_vcmpneb_p:
10616 case Intrinsic::ppc_altivec_vcmpneh_p:
10617 case Intrinsic::ppc_altivec_vcmpnew_p:
10618 case Intrinsic::ppc_altivec_vcmpnezb_p:
10619 case Intrinsic::ppc_altivec_vcmpnezh_p:
10620 case Intrinsic::ppc_altivec_vcmpnezw_p:
10621 if (Subtarget.hasP9Altivec()) {
10622 switch (IntrinsicID) {
10623 default:
10624 llvm_unreachable("Unknown comparison intrinsic.");
10625 case Intrinsic::ppc_altivec_vcmpneb_p:
10626 CompareOpc = 7;
10627 break;
10628 case Intrinsic::ppc_altivec_vcmpneh_p:
10629 CompareOpc = 71;
10630 break;
10631 case Intrinsic::ppc_altivec_vcmpnew_p:
10632 CompareOpc = 135;
10633 break;
10634 case Intrinsic::ppc_altivec_vcmpnezb_p:
10635 CompareOpc = 263;
10636 break;
10637 case Intrinsic::ppc_altivec_vcmpnezh_p:
10638 CompareOpc = 327;
10639 break;
10640 case Intrinsic::ppc_altivec_vcmpnezw_p:
10641 CompareOpc = 391;
10642 break;
10643 }
10644 isDot = true;
10645 } else
10646 return false;
10647 break;
10648 case Intrinsic::ppc_altivec_vcmpgefp_p:
10649 CompareOpc = 454;
10650 isDot = true;
10651 break;
10652 case Intrinsic::ppc_altivec_vcmpgtfp_p:
10653 CompareOpc = 710;
10654 isDot = true;
10655 break;
10656 case Intrinsic::ppc_altivec_vcmpgtsb_p:
10657 CompareOpc = 774;
10658 isDot = true;
10659 break;
10660 case Intrinsic::ppc_altivec_vcmpgtsh_p:
10661 CompareOpc = 838;
10662 isDot = true;
10663 break;
10664 case Intrinsic::ppc_altivec_vcmpgtsw_p:
10665 CompareOpc = 902;
10666 isDot = true;
10667 break;
10668 case Intrinsic::ppc_altivec_vcmpgtsd_p:
10669 if (Subtarget.hasVSX() || Subtarget.hasP8Altivec()) {
10670 CompareOpc = 967;
10671 isDot = true;
10672 } else
10673 return false;
10674 break;
10675 case Intrinsic::ppc_altivec_vcmpgtub_p:
10676 CompareOpc = 518;
10677 isDot = true;
10678 break;
10679 case Intrinsic::ppc_altivec_vcmpgtuh_p:
10680 CompareOpc = 582;
10681 isDot = true;
10682 break;
10683 case Intrinsic::ppc_altivec_vcmpgtuw_p:
10684 CompareOpc = 646;
10685 isDot = true;
10686 break;
10687 case Intrinsic::ppc_altivec_vcmpgtud_p:
10688 if (Subtarget.hasVSX() || Subtarget.hasP8Altivec()) {
10689 CompareOpc = 711;
10690 isDot = true;
10691 } else
10692 return false;
10693 break;
10694
10695 case Intrinsic::ppc_altivec_vcmpequq:
10696 case Intrinsic::ppc_altivec_vcmpgtsq:
10697 case Intrinsic::ppc_altivec_vcmpgtuq:
10698 if (!Subtarget.isISA3_1())
10699 return false;
10700 switch (IntrinsicID) {
10701 default:
10702 llvm_unreachable("Unknown comparison intrinsic.");
10703 case Intrinsic::ppc_altivec_vcmpequq:
10704 CompareOpc = 455;
10705 break;
10706 case Intrinsic::ppc_altivec_vcmpgtsq:
10707 CompareOpc = 903;
10708 break;
10709 case Intrinsic::ppc_altivec_vcmpgtuq:
10710 CompareOpc = 647;
10711 break;
10712 }
10713 break;
10714
10715 // VSX predicate comparisons use the same infrastructure
10716 case Intrinsic::ppc_vsx_xvcmpeqdp_p:
10717 case Intrinsic::ppc_vsx_xvcmpgedp_p:
10718 case Intrinsic::ppc_vsx_xvcmpgtdp_p:
10719 case Intrinsic::ppc_vsx_xvcmpeqsp_p:
10720 case Intrinsic::ppc_vsx_xvcmpgesp_p:
10721 case Intrinsic::ppc_vsx_xvcmpgtsp_p:
10722 if (Subtarget.hasVSX()) {
10723 switch (IntrinsicID) {
10724 case Intrinsic::ppc_vsx_xvcmpeqdp_p:
10725 CompareOpc = 99;
10726 break;
10727 case Intrinsic::ppc_vsx_xvcmpgedp_p:
10728 CompareOpc = 115;
10729 break;
10730 case Intrinsic::ppc_vsx_xvcmpgtdp_p:
10731 CompareOpc = 107;
10732 break;
10733 case Intrinsic::ppc_vsx_xvcmpeqsp_p:
10734 CompareOpc = 67;
10735 break;
10736 case Intrinsic::ppc_vsx_xvcmpgesp_p:
10737 CompareOpc = 83;
10738 break;
10739 case Intrinsic::ppc_vsx_xvcmpgtsp_p:
10740 CompareOpc = 75;
10741 break;
10742 }
10743 isDot = true;
10744 } else
10745 return false;
10746 break;
10747
10748 // Normal Comparisons.
10749 case Intrinsic::ppc_altivec_vcmpbfp:
10750 CompareOpc = 966;
10751 break;
10752 case Intrinsic::ppc_altivec_vcmpeqfp:
10753 CompareOpc = 198;
10754 break;
10755 case Intrinsic::ppc_altivec_vcmpequb:
10756 CompareOpc = 6;
10757 break;
10758 case Intrinsic::ppc_altivec_vcmpequh:
10759 CompareOpc = 70;
10760 break;
10761 case Intrinsic::ppc_altivec_vcmpequw:
10762 CompareOpc = 134;
10763 break;
10764 case Intrinsic::ppc_altivec_vcmpequd:
10765 if (Subtarget.hasP8Altivec())
10766 CompareOpc = 199;
10767 else
10768 return false;
10769 break;
10770 case Intrinsic::ppc_altivec_vcmpneb:
10771 case Intrinsic::ppc_altivec_vcmpneh:
10772 case Intrinsic::ppc_altivec_vcmpnew:
10773 case Intrinsic::ppc_altivec_vcmpnezb:
10774 case Intrinsic::ppc_altivec_vcmpnezh:
10775 case Intrinsic::ppc_altivec_vcmpnezw:
10776 if (Subtarget.hasP9Altivec())
10777 switch (IntrinsicID) {
10778 default:
10779 llvm_unreachable("Unknown comparison intrinsic.");
10780 case Intrinsic::ppc_altivec_vcmpneb:
10781 CompareOpc = 7;
10782 break;
10783 case Intrinsic::ppc_altivec_vcmpneh:
10784 CompareOpc = 71;
10785 break;
10786 case Intrinsic::ppc_altivec_vcmpnew:
10787 CompareOpc = 135;
10788 break;
10789 case Intrinsic::ppc_altivec_vcmpnezb:
10790 CompareOpc = 263;
10791 break;
10792 case Intrinsic::ppc_altivec_vcmpnezh:
10793 CompareOpc = 327;
10794 break;
10795 case Intrinsic::ppc_altivec_vcmpnezw:
10796 CompareOpc = 391;
10797 break;
10798 }
10799 else
10800 return false;
10801 break;
10802 case Intrinsic::ppc_altivec_vcmpgefp:
10803 CompareOpc = 454;
10804 break;
10805 case Intrinsic::ppc_altivec_vcmpgtfp:
10806 CompareOpc = 710;
10807 break;
10808 case Intrinsic::ppc_altivec_vcmpgtsb:
10809 CompareOpc = 774;
10810 break;
10811 case Intrinsic::ppc_altivec_vcmpgtsh:
10812 CompareOpc = 838;
10813 break;
10814 case Intrinsic::ppc_altivec_vcmpgtsw:
10815 CompareOpc = 902;
10816 break;
10817 case Intrinsic::ppc_altivec_vcmpgtsd:
10818 if (Subtarget.hasP8Altivec())
10819 CompareOpc = 967;
10820 else
10821 return false;
10822 break;
10823 case Intrinsic::ppc_altivec_vcmpgtub:
10824 CompareOpc = 518;
10825 break;
10826 case Intrinsic::ppc_altivec_vcmpgtuh:
10827 CompareOpc = 582;
10828 break;
10829 case Intrinsic::ppc_altivec_vcmpgtuw:
10830 CompareOpc = 646;
10831 break;
10832 case Intrinsic::ppc_altivec_vcmpgtud:
10833 if (Subtarget.hasP8Altivec())
10834 CompareOpc = 711;
10835 else
10836 return false;
10837 break;
10838 case Intrinsic::ppc_altivec_vcmpequq_p:
10839 case Intrinsic::ppc_altivec_vcmpgtsq_p:
10840 case Intrinsic::ppc_altivec_vcmpgtuq_p:
10841 if (!Subtarget.isISA3_1())
10842 return false;
10843 switch (IntrinsicID) {
10844 default:
10845 llvm_unreachable("Unknown comparison intrinsic.");
10846 case Intrinsic::ppc_altivec_vcmpequq_p:
10847 CompareOpc = 455;
10848 break;
10849 case Intrinsic::ppc_altivec_vcmpgtsq_p:
10850 CompareOpc = 903;
10851 break;
10852 case Intrinsic::ppc_altivec_vcmpgtuq_p:
10853 CompareOpc = 647;
10854 break;
10855 }
10856 isDot = true;
10857 break;
10858 }
10859 return true;
10860}
10861
10862/// LowerINTRINSIC_WO_CHAIN - If this is an intrinsic that we want to custom
10863/// lower, do it, otherwise return null.
10864SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
10865 SelectionDAG &DAG) const {
10866 unsigned IntrinsicID = Op.getConstantOperandVal(0);
10867
10868 SDLoc dl(Op);
10869
10870 switch (IntrinsicID) {
10871 case Intrinsic::thread_pointer:
10872 // Reads the thread pointer register, used for __builtin_thread_pointer.
10873 if (Subtarget.isPPC64())
10874 return DAG.getRegister(PPC::X13, MVT::i64);
10875 return DAG.getRegister(PPC::R2, MVT::i32);
10876
10877 case Intrinsic::ppc_rldimi: {
10878 assert(Subtarget.isPPC64() && "rldimi is only available in 64-bit!");
10879 SDValue Src = Op.getOperand(1);
10880 APInt Mask = Op.getConstantOperandAPInt(4);
10881 if (Mask.isZero())
10882 return Op.getOperand(2);
10883 if (Mask.isAllOnes())
10884 return DAG.getNode(ISD::ROTL, dl, MVT::i64, Src, Op.getOperand(3));
10885 uint64_t SH = Op.getConstantOperandVal(3);
10886 unsigned MB = 0, ME = 0;
10887 if (!isRunOfOnes64(Mask.getZExtValue(), MB, ME))
10888 report_fatal_error("invalid rldimi mask!");
10889 // rldimi requires ME=63-SH, otherwise rotation is needed before rldimi.
10890 if (ME < 63 - SH) {
10891 Src = DAG.getNode(ISD::ROTL, dl, MVT::i64, Src,
10892 DAG.getConstant(ME + SH + 1, dl, MVT::i32));
10893 } else if (ME > 63 - SH) {
10894 Src = DAG.getNode(ISD::ROTL, dl, MVT::i64, Src,
10895 DAG.getConstant(ME + SH - 63, dl, MVT::i32));
10896 }
10897 return SDValue(
10898 DAG.getMachineNode(PPC::RLDIMI, dl, MVT::i64,
10899 {Op.getOperand(2), Src,
10900 DAG.getTargetConstant(63 - ME, dl, MVT::i32),
10901 DAG.getTargetConstant(MB, dl, MVT::i32)}),
10902 0);
10903 }
10904
10905 case Intrinsic::ppc_rlwimi: {
10906 APInt Mask = Op.getConstantOperandAPInt(4);
10907 if (Mask.isZero())
10908 return Op.getOperand(2);
10909 if (Mask.isAllOnes())
10910 return DAG.getNode(ISD::ROTL, dl, MVT::i32, Op.getOperand(1),
10911 Op.getOperand(3));
10912 unsigned MB = 0, ME = 0;
10913 if (!isRunOfOnes(Mask.getZExtValue(), MB, ME))
10914 report_fatal_error("invalid rlwimi mask!");
10915 return SDValue(DAG.getMachineNode(
10916 PPC::RLWIMI, dl, MVT::i32,
10917 {Op.getOperand(2), Op.getOperand(1), Op.getOperand(3),
10918 DAG.getTargetConstant(MB, dl, MVT::i32),
10919 DAG.getTargetConstant(ME, dl, MVT::i32)}),
10920 0);
10921 }
10922
10923 case Intrinsic::ppc_rlwnm: {
10924 if (Op.getConstantOperandVal(3) == 0)
10925 return DAG.getConstant(0, dl, MVT::i32);
10926 unsigned MB = 0, ME = 0;
10927 if (!isRunOfOnes(Op.getConstantOperandVal(3), MB, ME))
10928 report_fatal_error("invalid rlwnm mask!");
10929 return SDValue(
10930 DAG.getMachineNode(PPC::RLWNM, dl, MVT::i32,
10931 {Op.getOperand(1), Op.getOperand(2),
10932 DAG.getTargetConstant(MB, dl, MVT::i32),
10933 DAG.getTargetConstant(ME, dl, MVT::i32)}),
10934 0);
10935 }
10936
10937 case Intrinsic::ppc_mma_disassemble_acc: {
10938 if (Subtarget.isISAFuture()) {
10939 EVT ReturnTypes[] = {MVT::v256i1, MVT::v256i1};
10940 SDValue WideVec =
10941 SDValue(DAG.getMachineNode(PPC::DMXXEXTFDMR512, dl, ReturnTypes,
10942 Op.getOperand(1)),
10943 0);
10944 SmallVector<SDValue, 4> RetOps;
10945 SDValue Value = SDValue(WideVec.getNode(), 0);
10946 SDValue Value2 = SDValue(WideVec.getNode(), 1);
10947
10948 SDValue Extract;
10949 Extract = DAG.getNode(
10950 PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8,
10951 Subtarget.isLittleEndian() ? Value2 : Value,
10952 DAG.getConstant(Subtarget.isLittleEndian() ? 1 : 0,
10953 dl, getPointerTy(DAG.getDataLayout())));
10954 RetOps.push_back(Extract);
10955 Extract = DAG.getNode(
10956 PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8,
10957 Subtarget.isLittleEndian() ? Value2 : Value,
10958 DAG.getConstant(Subtarget.isLittleEndian() ? 0 : 1,
10959 dl, getPointerTy(DAG.getDataLayout())));
10960 RetOps.push_back(Extract);
10961 Extract = DAG.getNode(
10962 PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8,
10963 Subtarget.isLittleEndian() ? Value : Value2,
10964 DAG.getConstant(Subtarget.isLittleEndian() ? 1 : 0,
10965 dl, getPointerTy(DAG.getDataLayout())));
10966 RetOps.push_back(Extract);
10967 Extract = DAG.getNode(
10968 PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8,
10969 Subtarget.isLittleEndian() ? Value : Value2,
10970 DAG.getConstant(Subtarget.isLittleEndian() ? 0 : 1,
10971 dl, getPointerTy(DAG.getDataLayout())));
10972 RetOps.push_back(Extract);
10973 return DAG.getMergeValues(RetOps, dl);
10974 }
10975 [[fallthrough]];
10976 }
10977 case Intrinsic::ppc_vsx_disassemble_pair: {
10978 int NumVecs = 2;
10979 SDValue WideVec = Op.getOperand(1);
10980 if (IntrinsicID == Intrinsic::ppc_mma_disassemble_acc) {
10981 NumVecs = 4;
10982 WideVec = DAG.getNode(PPCISD::XXMFACC, dl, MVT::v512i1, WideVec);
10983 }
10984 SmallVector<SDValue, 4> RetOps;
10985 for (int VecNo = 0; VecNo < NumVecs; VecNo++) {
10986 SDValue Extract = DAG.getNode(
10987 PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8, WideVec,
10988 DAG.getConstant(Subtarget.isLittleEndian() ? NumVecs - 1 - VecNo
10989 : VecNo,
10990 dl, getPointerTy(DAG.getDataLayout())));
10991 RetOps.push_back(Extract);
10992 }
10993 return DAG.getMergeValues(RetOps, dl);
10994 }
10995
10996 case Intrinsic::ppc_mma_xxmfacc:
10997 case Intrinsic::ppc_mma_xxmtacc: {
10998 // Allow pre-isa-future subtargets to lower as normal.
10999 if (!Subtarget.isISAFuture())
11000 return SDValue();
11001 // The intrinsics for xxmtacc and xxmfacc take one argument of
11002 // type v512i1. For future CPUs, the corresponding wacc instruction
11003 // dmxx[inst|extf]dmr512 is always generated for type v512i1, negating
11004 // the need to produce the xxm[t|f]acc.
11005 SDValue WideVec = Op.getOperand(1);
11006 DAG.ReplaceAllUsesWith(Op, WideVec);
11007 return SDValue();
11008 }
11009
11010 case Intrinsic::ppc_unpack_longdouble: {
11011 auto *Idx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
11012 assert(Idx && (Idx->getSExtValue() == 0 || Idx->getSExtValue() == 1) &&
11013 "Argument of long double unpack must be 0 or 1!");
11014 return DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::f64, Op.getOperand(1),
11015 DAG.getConstant(!!(Idx->getSExtValue()), dl,
11016 Idx->getValueType(0)));
11017 }
11018
11019 case Intrinsic::ppc_compare_exp_lt:
11020 case Intrinsic::ppc_compare_exp_gt:
11021 case Intrinsic::ppc_compare_exp_eq:
11022 case Intrinsic::ppc_compare_exp_uo: {
11023 unsigned Pred;
11024 switch (IntrinsicID) {
11025 case Intrinsic::ppc_compare_exp_lt:
11026 Pred = PPC::PRED_LT;
11027 break;
11028 case Intrinsic::ppc_compare_exp_gt:
11029 Pred = PPC::PRED_GT;
11030 break;
11031 case Intrinsic::ppc_compare_exp_eq:
11032 Pred = PPC::PRED_EQ;
11033 break;
11034 case Intrinsic::ppc_compare_exp_uo:
11035 Pred = PPC::PRED_UN;
11036 break;
11037 }
11038 return SDValue(
11039 DAG.getMachineNode(
11040 PPC::SELECT_CC_I4, dl, MVT::i32,
11041 {SDValue(DAG.getMachineNode(PPC::XSCMPEXPDP, dl, MVT::i32,
11042 Op.getOperand(1), Op.getOperand(2)),
11043 0),
11044 DAG.getConstant(1, dl, MVT::i32), DAG.getConstant(0, dl, MVT::i32),
11045 DAG.getTargetConstant(Pred, dl, MVT::i32)}),
11046 0);
11047 }
11048 case Intrinsic::ppc_test_data_class: {
11049 EVT OpVT = Op.getOperand(1).getValueType();
11050 unsigned CmprOpc = OpVT == MVT::f128 ? PPC::XSTSTDCQP
11051 : (OpVT == MVT::f64 ? PPC::XSTSTDCDP
11052 : PPC::XSTSTDCSP);
11053 return SDValue(
11054 DAG.getMachineNode(
11055 PPC::SELECT_CC_I4, dl, MVT::i32,
11056 {SDValue(DAG.getMachineNode(CmprOpc, dl, MVT::i32, Op.getOperand(2),
11057 Op.getOperand(1)),
11058 0),
11059 DAG.getConstant(1, dl, MVT::i32), DAG.getConstant(0, dl, MVT::i32),
11060 DAG.getTargetConstant(PPC::PRED_EQ, dl, MVT::i32)}),
11061 0);
11062 }
11063 case Intrinsic::ppc_fnmsub: {
11064 EVT VT = Op.getOperand(1).getValueType();
11065 if (!Subtarget.hasVSX() || (!Subtarget.hasFloat128() && VT == MVT::f128))
11066 return DAG.getNode(
11067 ISD::FNEG, dl, VT,
11068 DAG.getNode(ISD::FMA, dl, VT, Op.getOperand(1), Op.getOperand(2),
11069 DAG.getNode(ISD::FNEG, dl, VT, Op.getOperand(3))));
11070 return DAG.getNode(PPCISD::FNMSUB, dl, VT, Op.getOperand(1),
11071 Op.getOperand(2), Op.getOperand(3));
11072 }
11073 case Intrinsic::ppc_convert_f128_to_ppcf128:
11074 case Intrinsic::ppc_convert_ppcf128_to_f128: {
11075 RTLIB::Libcall LC = IntrinsicID == Intrinsic::ppc_convert_ppcf128_to_f128
11076 ? RTLIB::CONVERT_PPCF128_F128
11077 : RTLIB::CONVERT_F128_PPCF128;
11078 MakeLibCallOptions CallOptions;
11079 std::pair<SDValue, SDValue> Result =
11080 makeLibCall(DAG, LC, Op.getValueType(), Op.getOperand(1), CallOptions,
11081 dl, SDValue());
11082 return Result.first;
11083 }
11084 case Intrinsic::ppc_maxfe:
11085 case Intrinsic::ppc_maxfl:
11086 case Intrinsic::ppc_maxfs:
11087 case Intrinsic::ppc_minfe:
11088 case Intrinsic::ppc_minfl:
11089 case Intrinsic::ppc_minfs: {
11090 EVT VT = Op.getValueType();
11091 assert(
11092 all_of(Op->ops().drop_front(4),
11093 [VT](const SDUse &Use) { return Use.getValueType() == VT; }) &&
11094 "ppc_[max|min]f[e|l|s] must have uniform type arguments");
11095 (void)VT;
11096 ISD::CondCode CC = ISD::SETGT;
11097 if (IntrinsicID == Intrinsic::ppc_minfe ||
11098 IntrinsicID == Intrinsic::ppc_minfl ||
11099 IntrinsicID == Intrinsic::ppc_minfs)
11100 CC = ISD::SETLT;
11101 unsigned I = Op.getNumOperands() - 2, Cnt = I;
11102 SDValue Res = Op.getOperand(I);
11103 for (--I; Cnt != 0; --Cnt, I = (--I == 0 ? (Op.getNumOperands() - 1) : I)) {
11104 Res =
11105 DAG.getSelectCC(dl, Res, Op.getOperand(I), Res, Op.getOperand(I), CC);
11106 }
11107 return Res;
11108 }
11109 }
11110
11111 // If this is a lowered altivec predicate compare, CompareOpc is set to the
11112 // opcode number of the comparison.
11113 int CompareOpc;
11114 bool isDot;
11115 if (!getVectorCompareInfo(Op, CompareOpc, isDot, Subtarget))
11116 return SDValue(); // Don't custom lower most intrinsics.
11117
11118 // If this is a non-dot comparison, make the VCMP node and we are done.
11119 if (!isDot) {
11120 SDValue Tmp = DAG.getNode(PPCISD::VCMP, dl, Op.getOperand(2).getValueType(),
11121 Op.getOperand(1), Op.getOperand(2),
11122 DAG.getConstant(CompareOpc, dl, MVT::i32));
11123 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Tmp);
11124 }
11125
11126 // Create the PPCISD altivec 'dot' comparison node.
11127 SDValue Ops[] = {
11128 Op.getOperand(2), // LHS
11129 Op.getOperand(3), // RHS
11130 DAG.getConstant(CompareOpc, dl, MVT::i32)
11131 };
11132 EVT VTs[] = { Op.getOperand(2).getValueType(), MVT::Glue };
11133 SDValue CompNode = DAG.getNode(PPCISD::VCMP_rec, dl, VTs, Ops);
11134
11135 // Now that we have the comparison, emit a copy from the CR to a GPR.
11136 // This is flagged to the above dot comparison.
11137 SDValue Flags = DAG.getNode(PPCISD::MFOCRF, dl, MVT::i32,
11138 DAG.getRegister(PPC::CR6, MVT::i32),
11139 CompNode.getValue(1));
11140
11141 // Unpack the result based on how the target uses it.
11142 unsigned BitNo; // Bit # of CR6.
11143 bool InvertBit; // Invert result?
11144 switch (Op.getConstantOperandVal(1)) {
11145 default: // Can't happen, don't crash on invalid number though.
11146 case 0: // Return the value of the EQ bit of CR6.
11147 BitNo = 0; InvertBit = false;
11148 break;
11149 case 1: // Return the inverted value of the EQ bit of CR6.
11150 BitNo = 0; InvertBit = true;
11151 break;
11152 case 2: // Return the value of the LT bit of CR6.
11153 BitNo = 2; InvertBit = false;
11154 break;
11155 case 3: // Return the inverted value of the LT bit of CR6.
11156 BitNo = 2; InvertBit = true;
11157 break;
11158 }
11159
11160 // Shift the bit into the low position.
11161 Flags = DAG.getNode(ISD::SRL, dl, MVT::i32, Flags,
11162 DAG.getConstant(8 - (3 - BitNo), dl, MVT::i32));
11163 // Isolate the bit.
11164 Flags = DAG.getNode(ISD::AND, dl, MVT::i32, Flags,
11165 DAG.getConstant(1, dl, MVT::i32));
11166
11167 // If we are supposed to, toggle the bit.
11168 if (InvertBit)
11169 Flags = DAG.getNode(ISD::XOR, dl, MVT::i32, Flags,
11170 DAG.getConstant(1, dl, MVT::i32));
11171 return Flags;
11172}
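// For orientation, a rough example of the dot-compare path above (the
// builtin-to-intrinsic mapping is assumed from clang's altivec.h and is not
// restated by this file): a predicate form such as
//   %r = call i32 @llvm.ppc.altivec.vcmpequw.p(i32 2, <4 x i32> %a, <4 x i32> %b)
// carries the CR6 selector as its first argument, and the switch above reads
// it back as 0 = EQ bit, 1 = inverted EQ bit, 2 = LT bit, 3 = inverted LT bit.
// So vec_all_* style predicates test the LT ("all lanes true") bit, while
// vec_any_* style predicates use one of the inverted forms.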
11173
11174SDValue PPCTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
11175 SelectionDAG &DAG) const {
11176 // SelectionDAGBuilder::visitTargetIntrinsic may insert one extra chain to
11177 // the beginning of the argument list.
11178 int ArgStart = isa<ConstantSDNode>(Op.getOperand(0)) ? 0 : 1;
11179 SDLoc DL(Op);
11180 switch (Op.getConstantOperandVal(ArgStart)) {
11181 case Intrinsic::ppc_cfence: {
11182 assert(ArgStart == 1 && "llvm.ppc.cfence must carry a chain argument.");
11183 SDValue Val = Op.getOperand(ArgStart + 1);
11184 EVT Ty = Val.getValueType();
11185 if (Ty == MVT::i128) {
11186 // FIXME: Testing one of two paired registers is sufficient to guarantee
11187 // ordering?
11188 Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::i64, Val);
11189 }
11190 unsigned Opcode = Subtarget.isPPC64() ? PPC::CFENCE8 : PPC::CFENCE;
11191 EVT FTy = Subtarget.isPPC64() ? MVT::i64 : MVT::i32;
11192 return SDValue(
11193 DAG.getMachineNode(Opcode, DL, MVT::Other,
11194 DAG.getNode(ISD::ANY_EXTEND, DL, FTy, Val),
11195 Op.getOperand(0)),
11196 0);
11197 }
11198 default:
11199 break;
11200 }
11201 return SDValue();
11202}
11203
11204// Lower scalar BSWAP64 to xxbrd.
11205SDValue PPCTargetLowering::LowerBSWAP(SDValue Op, SelectionDAG &DAG) const {
11206 SDLoc dl(Op);
11207 if (!Subtarget.isPPC64())
11208 return Op;
11209 // MTVSRDD
11210 Op = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, Op.getOperand(0),
11211 Op.getOperand(0));
11212 // XXBRD
11213 Op = DAG.getNode(ISD::BSWAP, dl, MVT::v2i64, Op);
11214 // MFVSRD
11215 int VectorIndex = 0;
11216 if (Subtarget.isLittleEndian())
11217 VectorIndex = 1;
11218 Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, Op,
11219 DAG.getTargetConstant(VectorIndex, dl, MVT::i32));
11220 return Op;
11221}
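// As a sketch of the node sequence built above (final instruction selection
// may differ): a scalar byte swap such as
//   %r = call i64 @llvm.bswap.i64(i64 %x)
// becomes a v2i64 splat of %x, a vector BSWAP (xxbrd on VSX targets), and an
// extract of doubleword 0 (doubleword 1 on little-endian subtargets),
// matching the MTVSRDD / XXBRD / MFVSRD comments in the code.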
11222
11223// ATOMIC_CMP_SWAP for i8/i16 needs to zero-extend its input since it will be
11224// compared to a value that is atomically loaded (atomic loads zero-extend).
11225SDValue PPCTargetLowering::LowerATOMIC_CMP_SWAP(SDValue Op,
11226 SelectionDAG &DAG) const {
11227 assert(Op.getOpcode() == ISD::ATOMIC_CMP_SWAP &&
11228 "Expecting an atomic compare-and-swap here.");
11229 SDLoc dl(Op);
11230 auto *AtomicNode = cast<AtomicSDNode>(Op.getNode());
11231 EVT MemVT = AtomicNode->getMemoryVT();
11232 if (MemVT.getSizeInBits() >= 32)
11233 return Op;
11234
11235 SDValue CmpOp = Op.getOperand(2);
11236 // If this is already correctly zero-extended, leave it alone.
11237 auto HighBits = APInt::getHighBitsSet(32, 32 - MemVT.getSizeInBits());
11238 if (DAG.MaskedValueIsZero(CmpOp, HighBits))
11239 return Op;
11240
11241 // Clear the high bits of the compare operand.
11242 unsigned MaskVal = (1 << MemVT.getSizeInBits()) - 1;
11243 SDValue NewCmpOp =
11244 DAG.getNode(ISD::AND, dl, MVT::i32, CmpOp,
11245 DAG.getConstant(MaskVal, dl, MVT::i32));
11246
11247 // Replace the existing compare operand with the properly zero-extended one.
11248 SmallVector<SDValue, 4> Ops;
11249 for (int i = 0, e = AtomicNode->getNumOperands(); i < e; i++)
11250 Ops.push_back(AtomicNode->getOperand(i));
11251 Ops[2] = NewCmpOp;
11252 MachineMemOperand *MMO = AtomicNode->getMemOperand();
11253 SDVTList Tys = DAG.getVTList(MVT::i32, MVT::Other);
11254 auto NodeTy =
11255 (MemVT == MVT::i8) ? PPCISD::ATOMIC_CMP_SWAP_8 : PPCISD::ATOMIC_CMP_SWAP_16;
11256 return DAG.getMemIntrinsicNode(NodeTy, dl, Tys, Ops, MemVT, MMO);
11257}
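// Rough illustration of why the AND above is needed (the IR is only an
// example): for
//   %r = cmpxchg ptr %p, i8 %cmp, i8 %new monotonic monotonic
// the value loaded by the partword load-reserve is zero-extended to 32 bits,
// so %cmp must have its upper 24 bits cleared (masked with 0xff here) or the
// equality check inside the expanded loop could fail spuriously.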
11258
11259SDValue PPCTargetLowering::LowerATOMIC_LOAD_STORE(SDValue Op,
11260 SelectionDAG &DAG) const {
11261 AtomicSDNode *N = cast<AtomicSDNode>(Op.getNode());
11262 EVT MemVT = N->getMemoryVT();
11263 assert(MemVT.getSimpleVT() == MVT::i128 &&
11264 "Expect quadword atomic operations");
11265 SDLoc dl(N);
11266 unsigned Opc = N->getOpcode();
11267 switch (Opc) {
11268 case ISD::ATOMIC_LOAD: {
11269 // Lower a quadword atomic load to int_ppc_atomic_load_i128, which will be
11270 // lowered to PPC instructions by the pattern-matching instruction selector.
11271 SDVTList Tys = DAG.getVTList(MVT::i64, MVT::i64, MVT::Other);
11272 SmallVector<SDValue, 4> Ops{
11273 N->getOperand(0),
11274 DAG.getConstant(Intrinsic::ppc_atomic_load_i128, dl, MVT::i32)};
11275 for (int I = 1, E = N->getNumOperands(); I < E; ++I)
11276 Ops.push_back(N->getOperand(I));
11277 SDValue LoadedVal = DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl, Tys,
11278 Ops, MemVT, N->getMemOperand());
11279 SDValue ValLo = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i128, LoadedVal);
11280 SDValue ValHi =
11281 DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i128, LoadedVal.getValue(1));
11282 ValHi = DAG.getNode(ISD::SHL, dl, MVT::i128, ValHi,
11283 DAG.getConstant(64, dl, MVT::i32));
11284 SDValue Val =
11285 DAG.getNode(ISD::OR, dl, {MVT::i128, MVT::Other}, {ValLo, ValHi});
11286 return DAG.getNode(ISD::MERGE_VALUES, dl, {MVT::i128, MVT::Other},
11287 {Val, LoadedVal.getValue(2)});
11288 }
11289 case ISD::ATOMIC_STORE: {
11290 // Lower a quadword atomic store to int_ppc_atomic_store_i128, which will be
11291 // lowered to PPC instructions by the pattern-matching instruction selector.
11292 SDVTList Tys = DAG.getVTList(MVT::Other);
11293 SmallVector<SDValue, 4> Ops{
11294 N->getOperand(0),
11295 DAG.getConstant(Intrinsic::ppc_atomic_store_i128, dl, MVT::i32)};
11296 SDValue Val = N->getOperand(1);
11297 SDValue ValLo = DAG.getNode(ISD::TRUNCATE, dl, MVT::i64, Val);
11298 SDValue ValHi = DAG.getNode(ISD::SRL, dl, MVT::i128, Val,
11299 DAG.getConstant(64, dl, MVT::i32));
11300 ValHi = DAG.getNode(ISD::TRUNCATE, dl, MVT::i64, ValHi);
11301 Ops.push_back(ValLo);
11302 Ops.push_back(ValHi);
11303 Ops.push_back(N->getOperand(2));
11304 return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, dl, Tys, Ops, MemVT,
11305 N->getMemOperand());
11306 }
11307 default:
11308 llvm_unreachable("Unexpected atomic opcode");
11309 }
11310}
11311
11312 static SDValue getDataClassTest(SDValue Op, FPClassTest Mask, const SDLoc &Dl,
11313 SelectionDAG &DAG,
11314 const PPCSubtarget &Subtarget) {
11315 assert(Mask <= fcAllFlags && "Invalid fp_class flags!");
11316
11317 enum DataClassMask {
11318 DC_NAN = 1 << 6,
11319 DC_NEG_INF = 1 << 4,
11320 DC_POS_INF = 1 << 5,
11321 DC_NEG_ZERO = 1 << 2,
11322 DC_POS_ZERO = 1 << 3,
11323 DC_NEG_SUBNORM = 1,
11324 DC_POS_SUBNORM = 1 << 1,
11325 };
11326
11327 EVT VT = Op.getValueType();
11328
11329 unsigned TestOp = VT == MVT::f128 ? PPC::XSTSTDCQP
11330 : VT == MVT::f64 ? PPC::XSTSTDCDP
11331 : PPC::XSTSTDCSP;
11332
11333 if (Mask == fcAllFlags)
11334 return DAG.getBoolConstant(true, Dl, MVT::i1, VT);
11335 if (Mask == 0)
11336 return DAG.getBoolConstant(false, Dl, MVT::i1, VT);
11337
11338 // Handle the cases where it is cheaper or necessary to test the reversed flags.
11339 if ((Mask & fcNormal) == fcNormal || Mask == ~fcQNan || Mask == ~fcSNan) {
11340 SDValue Rev = getDataClassTest(Op, ~Mask, Dl, DAG, Subtarget);
11341 return DAG.getNOT(Dl, Rev, MVT::i1);
11342 }
11343
11344 // Power doesn't support testing whether a value is 'normal'. Test the rest
11345 // first, and test if it's 'not not-normal' with expected sign.
11346 if (Mask & fcNormal) {
11347 SDValue Rev(DAG.getMachineNode(
11348 TestOp, Dl, MVT::i32,
11349 DAG.getTargetConstant(DC_NAN | DC_NEG_INF | DC_POS_INF |
11350 DC_NEG_ZERO | DC_POS_ZERO |
11351 DC_NEG_SUBNORM | DC_POS_SUBNORM,
11352 Dl, MVT::i32),
11353 Op),
11354 0);
11355 // The sign is stored in CR bit 0, the result in CR bit 2.
11356 SDValue Sign(
11357 DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, Dl, MVT::i1, Rev,
11358 DAG.getTargetConstant(PPC::sub_lt, Dl, MVT::i32)),
11359 0);
11360 SDValue Normal(DAG.getNOT(
11361 Dl,
11362 SDValue(DAG.getMachineNode(
11363 TargetOpcode::EXTRACT_SUBREG, Dl, MVT::i1, Rev,
11364 DAG.getTargetConstant(PPC::sub_eq, Dl, MVT::i32)),
11365 0),
11366 MVT::i1));
11367 if (Mask & fcPosNormal)
11368 Sign = DAG.getNOT(Dl, Sign, MVT::i1);
11369 SDValue Result = DAG.getNode(ISD::AND, Dl, MVT::i1, Sign, Normal);
11370 if (Mask == fcPosNormal || Mask == fcNegNormal)
11371 return Result;
11372
11373 return DAG.getNode(
11374 ISD::OR, Dl, MVT::i1,
11375 getDataClassTest(Op, Mask & ~fcNormal, Dl, DAG, Subtarget), Result);
11376 }
11377
11378 // The instruction doesn't differentiate between signaling and quiet NaNs. Test
11379 // the rest first, and test if it 'is NaN and is signaling/quiet'.
11380 if ((Mask & fcNan) == fcQNan || (Mask & fcNan) == fcSNan) {
11381 bool IsQuiet = Mask & fcQNan;
11382 SDValue NanCheck = getDataClassTest(Op, fcNan, Dl, DAG, Subtarget);
11383
11384 // Quietness is determined by the first bit in fraction field.
11385 uint64_t QuietMask = 0;
11386 SDValue HighWord;
11387 if (VT == MVT::f128) {
11388 HighWord = DAG.getNode(
11389 ISD::EXTRACT_VECTOR_ELT, Dl, MVT::i32, DAG.getBitcast(MVT::v4i32, Op),
11390 DAG.getVectorIdxConstant(Subtarget.isLittleEndian() ? 3 : 0, Dl));
11391 QuietMask = 0x8000;
11392 } else if (VT == MVT::f64) {
11393 if (Subtarget.isPPC64()) {
11394 HighWord = DAG.getNode(ISD::EXTRACT_ELEMENT, Dl, MVT::i32,
11395 DAG.getBitcast(MVT::i64, Op),
11396 DAG.getConstant(1, Dl, MVT::i32));
11397 } else {
11398 SDValue Vec = DAG.getBitcast(
11399 MVT::v4i32, DAG.getNode(ISD::SCALAR_TO_VECTOR, Dl, MVT::v2f64, Op));
11400 HighWord = DAG.getNode(
11401 ISD::EXTRACT_VECTOR_ELT, Dl, MVT::i32, Vec,
11402 DAG.getVectorIdxConstant(Subtarget.isLittleEndian() ? 1 : 0, Dl));
11403 }
11404 QuietMask = 0x80000;
11405 } else if (VT == MVT::f32) {
11406 HighWord = DAG.getBitcast(MVT::i32, Op);
11407 QuietMask = 0x400000;
11408 }
11409 SDValue NanRes = DAG.getSetCC(
11410 Dl, MVT::i1,
11411 DAG.getNode(ISD::AND, Dl, MVT::i32, HighWord,
11412 DAG.getConstant(QuietMask, Dl, MVT::i32)),
11413 DAG.getConstant(0, Dl, MVT::i32), IsQuiet ? ISD::SETNE : ISD::SETEQ);
11414 NanRes = DAG.getNode(ISD::AND, Dl, MVT::i1, NanCheck, NanRes);
11415 if (Mask == fcQNan || Mask == fcSNan)
11416 return NanRes;
11417
11418 return DAG.getNode(ISD::OR, Dl, MVT::i1,
11419 getDataClassTest(Op, Mask & ~fcNan, Dl, DAG, Subtarget),
11420 NanRes);
11421 }
11422
11423 unsigned NativeMask = 0;
11424 if ((Mask & fcNan) == fcNan)
11425 NativeMask |= DC_NAN;
11426 if (Mask & fcNegInf)
11427 NativeMask |= DC_NEG_INF;
11428 if (Mask & fcPosInf)
11429 NativeMask |= DC_POS_INF;
11430 if (Mask & fcNegZero)
11431 NativeMask |= DC_NEG_ZERO;
11432 if (Mask & fcPosZero)
11433 NativeMask |= DC_POS_ZERO;
11434 if (Mask & fcNegSubnormal)
11435 NativeMask |= DC_NEG_SUBNORM;
11436 if (Mask & fcPosSubnormal)
11437 NativeMask |= DC_POS_SUBNORM;
11438 return SDValue(
11439 DAG.getMachineNode(
11440 TargetOpcode::EXTRACT_SUBREG, Dl, MVT::i1,
11441 SDValue(DAG.getMachineNode(
11442 TestOp, Dl, MVT::i32,
11443 DAG.getTargetConstant(NativeMask, Dl, MVT::i32), Op),
11444 0),
11445 DAG.getTargetConstant(PPC::sub_eq, Dl, MVT::i32)),
11446 0);
11447}
11448
11449SDValue PPCTargetLowering::LowerIS_FPCLASS(SDValue Op,
11450 SelectionDAG &DAG) const {
11451 assert(Subtarget.hasP9Vector() && "Test data class requires Power9");
11452 SDValue LHS = Op.getOperand(0);
11453 uint64_t RHSC = Op.getConstantOperandVal(1);
11454 SDLoc Dl(Op);
11455 FPClassTest Category = static_cast<FPClassTest>(RHSC);
11456 return getDataClassTest(LHS, Category, Dl, DAG, Subtarget);
11457}
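// Informal example of the mapping above, with the DCMX value taken from the
// DataClassMask enum (treat the exact selection as a sketch):
//   %isnan = call i1 @llvm.is.fpclass.f64(double %x, i32 3)   ; fcNan
// is expected to use XSTSTDCDP with DCMX = 0x40 (DC_NAN) and read back the EQ
// bit of the resulting CR field, while masks that include fcNormal go through
// the inverted-test path in getDataClassTest.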
11458
11459SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op,
11460 SelectionDAG &DAG) const {
11461 SDLoc dl(Op);
11462 // Create a stack slot that is 16-byte aligned.
11463 MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
11464 int FrameIdx = MFI.CreateStackObject(16, Align(16), false);
11465 EVT PtrVT = getPointerTy(DAG.getDataLayout());
11466 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
11467
11468 // Store the input value into Value#0 of the stack slot.
11469 SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0), FIdx,
11470 MachinePointerInfo());
11471 // Load it out.
11472 return DAG.getLoad(Op.getValueType(), dl, Store, FIdx, MachinePointerInfo());
11473}
11474
11475SDValue PPCTargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
11476 SelectionDAG &DAG) const {
11477 assert(Op.getOpcode() == ISD::INSERT_VECTOR_ELT &&
11478 "Should only be called for ISD::INSERT_VECTOR_ELT");
11479
11480 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(2));
11481
11482 EVT VT = Op.getValueType();
11483 SDLoc dl(Op);
11484 SDValue V1 = Op.getOperand(0);
11485 SDValue V2 = Op.getOperand(1);
11486
11487 if (VT == MVT::v2f64 && C)
11488 return Op;
11489
11490 if (Subtarget.hasP9Vector()) {
11491 // A f32 load feeding into a v4f32 insert_vector_elt is handled in this way
11492 // because on P10, it allows this specific insert_vector_elt load pattern to
11493 // utilize the refactored load and store infrastructure in order to exploit
11494 // prefixed loads.
11495 // On targets with inexpensive direct moves (Power9 and up), a
11496 // (insert_vector_elt v4f32:$vec, (f32 load)) is always better as an integer
11497 // load since a single precision load will involve conversion to double
11498 // precision on the load followed by another conversion to single precision.
11499 if ((VT == MVT::v4f32) && (V2.getValueType() == MVT::f32) &&
11500 (isa<LoadSDNode>(V2))) {
11501 SDValue BitcastVector = DAG.getBitcast(MVT::v4i32, V1);
11502 SDValue BitcastLoad = DAG.getBitcast(MVT::i32, V2);
11503 SDValue InsVecElt =
11504 DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v4i32, BitcastVector,
11505 BitcastLoad, Op.getOperand(2));
11506 return DAG.getBitcast(MVT::v4f32, InsVecElt);
11507 }
11508 }
11509
11510 if (Subtarget.isISA3_1()) {
11511 if ((VT == MVT::v2i64 || VT == MVT::v2f64) && !Subtarget.isPPC64())
11512 return SDValue();
11513 // On P10, we have legal lowering for constant and variable indices for
11514 // all vectors.
11515 if (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32 ||
11516 VT == MVT::v2i64 || VT == MVT::v4f32 || VT == MVT::v2f64)
11517 return Op;
11518 }
11519
11520 // Before P10, we have legal lowering for constant indices but not for
11521 // variable ones.
11522 if (!C)
11523 return SDValue();
11524
11525 // We can use MTVSRZ + VECINSERT for v8i16 and v16i8 types.
11526 if (VT == MVT::v8i16 || VT == MVT::v16i8) {
11527 SDValue Mtvsrz = DAG.getNode(PPCISD::MTVSRZ, dl, VT, V2);
11528 unsigned BytesInEachElement = VT.getVectorElementType().getSizeInBits() / 8;
11529 unsigned InsertAtElement = C->getZExtValue();
11530 unsigned InsertAtByte = InsertAtElement * BytesInEachElement;
11531 if (Subtarget.isLittleEndian()) {
11532 InsertAtByte = (16 - BytesInEachElement) - InsertAtByte;
11533 }
11534 return DAG.getNode(PPCISD::VECINSERT, dl, VT, V1, Mtvsrz,
11535 DAG.getConstant(InsertAtByte, dl, MVT::i32));
11536 }
11537 return Op;
11538}
11539
11540SDValue PPCTargetLowering::LowerVectorLoad(SDValue Op,
11541 SelectionDAG &DAG) const {
11542 SDLoc dl(Op);
11543 LoadSDNode *LN = cast<LoadSDNode>(Op.getNode());
11544 SDValue LoadChain = LN->getChain();
11545 SDValue BasePtr = LN->getBasePtr();
11546 EVT VT = Op.getValueType();
11547
11548 if (VT != MVT::v256i1 && VT != MVT::v512i1)
11549 return Op;
11550
11551 // Type v256i1 is used for pairs and v512i1 is used for accumulators.
11552 // Here we create 2 or 4 v16i8 loads to load the pair or accumulator value in
11553 // 2 or 4 vsx registers.
11554 assert((VT != MVT::v512i1 || Subtarget.hasMMA()) &&
11555 "Type unsupported without MMA");
11556 assert((VT != MVT::v256i1 || Subtarget.pairedVectorMemops()) &&
11557 "Type unsupported without paired vector support");
11558 Align Alignment = LN->getAlign();
11559 SmallVector<SDValue, 4> Loads;
11560 SmallVector<SDValue, 4> LoadChains;
11561 unsigned NumVecs = VT.getSizeInBits() / 128;
11562 for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
11563 SDValue Load =
11564 DAG.getLoad(MVT::v16i8, dl, LoadChain, BasePtr,
11565 LN->getPointerInfo().getWithOffset(Idx * 16),
11566 commonAlignment(Alignment, Idx * 16),
11567 LN->getMemOperand()->getFlags(), LN->getAAInfo());
11568 BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
11569 DAG.getConstant(16, dl, BasePtr.getValueType()));
11570 Loads.push_back(Load);
11571 LoadChains.push_back(Load.getValue(1));
11572 }
11573 if (Subtarget.isLittleEndian()) {
11574 std::reverse(Loads.begin(), Loads.end());
11575 std::reverse(LoadChains.begin(), LoadChains.end());
11576 }
11577 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains);
11578 SDValue Value =
11579 DAG.getNode(VT == MVT::v512i1 ? PPCISD::ACC_BUILD : PPCISD::PAIR_BUILD,
11580 dl, VT, Loads);
11581 SDValue RetOps[] = {Value, TF};
11582 return DAG.getMergeValues(RetOps, dl);
11583}
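// As an informal summary of the expansion above: a v512i1 (MMA accumulator)
// load becomes four v16i8 loads at offsets 0, 16, 32 and 48 from the base
// pointer, reversed on little-endian subtargets and recombined with
// ACC_BUILD; a v256i1 pair does the same with two loads and PAIR_BUILD.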
11584
11585SDValue PPCTargetLowering::LowerVectorStore(SDValue Op,
11586 SelectionDAG &DAG) const {
11587 SDLoc dl(Op);
11588 StoreSDNode *SN = cast<StoreSDNode>(Op.getNode());
11589 SDValue StoreChain = SN->getChain();
11590 SDValue BasePtr = SN->getBasePtr();
11591 SDValue Value = SN->getValue();
11592 SDValue Value2 = SN->getValue();
11593 EVT StoreVT = Value.getValueType();
11594
11595 if (StoreVT != MVT::v256i1 && StoreVT != MVT::v512i1)
11596 return Op;
11597
11598 // Type v256i1 is used for pairs and v512i1 is used for accumulators.
11599 // Here we create 2 or 4 v16i8 stores to store the pair or accumulator
11600 // underlying registers individually.
11601 assert((StoreVT != MVT::v512i1 || Subtarget.hasMMA()) &&
11602 "Type unsupported without MMA");
11603 assert((StoreVT != MVT::v256i1 || Subtarget.pairedVectorMemops()) &&
11604 "Type unsupported without paired vector support");
11605 Align Alignment = SN->getAlign();
11606 SmallVector<SDValue, 4> Stores;
11607 unsigned NumVecs = 2;
11608 if (StoreVT == MVT::v512i1) {
11609 if (Subtarget.isISAFuture()) {
11610 EVT ReturnTypes[] = {MVT::v256i1, MVT::v256i1};
11611 MachineSDNode *ExtNode = DAG.getMachineNode(
11612 PPC::DMXXEXTFDMR512, dl, ReturnTypes, Op.getOperand(1));
11613
11614 Value = SDValue(ExtNode, 0);
11615 Value2 = SDValue(ExtNode, 1);
11616 } else
11617 Value = DAG.getNode(PPCISD::XXMFACC, dl, MVT::v512i1, Value);
11618 NumVecs = 4;
11619 }
11620 for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
11621 unsigned VecNum = Subtarget.isLittleEndian() ? NumVecs - 1 - Idx : Idx;
11622 SDValue Elt;
11623 if (Subtarget.isISAFuture()) {
11624 VecNum = Subtarget.isLittleEndian() ? 1 - (Idx % 2) : (Idx % 2);
11625 Elt = DAG.getNode(PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8,
11626 Idx > 1 ? Value2 : Value,
11627 DAG.getConstant(VecNum, dl, getPointerTy(DAG.getDataLayout())));
11628 } else
11629 Elt = DAG.getNode(PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8, Value,
11630 DAG.getConstant(VecNum, dl, getPointerTy(DAG.getDataLayout())));
11631
11632 SDValue Store =
11633 DAG.getStore(StoreChain, dl, Elt, BasePtr,
11634 SN->getPointerInfo().getWithOffset(Idx * 16),
11635 commonAlignment(Alignment, Idx * 16),
11636 SN->getMemOperand()->getFlags(), SN->getAAInfo());
11637 BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
11638 DAG.getConstant(16, dl, BasePtr.getValueType()));
11639 Stores.push_back(Store);
11640 }
11641 SDValue TF = DAG.getTokenFactor(dl, Stores);
11642 return TF;
11643}
11644
11645SDValue PPCTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const {
11646 SDLoc dl(Op);
11647 if (Op.getValueType() == MVT::v4i32) {
11648 SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
11649
11650 SDValue Zero = getCanonicalConstSplat(0, 1, MVT::v4i32, DAG, dl);
11651 // +16 as shift amt.
11652 SDValue Neg16 = getCanonicalConstSplat(-16, 4, MVT::v4i32, DAG, dl);
11653 SDValue RHSSwap = // = vrlw RHS, 16
11654 BuildIntrinsicOp(Intrinsic::ppc_altivec_vrlw, RHS, Neg16, DAG, dl);
11655
11656 // Shrinkify inputs to v8i16.
11657 LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, LHS);
11658 RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHS);
11659 RHSSwap = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHSSwap);
11660
11661 // Low parts multiplied together, generating 32-bit results (we ignore the
11662 // top parts).
11663 SDValue LoProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmulouh,
11664 LHS, RHS, DAG, dl, MVT::v4i32);
11665
11666 SDValue HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmsumuhm,
11667 LHS, RHSSwap, Zero, DAG, dl, MVT::v4i32);
11668 // Shift the high parts up 16 bits.
11669 HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, HiProd,
11670 Neg16, DAG, dl);
11671 return DAG.getNode(ISD::ADD, dl, MVT::v4i32, LoProd, HiProd);
11672 } else if (Op.getValueType() == MVT::v16i8) {
11673 SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
11674 bool isLittleEndian = Subtarget.isLittleEndian();
11675
11676 // Multiply the even 8-bit parts, producing 16-bit sums.
11677 SDValue EvenParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuleub,
11678 LHS, RHS, DAG, dl, MVT::v8i16);
11679 EvenParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, EvenParts);
11680
11681 // Multiply the odd 8-bit parts, producing 16-bit sums.
11682 SDValue OddParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuloub,
11683 LHS, RHS, DAG, dl, MVT::v8i16);
11684 OddParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OddParts);
11685
11686 // Merge the results together. Because vmuleub and vmuloub are
11687 // instructions with a big-endian bias, we must reverse the
11688 // element numbering and reverse the meaning of "odd" and "even"
11689 // when generating little endian code.
11690 int Ops[16];
11691 for (unsigned i = 0; i != 8; ++i) {
11692 if (isLittleEndian) {
11693 Ops[i*2 ] = 2*i;
11694 Ops[i*2+1] = 2*i+16;
11695 } else {
11696 Ops[i*2 ] = 2*i+1;
11697 Ops[i*2+1] = 2*i+1+16;
11698 }
11699 }
11700 if (isLittleEndian)
11701 return DAG.getVectorShuffle(MVT::v16i8, dl, OddParts, EvenParts, Ops);
11702 else
11703 return DAG.getVectorShuffle(MVT::v16i8, dl, EvenParts, OddParts, Ops);
11704 } else {
11705 llvm_unreachable("Unknown mul to lower!");
11706 }
11707}
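// Worked sketch of the v4i32 multiply above, writing each 32-bit lane as
// a = ah*2^16 + al and b = bh*2^16 + bl:
//   a*b (mod 2^32) = al*bl + ((al*bh + ah*bl) << 16)
// vmulouh(LHS, RHS) yields al*bl per lane; rotating RHS by 16 swaps its
// halfwords, so vmsumuhm(LHS, RHSSwap, 0) yields al*bh + ah*bl per lane,
// which is shifted left by 16 (vslw only uses the low 5 bits of the shift
// amount, so the -16 splat acts as 16) and added to the low product.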
11708
11709SDValue PPCTargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const {
11710 bool IsStrict = Op->isStrictFPOpcode();
11711 if (Op.getOperand(IsStrict ? 1 : 0).getValueType() == MVT::f128 &&
11712 !Subtarget.hasP9Vector())
11713 return SDValue();
11714
11715 return Op;
11716}
11717
11718 // Custom lowering for fpext v2f32 to v2f64
11719SDValue PPCTargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const {
11720
11721 assert(Op.getOpcode() == ISD::FP_EXTEND &&
11722 "Should only be called for ISD::FP_EXTEND");
11723
11724 // FIXME: handle extends from half precision float vectors on P9.
11725 // We only want to custom lower an extend from v2f32 to v2f64.
11726 if (Op.getValueType() != MVT::v2f64 ||
11727 Op.getOperand(0).getValueType() != MVT::v2f32)
11728 return SDValue();
11729
11730 SDLoc dl(Op);
11731 SDValue Op0 = Op.getOperand(0);
11732
11733 switch (Op0.getOpcode()) {
11734 default:
11735 return SDValue();
11736 case ISD::EXTRACT_SUBVECTOR: {
11737 assert(Op0.getNumOperands() == 2 &&
11738 isa<ConstantSDNode>(Op0->getOperand(1)) &&
11739 "Node should have 2 operands with second one being a constant!");
11740
11741 if (Op0.getOperand(0).getValueType() != MVT::v4f32)
11742 return SDValue();
11743
11744 // Custom lowering is only done for the high or low doubleword.
11745 int Idx = Op0.getConstantOperandVal(1);
11746 if (Idx % 2 != 0)
11747 return SDValue();
11748
11749 // Since input is v4f32, at this point Idx is either 0 or 2.
11750 // Shift to get the doubleword position we want.
11751 int DWord = Idx >> 1;
11752
11753 // High and low word positions are different on little endian.
11754 if (Subtarget.isLittleEndian())
11755 DWord ^= 0x1;
11756
11757 return DAG.getNode(PPCISD::FP_EXTEND_HALF, dl, MVT::v2f64,
11758 Op0.getOperand(0), DAG.getConstant(DWord, dl, MVT::i32));
11759 }
11760 case ISD::FADD:
11761 case ISD::FMUL:
11762 case ISD::FSUB: {
11763 SDValue NewLoad[2];
11764 for (unsigned i = 0, ie = Op0.getNumOperands(); i != ie; ++i) {
11765 // Ensure both input are loads.
11766 SDValue LdOp = Op0.getOperand(i);
11767 if (LdOp.getOpcode() != ISD::LOAD)
11768 return SDValue();
11769 // Generate new load node.
11770 LoadSDNode *LD = cast<LoadSDNode>(LdOp);
11771 SDValue LoadOps[] = {LD->getChain(), LD->getBasePtr()};
11772 NewLoad[i] = DAG.getMemIntrinsicNode(
11773 PPCISD::LD_VSX_LH, dl, DAG.getVTList(MVT::v4f32, MVT::Other), LoadOps,
11774 LD->getMemoryVT(), LD->getMemOperand());
11775 }
11776 SDValue NewOp =
11777 DAG.getNode(Op0.getOpcode(), SDLoc(Op0), MVT::v4f32, NewLoad[0],
11778 NewLoad[1], Op0.getNode()->getFlags());
11779 return DAG.getNode(PPCISD::FP_EXTEND_HALF, dl, MVT::v2f64, NewOp,
11780 DAG.getConstant(0, dl, MVT::i32));
11781 }
11782 case ISD::LOAD: {
11783 LoadSDNode *LD = cast<LoadSDNode>(Op0);
11784 SDValue LoadOps[] = {LD->getChain(), LD->getBasePtr()};
11785 SDValue NewLd = DAG.getMemIntrinsicNode(
11786 PPCISD::LD_VSX_LH, dl, DAG.getVTList(MVT::v4f32, MVT::Other), LoadOps,
11787 LD->getMemoryVT(), LD->getMemOperand());
11788 return DAG.getNode(PPCISD::FP_EXTEND_HALF, dl, MVT::v2f64, NewLd,
11789 DAG.getConstant(0, dl, MVT::i32));
11790 }
11791 }
11792 llvm_unreachable("ERROR: Should return for all cases within switch.");
11793}
11794
11795/// LowerOperation - Provide custom lowering hooks for some operations.
11796///
11797 SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
11798 switch (Op.getOpcode()) {
11799 default: llvm_unreachable("Wasn't expecting to be able to lower this!");
11800 case ISD::FPOW: return lowerPow(Op, DAG);
11801 case ISD::FSIN: return lowerSin(Op, DAG);
11802 case ISD::FCOS: return lowerCos(Op, DAG);
11803 case ISD::FLOG: return lowerLog(Op, DAG);
11804 case ISD::FLOG10: return lowerLog10(Op, DAG);
11805 case ISD::FEXP: return lowerExp(Op, DAG);
11806 case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
11807 case ISD::BlockAddress: return LowerBlockAddress(Op, DAG);
11808 case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG);
11809 case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
11810 case ISD::JumpTable: return LowerJumpTable(Op, DAG);
11811 case ISD::STRICT_FSETCC:
11812 case ISD::STRICT_FSETCCS:
11813 case ISD::SETCC: return LowerSETCC(Op, DAG);
11814 case ISD::INIT_TRAMPOLINE: return LowerINIT_TRAMPOLINE(Op, DAG);
11815 case ISD::ADJUST_TRAMPOLINE: return LowerADJUST_TRAMPOLINE(Op, DAG);
11816
11817 case ISD::INLINEASM:
11818 case ISD::INLINEASM_BR: return LowerINLINEASM(Op, DAG);
11819 // Variable argument lowering.
11820 case ISD::VASTART: return LowerVASTART(Op, DAG);
11821 case ISD::VAARG: return LowerVAARG(Op, DAG);
11822 case ISD::VACOPY: return LowerVACOPY(Op, DAG);
11823
11824 case ISD::STACKRESTORE: return LowerSTACKRESTORE(Op, DAG);
11825 case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG);
11826 case ISD::GET_DYNAMIC_AREA_OFFSET:
11827 return LowerGET_DYNAMIC_AREA_OFFSET(Op, DAG);
11828
11829 // Exception handling lowering.
11830 case ISD::EH_DWARF_CFA: return LowerEH_DWARF_CFA(Op, DAG);
11831 case ISD::EH_SJLJ_SETJMP: return lowerEH_SJLJ_SETJMP(Op, DAG);
11832 case ISD::EH_SJLJ_LONGJMP: return lowerEH_SJLJ_LONGJMP(Op, DAG);
11833
11834 case ISD::LOAD: return LowerLOAD(Op, DAG);
11835 case ISD::STORE: return LowerSTORE(Op, DAG);
11836 case ISD::TRUNCATE: return LowerTRUNCATE(Op, DAG);
11837 case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
11838 case ISD::STRICT_FP_TO_UINT:
11839 case ISD::STRICT_FP_TO_SINT:
11840 case ISD::FP_TO_UINT:
11841 case ISD::FP_TO_SINT: return LowerFP_TO_INT(Op, DAG, SDLoc(Op));
11842 case ISD::STRICT_UINT_TO_FP:
11843 case ISD::STRICT_SINT_TO_FP:
11844 case ISD::UINT_TO_FP:
11845 case ISD::SINT_TO_FP: return LowerINT_TO_FP(Op, DAG);
11846 case ISD::GET_ROUNDING: return LowerGET_ROUNDING(Op, DAG);
11847
11848 // Lower 64-bit shifts.
11849 case ISD::SHL_PARTS: return LowerSHL_PARTS(Op, DAG);
11850 case ISD::SRL_PARTS: return LowerSRL_PARTS(Op, DAG);
11851 case ISD::SRA_PARTS: return LowerSRA_PARTS(Op, DAG);
11852
11853 case ISD::FSHL: return LowerFunnelShift(Op, DAG);
11854 case ISD::FSHR: return LowerFunnelShift(Op, DAG);
11855
11856 // Vector-related lowering.
11857 case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG);
11858 case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG);
11859 case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
11860 case ISD::SCALAR_TO_VECTOR: return LowerSCALAR_TO_VECTOR(Op, DAG);
11861 case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
11862 case ISD::MUL: return LowerMUL(Op, DAG);
11863 case ISD::FP_EXTEND: return LowerFP_EXTEND(Op, DAG);
11864 case ISD::STRICT_FP_ROUND:
11865 case ISD::FP_ROUND:
11866 return LowerFP_ROUND(Op, DAG);
11867 case ISD::ROTL: return LowerROTL(Op, DAG);
11868
11869 // For counter-based loop handling.
11870 case ISD::INTRINSIC_W_CHAIN: return SDValue();
11871
11872 case ISD::BITCAST: return LowerBITCAST(Op, DAG);
11873
11874 // Frame & Return address.
11875 case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG);
11876 case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
11877
11878 case ISD::INTRINSIC_VOID:
11879 return LowerINTRINSIC_VOID(Op, DAG);
11880 case ISD::BSWAP:
11881 return LowerBSWAP(Op, DAG);
11882 case ISD::ATOMIC_CMP_SWAP:
11883 return LowerATOMIC_CMP_SWAP(Op, DAG);
11884 case ISD::ATOMIC_STORE:
11885 return LowerATOMIC_LOAD_STORE(Op, DAG);
11886 case ISD::IS_FPCLASS:
11887 return LowerIS_FPCLASS(Op, DAG);
11888 }
11889}
11890
11891 void PPCTargetLowering::ReplaceNodeResults(SDNode *N,
11892 SmallVectorImpl<SDValue> &Results,
11893 SelectionDAG &DAG) const {
11894 SDLoc dl(N);
11895 switch (N->getOpcode()) {
11896 default:
11897 llvm_unreachable("Do not know how to custom type legalize this operation!");
11898 case ISD::ATOMIC_LOAD: {
11899 SDValue Res = LowerATOMIC_LOAD_STORE(SDValue(N, 0), DAG);
11900 Results.push_back(Res);
11901 Results.push_back(Res.getValue(1));
11902 break;
11903 }
11904 case ISD::READCYCLECOUNTER: {
11905 SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
11906 SDValue RTB = DAG.getNode(PPCISD::READ_TIME_BASE, dl, VTs, N->getOperand(0));
11907
11908 Results.push_back(
11909 DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, RTB, RTB.getValue(1)));
11910 Results.push_back(RTB.getValue(2));
11911 break;
11912 }
11913 case ISD::INTRINSIC_W_CHAIN: {
11914 if (N->getConstantOperandVal(1) != Intrinsic::loop_decrement)
11915 break;
11916
11917 assert(N->getValueType(0) == MVT::i1 &&
11918 "Unexpected result type for CTR decrement intrinsic");
11919 EVT SVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
11920 N->getValueType(0));
11921 SDVTList VTs = DAG.getVTList(SVT, MVT::Other);
11922 SDValue NewInt = DAG.getNode(N->getOpcode(), dl, VTs, N->getOperand(0),
11923 N->getOperand(1));
11924
11925 Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, NewInt));
11926 Results.push_back(NewInt.getValue(1));
11927 break;
11928 }
11929 case ISD::INTRINSIC_WO_CHAIN: {
11930 switch (N->getConstantOperandVal(0)) {
11931 case Intrinsic::ppc_pack_longdouble:
11932 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::ppcf128,
11933 N->getOperand(2), N->getOperand(1)));
11934 break;
11935 case Intrinsic::ppc_maxfe:
11936 case Intrinsic::ppc_minfe:
11937 case Intrinsic::ppc_fnmsub:
11938 case Intrinsic::ppc_convert_f128_to_ppcf128:
11939 Results.push_back(LowerINTRINSIC_WO_CHAIN(SDValue(N, 0), DAG));
11940 break;
11941 }
11942 break;
11943 }
11944 case ISD::VAARG: {
11945 if (!Subtarget.isSVR4ABI() || Subtarget.isPPC64())
11946 return;
11947
11948 EVT VT = N->getValueType(0);
11949
11950 if (VT == MVT::i64) {
11951 SDValue NewNode = LowerVAARG(SDValue(N, 1), DAG);
11952
11953 Results.push_back(NewNode);
11954 Results.push_back(NewNode.getValue(1));
11955 }
11956 return;
11957 }
11958 case ISD::STRICT_FP_TO_SINT:
11959 case ISD::STRICT_FP_TO_UINT:
11960 case ISD::FP_TO_SINT:
11961 case ISD::FP_TO_UINT: {
11962 // LowerFP_TO_INT() can only handle f32 and f64.
11963 if (N->getOperand(N->isStrictFPOpcode() ? 1 : 0).getValueType() ==
11964 MVT::ppcf128)
11965 return;
11966 SDValue LoweredValue = LowerFP_TO_INT(SDValue(N, 0), DAG, dl);
11967 Results.push_back(LoweredValue);
11968 if (N->isStrictFPOpcode())
11969 Results.push_back(LoweredValue.getValue(1));
11970 return;
11971 }
11972 case ISD::TRUNCATE: {
11973 if (!N->getValueType(0).isVector())
11974 return;
11975 SDValue Lowered = LowerTRUNCATEVector(SDValue(N, 0), DAG);
11976 if (Lowered)
11977 Results.push_back(Lowered);
11978 return;
11979 }
11980 case ISD::FSHL:
11981 case ISD::FSHR:
11982 // Don't handle funnel shifts here.
11983 return;
11984 case ISD::BITCAST:
11985 // Don't handle bitcast here.
11986 return;
11987 case ISD::FP_EXTEND:
11988 SDValue Lowered = LowerFP_EXTEND(SDValue(N, 0), DAG);
11989 if (Lowered)
11990 Results.push_back(Lowered);
11991 return;
11992 }
11993}
11994
11995//===----------------------------------------------------------------------===//
11996// Other Lowering Code
11997//===----------------------------------------------------------------------===//
11998
11999 static Instruction *callIntrinsic(IRBuilderBase &Builder, Intrinsic::ID Id) {
12000 Module *M = Builder.GetInsertBlock()->getParent()->getParent();
12001 Function *Func = Intrinsic::getDeclaration(M, Id);
12002 return Builder.CreateCall(Func, {});
12003}
12004
12005// The mappings for emitLeading/TrailingFence is taken from
12006// http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html
12007 Instruction *PPCTargetLowering::emitLeadingFence(IRBuilderBase &Builder,
12008 Instruction *Inst,
12009 AtomicOrdering Ord) const {
12010 if (Ord == AtomicOrdering::SequentiallyConsistent)
12011 return callIntrinsic(Builder, Intrinsic::ppc_sync);
12012 if (isReleaseOrStronger(Ord))
12013 return callIntrinsic(Builder, Intrinsic::ppc_lwsync);
12014 return nullptr;
12015}
12016
12017 Instruction *PPCTargetLowering::emitTrailingFence(IRBuilderBase &Builder,
12018 Instruction *Inst,
12019 AtomicOrdering Ord) const {
12020 if (Inst->hasAtomicLoad() && isAcquireOrStronger(Ord)) {
12021 // See http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html and
12022 // http://www.rdrop.com/users/paulmck/scalability/paper/N2745r.2011.03.04a.html
12023 // and http://www.cl.cam.ac.uk/~pes20/cppppc/ for justification.
12024 if (isa<LoadInst>(Inst))
12025 return Builder.CreateCall(
12026 Intrinsic::getDeclaration(
12027 Builder.GetInsertBlock()->getParent()->getParent(),
12028 Intrinsic::ppc_cfence, {Inst->getType()}),
12029 {Inst});
12030 // FIXME: Can use isync for rmw operation.
12031 return callIntrinsic(Builder, Intrinsic::ppc_lwsync);
12032 }
12033 return nullptr;
12034}
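// Informal summary of the fence mapping implemented by the two hooks above,
// following the cited Power mappings (stated here only as a sketch):
//   seq_cst operation:          sync (ppc_sync) emitted before the access
//   release store/rmw:          lwsync emitted before the access
//   acquire (or stronger) load: a cfence (dependent branch + isync) after it
//   acquire (or stronger) rmw:  lwsync after the access
// Weaker orderings get no fence from these hooks.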
12035
12036 MachineBasicBlock *PPCTargetLowering::EmitAtomicBinary(MachineInstr &MI,
12037 MachineBasicBlock *BB,
12038 unsigned AtomicSize,
12039 unsigned BinOpcode,
12040 unsigned CmpOpcode,
12041 unsigned CmpPred) const {
12042 // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
12043 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
12044
12045 auto LoadMnemonic = PPC::LDARX;
12046 auto StoreMnemonic = PPC::STDCX;
12047 switch (AtomicSize) {
12048 default:
12049 llvm_unreachable("Unexpected size of atomic entity");
12050 case 1:
12051 LoadMnemonic = PPC::LBARX;
12052 StoreMnemonic = PPC::STBCX;
12053 assert(Subtarget.hasPartwordAtomics() && "1-byte atomics require partword atomic support");
12054 break;
12055 case 2:
12056 LoadMnemonic = PPC::LHARX;
12057 StoreMnemonic = PPC::STHCX;
12058 assert(Subtarget.hasPartwordAtomics() && "2-byte atomics require partword atomic support");
12059 break;
12060 case 4:
12061 LoadMnemonic = PPC::LWARX;
12062 StoreMnemonic = PPC::STWCX;
12063 break;
12064 case 8:
12065 LoadMnemonic = PPC::LDARX;
12066 StoreMnemonic = PPC::STDCX;
12067 break;
12068 }
12069
12070 const BasicBlock *LLVM_BB = BB->getBasicBlock();
12071 MachineFunction *F = BB->getParent();
12072 MachineFunction::iterator It = ++BB->getIterator();
12073
12074 Register dest = MI.getOperand(0).getReg();
12075 Register ptrA = MI.getOperand(1).getReg();
12076 Register ptrB = MI.getOperand(2).getReg();
12077 Register incr = MI.getOperand(3).getReg();
12078 DebugLoc dl = MI.getDebugLoc();
12079
12080 MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
12081 MachineBasicBlock *loop2MBB =
12082 CmpOpcode ? F->CreateMachineBasicBlock(LLVM_BB) : nullptr;
12083 MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
12084 F->insert(It, loopMBB);
12085 if (CmpOpcode)
12086 F->insert(It, loop2MBB);
12087 F->insert(It, exitMBB);
12088 exitMBB->splice(exitMBB->begin(), BB,
12089 std::next(MachineBasicBlock::iterator(MI)), BB->end());
12090 exitMBB->transferSuccessorsAndUpdatePHIs(BB);
12091
12092 MachineRegisterInfo &RegInfo = F->getRegInfo();
12093 Register TmpReg = (!BinOpcode) ? incr :
12094 RegInfo.createVirtualRegister( AtomicSize == 8 ? &PPC::G8RCRegClass
12095 : &PPC::GPRCRegClass);
12096
12097 // thisMBB:
12098 // ...
12099 // fallthrough --> loopMBB
12100 BB->addSuccessor(loopMBB);
12101
12102 // loopMBB:
12103 // l[wd]arx dest, ptr
12104 // add r0, dest, incr
12105 // st[wd]cx. r0, ptr
12106 // bne- loopMBB
12107 // fallthrough --> exitMBB
12108
12109 // For max/min...
12110 // loopMBB:
12111 // l[wd]arx dest, ptr
12112 // cmpl?[wd] dest, incr
12113 // bgt exitMBB
12114 // loop2MBB:
12115 // st[wd]cx. dest, ptr
12116 // bne- loopMBB
12117 // fallthrough --> exitMBB
12118
12119 BB = loopMBB;
12120 BuildMI(BB, dl, TII->get(LoadMnemonic), dest)
12121 .addReg(ptrA).addReg(ptrB);
12122 if (BinOpcode)
12123 BuildMI(BB, dl, TII->get(BinOpcode), TmpReg).addReg(incr).addReg(dest);
12124 if (CmpOpcode) {
12125 Register CrReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
12126 // Signed comparisons of byte or halfword values must be sign-extended.
12127 if (CmpOpcode == PPC::CMPW && AtomicSize < 4) {
12128 Register ExtReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
12129 BuildMI(BB, dl, TII->get(AtomicSize == 1 ? PPC::EXTSB : PPC::EXTSH),
12130 ExtReg).addReg(dest);
12131 BuildMI(BB, dl, TII->get(CmpOpcode), CrReg).addReg(ExtReg).addReg(incr);
12132 } else
12133 BuildMI(BB, dl, TII->get(CmpOpcode), CrReg).addReg(dest).addReg(incr);
12134
12135 BuildMI(BB, dl, TII->get(PPC::BCC))
12136 .addImm(CmpPred)
12137 .addReg(CrReg)
12138 .addMBB(exitMBB);
12139 BB->addSuccessor(loop2MBB);
12140 BB->addSuccessor(exitMBB);
12141 BB = loop2MBB;
12142 }
12143 BuildMI(BB, dl, TII->get(StoreMnemonic))
12144 .addReg(TmpReg).addReg(ptrA).addReg(ptrB);
12145 BuildMI(BB, dl, TII->get(PPC::BCC))
12146 .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loopMBB);
12147 BB->addSuccessor(loopMBB);
12148 BB->addSuccessor(exitMBB);
12149
12150 // exitMBB:
12151 // ...
12152 BB = exitMBB;
12153 return BB;
12154}
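// For orientation, an IR-level operation that reaches this expansion is
//   %old = atomicrmw add ptr %p, i32 %v monotonic
// which becomes the l[wd]arx / add / st[wd]cx. / bne- loop sketched in the
// comments above; the min/max variants insert the compare-and-branch to
// exitMBB before the store-conditional. Fences required by stronger orderings
// are added separately by emitLeadingFence/emitTrailingFence.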
12155
12156 static bool isSignExtended(MachineInstr &MI, const PPCInstrInfo *TII) {
12157 switch(MI.getOpcode()) {
12158 default:
12159 return false;
12160 case PPC::COPY:
12161 return TII->isSignExtended(MI.getOperand(1).getReg(),
12162 &MI.getMF()->getRegInfo());
12163 case PPC::LHA:
12164 case PPC::LHA8:
12165 case PPC::LHAU:
12166 case PPC::LHAU8:
12167 case PPC::LHAUX:
12168 case PPC::LHAUX8:
12169 case PPC::LHAX:
12170 case PPC::LHAX8:
12171 case PPC::LWA:
12172 case PPC::LWAUX:
12173 case PPC::LWAX:
12174 case PPC::LWAX_32:
12175 case PPC::LWA_32:
12176 case PPC::PLHA:
12177 case PPC::PLHA8:
12178 case PPC::PLHA8pc:
12179 case PPC::PLHApc:
12180 case PPC::PLWA:
12181 case PPC::PLWA8:
12182 case PPC::PLWA8pc:
12183 case PPC::PLWApc:
12184 case PPC::EXTSB:
12185 case PPC::EXTSB8:
12186 case PPC::EXTSB8_32_64:
12187 case PPC::EXTSB8_rec:
12188 case PPC::EXTSB_rec:
12189 case PPC::EXTSH:
12190 case PPC::EXTSH8:
12191 case PPC::EXTSH8_32_64:
12192 case PPC::EXTSH8_rec:
12193 case PPC::EXTSH_rec:
12194 case PPC::EXTSW:
12195 case PPC::EXTSWSLI:
12196 case PPC::EXTSWSLI_32_64:
12197 case PPC::EXTSWSLI_32_64_rec:
12198 case PPC::EXTSWSLI_rec:
12199 case PPC::EXTSW_32:
12200 case PPC::EXTSW_32_64:
12201 case PPC::EXTSW_32_64_rec:
12202 case PPC::EXTSW_rec:
12203 case PPC::SRAW:
12204 case PPC::SRAWI:
12205 case PPC::SRAWI_rec:
12206 case PPC::SRAW_rec:
12207 return true;
12208 }
12209 return false;
12210}
12211
12212 MachineBasicBlock *PPCTargetLowering::EmitPartwordAtomicBinary(
12213 MachineInstr &MI, MachineBasicBlock *BB,
12214 bool is8bit, // operation
12215 unsigned BinOpcode, unsigned CmpOpcode, unsigned CmpPred) const {
12216 // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
12217 const PPCInstrInfo *TII = Subtarget.getInstrInfo();
12218
12219 // If this is a signed comparison and the value being compared is not known
12220 // to be sign extended, sign extend it here.
12221 DebugLoc dl = MI.getDebugLoc();
12222 MachineFunction *F = BB->getParent();
12223 MachineRegisterInfo &RegInfo = F->getRegInfo();
12224 Register incr = MI.getOperand(3).getReg();
12225 bool IsSignExtended =
12226 incr.isVirtual() && isSignExtended(*RegInfo.getVRegDef(incr), TII);
12227
12228 if (CmpOpcode == PPC::CMPW && !IsSignExtended) {
12229 Register ValueReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
12230 BuildMI(*BB, MI, dl, TII->get(is8bit ? PPC::EXTSB : PPC::EXTSH), ValueReg)
12231 .addReg(MI.getOperand(3).getReg());
12232 MI.getOperand(3).setReg(ValueReg);
12233 incr = ValueReg;
12234 }
12235 // If we support part-word atomic mnemonics, just use them
12236 if (Subtarget.hasPartwordAtomics())
12237 return EmitAtomicBinary(MI, BB, is8bit ? 1 : 2, BinOpcode, CmpOpcode,
12238 CmpPred);
12239
12240 // In 64 bit mode we have to use 64 bits for addresses, even though the
12241 // lwarx/stwcx are 32 bits. With the 32-bit atomics we can use address
12242 // registers without caring whether they're 32 or 64, but here we're
12243 // doing actual arithmetic on the addresses.
12244 bool is64bit = Subtarget.isPPC64();
12245 bool isLittleEndian = Subtarget.isLittleEndian();
12246 unsigned ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;
12247
12248 const BasicBlock *LLVM_BB = BB->getBasicBlock();
12249 MachineFunction::iterator It = ++BB->getIterator();
12250
12251 Register dest = MI.getOperand(0).getReg();
12252 Register ptrA = MI.getOperand(1).getReg();
12253 Register ptrB = MI.getOperand(2).getReg();
12254
12255 MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
12256 MachineBasicBlock *loop2MBB =
12257 CmpOpcode ? F->CreateMachineBasicBlock(LLVM_BB) : nullptr;
12258 MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
12259 F->insert(It, loopMBB);
12260 if (CmpOpcode)
12261 F->insert(It, loop2MBB);
12262 F->insert(It, exitMBB);
12263 exitMBB->splice(exitMBB->begin(), BB,
12264 std::next(MachineBasicBlock::iterator(MI)), BB->end());
12265 exitMBB->transferSuccessorsAndUpdatePHIs(BB);
12266
12267 const TargetRegisterClass *RC =
12268 is64bit ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
12269 const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
12270
12271 Register PtrReg = RegInfo.createVirtualRegister(RC);
12272 Register Shift1Reg = RegInfo.createVirtualRegister(GPRC);
12273 Register ShiftReg =
12274 isLittleEndian ? Shift1Reg : RegInfo.createVirtualRegister(GPRC);
12275 Register Incr2Reg = RegInfo.createVirtualRegister(GPRC);
12276 Register MaskReg = RegInfo.createVirtualRegister(GPRC);
12277 Register Mask2Reg = RegInfo.createVirtualRegister(GPRC);
12278 Register Mask3Reg = RegInfo.createVirtualRegister(GPRC);
12279 Register Tmp2Reg = RegInfo.createVirtualRegister(GPRC);
12280 Register Tmp3Reg = RegInfo.createVirtualRegister(GPRC);
12281 Register Tmp4Reg = RegInfo.createVirtualRegister(GPRC);
12282 Register TmpDestReg = RegInfo.createVirtualRegister(GPRC);
12283 Register SrwDestReg = RegInfo.createVirtualRegister(GPRC);
12284 Register Ptr1Reg;
12285 Register TmpReg =
12286 (!BinOpcode) ? Incr2Reg : RegInfo.createVirtualRegister(GPRC);
12287
12288 // thisMBB:
12289 // ...
12290 // fallthrough --> loopMBB
12291 BB->addSuccessor(loopMBB);
12292
12293 // The 4-byte load must be aligned, while a char or short may be
12294 // anywhere in the word. Hence all this nasty bookkeeping code.
12295 // add ptr1, ptrA, ptrB [copy if ptrA==0]
12296 // rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27]
12297 // xori shift, shift1, 24 [16]
12298 // rlwinm ptr, ptr1, 0, 0, 29
12299 // slw incr2, incr, shift
12300 // li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535]
12301 // slw mask, mask2, shift
12302 // loopMBB:
12303 // lwarx tmpDest, ptr
12304 // add tmp, tmpDest, incr2
12305 // andc tmp2, tmpDest, mask
12306 // and tmp3, tmp, mask
12307 // or tmp4, tmp3, tmp2
12308 // stwcx. tmp4, ptr
12309 // bne- loopMBB
12310 // fallthrough --> exitMBB
12311 // srw SrwDest, tmpDest, shift
12312 // rlwinm SrwDest, SrwDest, 0, 24 [16], 31
12313 if (ptrA != ZeroReg) {
12314 Ptr1Reg = RegInfo.createVirtualRegister(RC);
12315 BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)
12316 .addReg(ptrA)
12317 .addReg(ptrB);
12318 } else {
12319 Ptr1Reg = ptrB;
12320 }
12321 // We need to use a 32-bit subregister to avoid a register class mismatch in
12322 // 64-bit mode.
12323 BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg)
12324 .addReg(Ptr1Reg, 0, is64bit ? PPC::sub_32 : 0)
12325 .addImm(3)
12326 .addImm(27)
12327 .addImm(is8bit ? 28 : 27);
12328 if (!isLittleEndian)
12329 BuildMI(BB, dl, TII->get(PPC::XORI), ShiftReg)
12330 .addReg(Shift1Reg)
12331 .addImm(is8bit ? 24 : 16);
12332 if (is64bit)
12333 BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg)
12334 .addReg(Ptr1Reg)
12335 .addImm(0)
12336 .addImm(61);
12337 else
12338 BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg)
12339 .addReg(Ptr1Reg)
12340 .addImm(0)
12341 .addImm(0)
12342 .addImm(29);
12343 BuildMI(BB, dl, TII->get(PPC::SLW), Incr2Reg).addReg(incr).addReg(ShiftReg);
12344 if (is8bit)
12345 BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255);
12346 else {
12347 BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0);
12348 BuildMI(BB, dl, TII->get(PPC::ORI), Mask2Reg)
12349 .addReg(Mask3Reg)
12350 .addImm(65535);
12351 }
12352 BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg)
12353 .addReg(Mask2Reg)
12354 .addReg(ShiftReg);
12355
12356 BB = loopMBB;
12357 BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg)
12358 .addReg(ZeroReg)
12359 .addReg(PtrReg);
12360 if (BinOpcode)
12361 BuildMI(BB, dl, TII->get(BinOpcode), TmpReg)
12362 .addReg(Incr2Reg)
12363 .addReg(TmpDestReg);
12364 BuildMI(BB, dl, TII->get(PPC::ANDC), Tmp2Reg)
12365 .addReg(TmpDestReg)
12366 .addReg(MaskReg);
12367 BuildMI(BB, dl, TII->get(PPC::AND), Tmp3Reg).addReg(TmpReg).addReg(MaskReg);
12368 if (CmpOpcode) {
12369 // For unsigned comparisons, we can directly compare the shifted values.
12370 // For signed comparisons we shift and sign extend.
12371 Register SReg = RegInfo.createVirtualRegister(GPRC);
12372 Register CrReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
12373 BuildMI(BB, dl, TII->get(PPC::AND), SReg)
12374 .addReg(TmpDestReg)
12375 .addReg(MaskReg);
12376 unsigned ValueReg = SReg;
12377 unsigned CmpReg = Incr2Reg;
12378 if (CmpOpcode == PPC::CMPW) {
12379 ValueReg = RegInfo.createVirtualRegister(GPRC);
12380 BuildMI(BB, dl, TII->get(PPC::SRW), ValueReg)
12381 .addReg(SReg)
12382 .addReg(ShiftReg);
12383 Register ValueSReg = RegInfo.createVirtualRegister(GPRC);
12384 BuildMI(BB, dl, TII->get(is8bit ? PPC::EXTSB : PPC::EXTSH), ValueSReg)
12385 .addReg(ValueReg);
12386 ValueReg = ValueSReg;
12387 CmpReg = incr;
12388 }
12389 BuildMI(BB, dl, TII->get(CmpOpcode), CrReg).addReg(ValueReg).addReg(CmpReg);
12390 BuildMI(BB, dl, TII->get(PPC::BCC))
12391 .addImm(CmpPred)
12392 .addReg(CrReg)
12393 .addMBB(exitMBB);
12394 BB->addSuccessor(loop2MBB);
12395 BB->addSuccessor(exitMBB);
12396 BB = loop2MBB;
12397 }
12398 BuildMI(BB, dl, TII->get(PPC::OR), Tmp4Reg).addReg(Tmp3Reg).addReg(Tmp2Reg);
12399 BuildMI(BB, dl, TII->get(PPC::STWCX))
12400 .addReg(Tmp4Reg)
12401 .addReg(ZeroReg)
12402 .addReg(PtrReg);
12403 BuildMI(BB, dl, TII->get(PPC::BCC))
12404 .addImm(PPC::PRED_NE)
12405 .addReg(PPC::CR0)
12406 .addMBB(loopMBB);
12407 BB->addSuccessor(loopMBB);
12408 BB->addSuccessor(exitMBB);
12409
12410 // exitMBB:
12411 // ...
12412 BB = exitMBB;
12413 // Since the shift amount is not a constant, we need to clear
12414 // the upper bits with a separate RLWINM.
12415 BuildMI(*BB, BB->begin(), dl, TII->get(PPC::RLWINM), dest)
12416 .addReg(SrwDestReg)
12417 .addImm(0)
12418 .addImm(is8bit ? 24 : 16)
12419 .addImm(31);
12420 BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), SrwDestReg)
12421 .addReg(TmpDestReg)
12422 .addReg(ShiftReg);
12423 return BB;
12424}
12425
12426 MachineBasicBlock *
12427 PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr &MI,
12428 MachineBasicBlock *MBB) const {
12429 DebugLoc DL = MI.getDebugLoc();
12430 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
12431 const PPCRegisterInfo *TRI = Subtarget.getRegisterInfo();
12432
12433 MachineFunction *MF = MBB->getParent();
12434 MachineRegisterInfo &MRI = MF->getRegInfo();
12435
12436 const BasicBlock *BB = MBB->getBasicBlock();
12437 MachineFunction::iterator I = ++MBB->getIterator();
12438
12439 Register DstReg = MI.getOperand(0).getReg();
12440 const TargetRegisterClass *RC = MRI.getRegClass(DstReg);
12441 assert(TRI->isTypeLegalForClass(*RC, MVT::i32) && "Invalid destination!");
12442 Register mainDstReg = MRI.createVirtualRegister(RC);
12443 Register restoreDstReg = MRI.createVirtualRegister(RC);
12444
12445 MVT PVT = getPointerTy(MF->getDataLayout());
12446 assert((PVT == MVT::i64 || PVT == MVT::i32) &&
12447 "Invalid Pointer Size!");
12448 // For v = setjmp(buf), we generate
12449 //
12450 // thisMBB:
12451 // SjLjSetup mainMBB
12452 // bl mainMBB
12453 // v_restore = 1
12454 // b sinkMBB
12455 //
12456 // mainMBB:
12457 // buf[LabelOffset] = LR
12458 // v_main = 0
12459 //
12460 // sinkMBB:
12461 // v = phi(main, restore)
12462 //
12463
12464 MachineBasicBlock *thisMBB = MBB;
12465 MachineBasicBlock *mainMBB = MF->CreateMachineBasicBlock(BB);
12466 MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(BB);
12467 MF->insert(I, mainMBB);
12468 MF->insert(I, sinkMBB);
12470 MachineInstrBuilder MIB;
12469
12471
12472 // Transfer the remainder of BB and its successor edges to sinkMBB.
12473 sinkMBB->splice(sinkMBB->begin(), MBB,
12474 std::next(MachineBasicBlock::iterator(MI)), MBB->end());
12475 sinkMBB->transferSuccessorsAndUpdatePHIs(MBB);
12476
12477 // Note that the structure of the jmp_buf used here is not compatible
12478 // with that used by libc, and is not designed to be. Specifically, it
12479 // stores only those 'reserved' registers that LLVM does not otherwise
12480 // understand how to spill. Also, by convention, by the time this
12481 // intrinsic is called, Clang has already stored the frame address in the
12482 // first slot of the buffer and stack address in the third. Following the
12483 // X86 target code, we'll store the jump address in the second slot. We also
12484 // need to save the TOC pointer (R2) to handle jumps between shared
12485 // libraries, and that will be stored in the fourth slot. The thread
12486 // identifier (R13) is not affected.
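  // For reference, the pointer-sized slot layout implied by the offsets used
  // below (a summary of this code, not an ABI-defined jmp_buf layout) is:
  //   buf[0] = frame address (already stored by Clang)
  //   buf[1] = jump address (LR), LabelOffset
  //   buf[2] = stack address (already stored by Clang), SPOffset in longjmp
  //   buf[3] = TOC pointer (R2), TOCOffset
  //   buf[4] = base pointer, BPOffset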
12487
12488 // thisMBB:
12489 const int64_t LabelOffset = 1 * PVT.getStoreSize();
12490 const int64_t TOCOffset = 3 * PVT.getStoreSize();
12491 const int64_t BPOffset = 4 * PVT.getStoreSize();
12492
12493 // Prepare the IP in a register.
12494 const TargetRegisterClass *PtrRC = getRegClassFor(PVT);
12495 Register LabelReg = MRI.createVirtualRegister(PtrRC);
12496 Register BufReg = MI.getOperand(1).getReg();
12497
12498 if (Subtarget.is64BitELFABI()) {
12499 setUsesTOCBasePtr(*MBB->getParent());
12500 MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::STD))
12501 .addReg(PPC::X2)
12502 .addImm(TOCOffset)
12503 .addReg(BufReg)
12504 .cloneMemRefs(MI);
12505 }
12506
12507 // Naked functions never have a base pointer, and so we use r1. For all
12508 // other functions, this decision must be delayed until PEI.
12509 unsigned BaseReg;
12510 if (MF->getFunction().hasFnAttribute(Attribute::Naked))
12511 BaseReg = Subtarget.isPPC64() ? PPC::X1 : PPC::R1;
12512 else
12513 BaseReg = Subtarget.isPPC64() ? PPC::BP8 : PPC::BP;
12514
12515 MIB = BuildMI(*thisMBB, MI, DL,
12516 TII->get(Subtarget.isPPC64() ? PPC::STD : PPC::STW))
12517 .addReg(BaseReg)
12518 .addImm(BPOffset)
12519 .addReg(BufReg)
12520 .cloneMemRefs(MI);
12521
12522 // Setup
12523 MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::BCLalways)).addMBB(mainMBB);
12524 MIB.addRegMask(TRI->getNoPreservedMask());
12525
12526 BuildMI(*thisMBB, MI, DL, TII->get(PPC::LI), restoreDstReg).addImm(1);
12527
12528 MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::EH_SjLj_Setup))
12529 .addMBB(mainMBB);
12530 MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::B)).addMBB(sinkMBB);
12531
12532 thisMBB->addSuccessor(mainMBB, BranchProbability::getZero());
12533 thisMBB->addSuccessor(sinkMBB, BranchProbability::getOne());
12534
12535 // mainMBB:
12536 // mainDstReg = 0
12537 MIB =
12538 BuildMI(mainMBB, DL,
12539 TII->get(Subtarget.isPPC64() ? PPC::MFLR8 : PPC::MFLR), LabelReg);
12540
12541 // Store IP
12542 if (Subtarget.isPPC64()) {
12543 MIB = BuildMI(mainMBB, DL, TII->get(PPC::STD))
12544 .addReg(LabelReg)
12545 .addImm(LabelOffset)
12546 .addReg(BufReg);
12547 } else {
12548 MIB = BuildMI(mainMBB, DL, TII->get(PPC::STW))
12549 .addReg(LabelReg)
12550 .addImm(LabelOffset)
12551 .addReg(BufReg);
12552 }
12553 MIB.cloneMemRefs(MI);
12554
12555 BuildMI(mainMBB, DL, TII->get(PPC::LI), mainDstReg).addImm(0);
12556 mainMBB->addSuccessor(sinkMBB);
12557
12558 // sinkMBB:
12559 BuildMI(*sinkMBB, sinkMBB->begin(), DL,
12560 TII->get(PPC::PHI), DstReg)
12561 .addReg(mainDstReg).addMBB(mainMBB)
12562 .addReg(restoreDstReg).addMBB(thisMBB);
12563
12564 MI.eraseFromParent();
12565 return sinkMBB;
12566}
12567
12568 MachineBasicBlock *
12569 PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr &MI,
12570 MachineBasicBlock *MBB) const {
12571 DebugLoc DL = MI.getDebugLoc();
12572 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
12573
12574 MachineFunction *MF = MBB->getParent();
12575 MachineRegisterInfo &MRI = MF->getRegInfo();
12576
12577 MVT PVT = getPointerTy(MF->getDataLayout());
12578 assert((PVT == MVT::i64 || PVT == MVT::i32) &&
12579 "Invalid Pointer Size!");
12580
12581 const TargetRegisterClass *RC =
12582 (PVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
12583 Register Tmp = MRI.createVirtualRegister(RC);
12584 // Since FP is only updated here but NOT referenced, it's treated as GPR.
12585 unsigned FP = (PVT == MVT::i64) ? PPC::X31 : PPC::R31;
12586 unsigned SP = (PVT == MVT::i64) ? PPC::X1 : PPC::R1;
12587 unsigned BP =
12588 (PVT == MVT::i64)
12589 ? PPC::X30
12590 : (Subtarget.isSVR4ABI() && isPositionIndependent() ? PPC::R29
12591 : PPC::R30);
12593 MachineInstrBuilder MIB;
12592
12594
12595 const int64_t LabelOffset = 1 * PVT.getStoreSize();
12596 const int64_t SPOffset = 2 * PVT.getStoreSize();
12597 const int64_t TOCOffset = 3 * PVT.getStoreSize();
12598 const int64_t BPOffset = 4 * PVT.getStoreSize();
12599
12600 Register BufReg = MI.getOperand(0).getReg();
12601
12602 // Reload FP (the jumped-to function may not have had a
12603 // frame pointer, and if so, then its r31 will be restored
12604 // as necessary).
12605 if (PVT == MVT::i64) {
12606 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), FP)
12607 .addImm(0)
12608 .addReg(BufReg);
12609 } else {
12610 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), FP)
12611 .addImm(0)
12612 .addReg(BufReg);
12613 }
12614 MIB.cloneMemRefs(MI);
12615
12616 // Reload IP
12617 if (PVT == MVT::i64) {
12618 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), Tmp)
12619 .addImm(LabelOffset)
12620 .addReg(BufReg);
12621 } else {
12622 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), Tmp)
12623 .addImm(LabelOffset)
12624 .addReg(BufReg);
12625 }
12626 MIB.cloneMemRefs(MI);
12627
12628 // Reload SP
12629 if (PVT == MVT::i64) {
12630 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), SP)
12631 .addImm(SPOffset)
12632 .addReg(BufReg);
12633 } else {
12634 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), SP)
12635 .addImm(SPOffset)
12636 .addReg(BufReg);
12637 }
12638 MIB.cloneMemRefs(MI);
12639
12640 // Reload BP
12641 if (PVT == MVT::i64) {
12642 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), BP)
12643 .addImm(BPOffset)
12644 .addReg(BufReg);
12645 } else {
12646 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), BP)
12647 .addImm(BPOffset)
12648 .addReg(BufReg);
12649 }
12650 MIB.cloneMemRefs(MI);
12651
12652 // Reload TOC
12653 if (PVT == MVT::i64 && Subtarget.isSVR4ABI()) {
12655 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), PPC::X2)
12656 .addImm(TOCOffset)
12657 .addReg(BufReg)
12658 .cloneMemRefs(MI);
12659 }
12660
12661 // Jump
12662 BuildMI(*MBB, MI, DL,
12663 TII->get(PVT == MVT::i64 ? PPC::MTCTR8 : PPC::MTCTR)).addReg(Tmp);
12664 BuildMI(*MBB, MI, DL, TII->get(PVT == MVT::i64 ? PPC::BCTR8 : PPC::BCTR));
12665
12666 MI.eraseFromParent();
12667 return MBB;
12668}
12669
12670bool PPCTargetLowering::hasInlineStackProbe(const MachineFunction &MF) const {
12671 // If the function specifically requests inline stack probes, emit them.
12672 if (MF.getFunction().hasFnAttribute("probe-stack"))
12673 return MF.getFunction().getFnAttribute("probe-stack").getValueAsString() ==
12674 "inline-asm";
12675 return false;
12676}
12677
12678unsigned PPCTargetLowering::getStackProbeSize(const MachineFunction &MF) const {
12679 const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
12680 unsigned StackAlign = TFI->getStackAlignment();
12681 assert(StackAlign >= 1 && isPowerOf2_32(StackAlign) &&
12682 "Unexpected stack alignment");
12683 // The default stack probe size is 4096 if the function has no
12684 // stack-probe-size attribute.
12685 const Function &Fn = MF.getFunction();
12686 unsigned StackProbeSize =
12687 Fn.getFnAttributeAsParsedInteger("stack-probe-size", 4096);
12688 // Round down to the stack alignment.
12689 StackProbeSize &= ~(StackAlign - 1);
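 // For example, with a 16-byte stack alignment a requested probe size of 4100 rounds down to 4096.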
12690 return StackProbeSize ? StackProbeSize : StackAlign;
12691}
12692
12693// Lower dynamic stack allocation with probing. `emitProbedAlloca` is split
12694// into three phases. In the first phase, it uses the pseudo instruction
12695// PREPARE_PROBED_ALLOCA to get the future result of the actual FramePointer and
12696// FinalStackPtr. In the second phase, it generates a loop that probes the blocks.
12697// Finally, it uses the pseudo instruction DYNAREAOFFSET to get the future result
12698// of MaxCallFrameSize so that it can calculate the correct data area pointer.
12699MachineBasicBlock *
12700PPCTargetLowering::emitProbedAlloca(MachineInstr &MI,
12701 MachineBasicBlock *MBB) const {
12702 const bool isPPC64 = Subtarget.isPPC64();
12703 MachineFunction *MF = MBB->getParent();
12704 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
12705 DebugLoc DL = MI.getDebugLoc();
12706 const unsigned ProbeSize = getStackProbeSize(*MF);
12707 const BasicBlock *ProbedBB = MBB->getBasicBlock();
12709 // The CFG of probing stack looks as
12710 // +-----+
12711 // | MBB |
12712 // +--+--+
12713 // |
12714 // +----v----+
12715 // +--->+ TestMBB +---+
12716 // | +----+----+ |
12717 // | | |
12718 // | +-----v----+ |
12719 // +---+ BlockMBB | |
12720 // +----------+ |
12721 // |
12722 // +---------+ |
12723 // | TailMBB +<--+
12724 // +---------+
12725 // In MBB, calculate previous frame pointer and final stack pointer.
12726 // In TestMBB, test if sp is equal to final stack pointer, if so, jump to
12727 // TailMBB. In BlockMBB, update the sp atomically and jump back to TestMBB.
12728 // TailMBB is spliced via \p MI.
12729 MachineBasicBlock *TestMBB = MF->CreateMachineBasicBlock(ProbedBB);
12730 MachineBasicBlock *TailMBB = MF->CreateMachineBasicBlock(ProbedBB);
12731 MachineBasicBlock *BlockMBB = MF->CreateMachineBasicBlock(ProbedBB);
12732
12733 MachineFunction::iterator MBBIter = ++MBB->getIterator();
12734 MF->insert(MBBIter, TestMBB);
12735 MF->insert(MBBIter, BlockMBB);
12736 MF->insert(MBBIter, TailMBB);
12737
12738 const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
12739 const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
12740
12741 Register DstReg = MI.getOperand(0).getReg();
12742 Register NegSizeReg = MI.getOperand(1).getReg();
12743 Register SPReg = isPPC64 ? PPC::X1 : PPC::R1;
12744 Register FinalStackPtr = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
12745 Register FramePointer = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
12746 Register ActualNegSizeReg = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
12747
12748 // Since the value of NegSizeReg might be realigned during prologue/epilogue
12749 // insertion, insert a PREPARE_PROBED_ALLOCA pseudo instruction to get the
12750 // actual FramePointer and NegSize.
12751 unsigned ProbeOpc;
12752 if (!MRI.hasOneNonDBGUse(NegSizeReg))
12753 ProbeOpc =
12754 isPPC64 ? PPC::PREPARE_PROBED_ALLOCA_64 : PPC::PREPARE_PROBED_ALLOCA_32;
12755 else
12756 // By using PREPARE_PROBED_ALLOCA_NEGSIZE_SAME_REG, ActualNegSizeReg
12757 // and NegSizeReg will be allocated to the same physical register, avoiding
12758 // a redundant copy when NegSizeReg's only use is the current MI, which
12759 // PREPARE_PROBED_ALLOCA then replaces.
12760 ProbeOpc = isPPC64 ? PPC::PREPARE_PROBED_ALLOCA_NEGSIZE_SAME_REG_64
12761 : PPC::PREPARE_PROBED_ALLOCA_NEGSIZE_SAME_REG_32;
12762 BuildMI(*MBB, {MI}, DL, TII->get(ProbeOpc), FramePointer)
12763 .addDef(ActualNegSizeReg)
12764 .addReg(NegSizeReg)
12765 .add(MI.getOperand(2))
12766 .add(MI.getOperand(3));
12767
12768 // Calculate the final stack pointer, which equals SP + ActualNegSize.
12769 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::ADD8 : PPC::ADD4),
12770 FinalStackPtr)
12771 .addReg(SPReg)
12772 .addReg(ActualNegSizeReg);
12773
12774 // Materialize a scratch register for update.
12775 int64_t NegProbeSize = -(int64_t)ProbeSize;
12776 assert(isInt<32>(NegProbeSize) && "Unhandled probe size!");
12777 Register ScratchReg = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
12778 if (!isInt<16>(NegProbeSize)) {
12779 Register TempReg = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
12780 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::LIS8 : PPC::LIS), TempReg)
12781 .addImm(NegProbeSize >> 16);
12782 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::ORI8 : PPC::ORI),
12783 ScratchReg)
12784 .addReg(TempReg)
12785 .addImm(NegProbeSize & 0xFFFF);
12786 } else
12787 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::LI8 : PPC::LI), ScratchReg)
12788 .addImm(NegProbeSize);
12789
12790 {
12791 // Probing leading residual part.
12792 Register Div = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
12793 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::DIVD : PPC::DIVW), Div)
12794 .addReg(ActualNegSizeReg)
12795 .addReg(ScratchReg);
12796 Register Mul = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
12797 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::MULLD : PPC::MULLW), Mul)
12798 .addReg(Div)
12799 .addReg(ScratchReg);
12800 Register NegMod = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
12801 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::SUBF8 : PPC::SUBF), NegMod)
12802 .addReg(Mul)
12803 .addReg(ActualNegSizeReg);
12804 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::STDUX : PPC::STWUX), SPReg)
12805 .addReg(FramePointer)
12806 .addReg(SPReg)
12807 .addReg(NegMod);
12808 }
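 // The store-with-update above moves SP down by the residual (ActualNegSize modulo ProbeSize), so the remaining distance to FinalStackPtr is an exact multiple of ProbeSize for the loop in BlockMBB.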
12809
12810 {
12811 // The remaining part should be a multiple of ProbeSize.
12812 Register CmpResult = MRI.createVirtualRegister(&PPC::CRRCRegClass);
12813 BuildMI(TestMBB, DL, TII->get(isPPC64 ? PPC::CMPD : PPC::CMPW), CmpResult)
12814 .addReg(SPReg)
12815 .addReg(FinalStackPtr);
12816 BuildMI(TestMBB, DL, TII->get(PPC::BCC))
12817 .addImm(PPC::PRED_EQ)
12818 .addReg(CmpResult)
12819 .addMBB(TailMBB);
12820 TestMBB->addSuccessor(BlockMBB);
12821 TestMBB->addSuccessor(TailMBB);
12822 }
12823
12824 {
12825 // Touch the block.
12826 // |P...|P...|P...
12827 BuildMI(BlockMBB, DL, TII->get(isPPC64 ? PPC::STDUX : PPC::STWUX), SPReg)
12828 .addReg(FramePointer)
12829 .addReg(SPReg)
12830 .addReg(ScratchReg);
12831 BuildMI(BlockMBB, DL, TII->get(PPC::B)).addMBB(TestMBB);
12832 BlockMBB->addSuccessor(TestMBB);
12833 }
12834
12835 // The calculation of MaxCallFrameSize is deferred to prologue/epilogue
12836 // insertion, so use the DYNAREAOFFSET pseudo instruction to get the future result.
12837 Register MaxCallFrameSizeReg =
12838 MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
12839 BuildMI(TailMBB, DL,
12840 TII->get(isPPC64 ? PPC::DYNAREAOFFSET8 : PPC::DYNAREAOFFSET),
12841 MaxCallFrameSizeReg)
12842 .add(MI.getOperand(2))
12843 .add(MI.getOperand(3));
12844 BuildMI(TailMBB, DL, TII->get(isPPC64 ? PPC::ADD8 : PPC::ADD4), DstReg)
12845 .addReg(SPReg)
12846 .addReg(MaxCallFrameSizeReg);
12847
12848 // Splice instructions after MI to TailMBB.
12849 TailMBB->splice(TailMBB->end(), MBB,
12850 std::next(MachineBasicBlock::iterator(MI)), MBB->end());
12852 MBB->addSuccessor(TestMBB);
12853
12854 // Delete the pseudo instruction.
12855 MI.eraseFromParent();
12856
12857 ++NumDynamicAllocaProbed;
12858 return TailMBB;
12859}
12860
12861static bool IsSelectCC(MachineInstr &MI) {
12862 switch (MI.getOpcode()) {
12863 case PPC::SELECT_CC_I4:
12864 case PPC::SELECT_CC_I8:
12865 case PPC::SELECT_CC_F4:
12866 case PPC::SELECT_CC_F8:
12867 case PPC::SELECT_CC_F16:
12868 case PPC::SELECT_CC_VRRC:
12869 case PPC::SELECT_CC_VSFRC:
12870 case PPC::SELECT_CC_VSSRC:
12871 case PPC::SELECT_CC_VSRC:
12872 case PPC::SELECT_CC_SPE4:
12873 case PPC::SELECT_CC_SPE:
12874 return true;
12875 default:
12876 return false;
12877 }
12878}
12879
12880static bool IsSelect(MachineInstr &MI) {
12881 switch (MI.getOpcode()) {
12882 case PPC::SELECT_I4:
12883 case PPC::SELECT_I8:
12884 case PPC::SELECT_F4:
12885 case PPC::SELECT_F8:
12886 case PPC::SELECT_F16:
12887 case PPC::SELECT_SPE:
12888 case PPC::SELECT_SPE4:
12889 case PPC::SELECT_VRRC:
12890 case PPC::SELECT_VSFRC:
12891 case PPC::SELECT_VSSRC:
12892 case PPC::SELECT_VSRC:
12893 return true;
12894 default:
12895 return false;
12896 }
12897}
12898
12899MachineBasicBlock *
12900PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
12901 MachineBasicBlock *BB) const {
12902 if (MI.getOpcode() == TargetOpcode::STACKMAP ||
12903 MI.getOpcode() == TargetOpcode::PATCHPOINT) {
12904 if (Subtarget.is64BitELFABI() &&
12905 MI.getOpcode() == TargetOpcode::PATCHPOINT &&
12906 !Subtarget.isUsingPCRelativeCalls()) {
12907 // Call lowering should have added an r2 operand to indicate a dependence
12908 // on the TOC base pointer value. It can't, however, because there is no
12909 // way to mark the dependence as implicit there, and so the stackmap code
12910 // will confuse it with a regular operand. Instead, add the dependence
12911 // here.
12912 MI.addOperand(MachineOperand::CreateReg(PPC::X2, false, true));
12913 }
12914
12915 return emitPatchPoint(MI, BB);
12916 }
12917
12918 if (MI.getOpcode() == PPC::EH_SjLj_SetJmp32 ||
12919 MI.getOpcode() == PPC::EH_SjLj_SetJmp64) {
12920 return emitEHSjLjSetJmp(MI, BB);
12921 } else if (MI.getOpcode() == PPC::EH_SjLj_LongJmp32 ||
12922 MI.getOpcode() == PPC::EH_SjLj_LongJmp64) {
12923 return emitEHSjLjLongJmp(MI, BB);
12924 }
12925
12926 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
12927
12928 // To "insert" these instructions we actually have to insert their
12929 // control-flow patterns.
12930 const BasicBlock *LLVM_BB = BB->getBasicBlock();
12931 MachineFunction::iterator It = ++BB->getIterator();
12932
12933 MachineFunction *F = BB->getParent();
12934 MachineRegisterInfo &MRI = F->getRegInfo();
12935
12936 if (Subtarget.hasISEL() &&
12937 (MI.getOpcode() == PPC::SELECT_CC_I4 ||
12938 MI.getOpcode() == PPC::SELECT_CC_I8 ||
12939 MI.getOpcode() == PPC::SELECT_I4 || MI.getOpcode() == PPC::SELECT_I8)) {
12940 SmallVector<MachineOperand, 2> Cond;
12941 if (MI.getOpcode() == PPC::SELECT_CC_I4 ||
12942 MI.getOpcode() == PPC::SELECT_CC_I8)
12943 Cond.push_back(MI.getOperand(4));
12944 else
12945 Cond.push_back(MachineOperand::CreateImm(PPC::PRED_BIT_SET));
12946 Cond.push_back(MI.getOperand(1));
12947
12948 DebugLoc dl = MI.getDebugLoc();
12949 TII->insertSelect(*BB, MI, dl, MI.getOperand(0).getReg(), Cond,
12950 MI.getOperand(2).getReg(), MI.getOperand(3).getReg());
12951 } else if (IsSelectCC(MI) || IsSelect(MI)) {
12952 // The incoming instruction knows the destination vreg to set, the
12953 // condition code register to branch on, the true/false values to
12954 // select between, and a branch opcode to use.
12955
12956 // thisMBB:
12957 // ...
12958 // TrueVal = ...
12959 // cmpTY ccX, r1, r2
12960 // bCC sinkMBB
12961 // fallthrough --> copy0MBB
12962 MachineBasicBlock *thisMBB = BB;
12963 MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
12964 MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
12965 DebugLoc dl = MI.getDebugLoc();
12966 F->insert(It, copy0MBB);
12967 F->insert(It, sinkMBB);
12968
12969 // Set the call frame size on entry to the new basic blocks.
12970 // See https://reviews.llvm.org/D156113.
12971 unsigned CallFrameSize = TII->getCallFrameSizeAt(MI);
12972 copy0MBB->setCallFrameSize(CallFrameSize);
12973 sinkMBB->setCallFrameSize(CallFrameSize);
12974
12975 // Transfer the remainder of BB and its successor edges to sinkMBB.
12976 sinkMBB->splice(sinkMBB->begin(), BB,
12977 std::next(MachineBasicBlock::iterator(MI)), BB->end());
12979
12980 // Next, add the true and fallthrough blocks as its successors.
12981 BB->addSuccessor(copy0MBB);
12982 BB->addSuccessor(sinkMBB);
12983
12984 if (IsSelect(MI)) {
12985 BuildMI(BB, dl, TII->get(PPC::BC))
12986 .addReg(MI.getOperand(1).getReg())
12987 .addMBB(sinkMBB);
12988 } else {
12989 unsigned SelectPred = MI.getOperand(4).getImm();
12990 BuildMI(BB, dl, TII->get(PPC::BCC))
12991 .addImm(SelectPred)
12992 .addReg(MI.getOperand(1).getReg())
12993 .addMBB(sinkMBB);
12994 }
12995
12996 // copy0MBB:
12997 // %FalseValue = ...
12998 // # fallthrough to sinkMBB
12999 BB = copy0MBB;
13000
13001 // Update machine-CFG edges
13002 BB->addSuccessor(sinkMBB);
13003
13004 // sinkMBB:
13005 // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
13006 // ...
13007 BB = sinkMBB;
13008 BuildMI(*BB, BB->begin(), dl, TII->get(PPC::PHI), MI.getOperand(0).getReg())
13009 .addReg(MI.getOperand(3).getReg())
13010 .addMBB(copy0MBB)
13011 .addReg(MI.getOperand(2).getReg())
13012 .addMBB(thisMBB);
13013 } else if (MI.getOpcode() == PPC::ReadTB) {
13014 // To read the 64-bit time-base register on a 32-bit target, we read the
13015 // two halves. Should the counter have wrapped while it was being read, we
13016 // need to try again.
13017 // ...
13018 // readLoop:
13019 // mfspr Rx,TBU # load from TBU
13020 // mfspr Ry,TB # load from TB
13021 // mfspr Rz,TBU # load from TBU
13022 // cmpw crX,Rx,Rz # check if 'old'='new'
13023 // bne readLoop # branch if they're not equal
13024 // ...
13025
13026 MachineBasicBlock *readMBB = F->CreateMachineBasicBlock(LLVM_BB);
13027 MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
13028 DebugLoc dl = MI.getDebugLoc();
13029 F->insert(It, readMBB);
13030 F->insert(It, sinkMBB);
13031
13032 // Transfer the remainder of BB and its successor edges to sinkMBB.
13033 sinkMBB->splice(sinkMBB->begin(), BB,
13034 std::next(MachineBasicBlock::iterator(MI)), BB->end());
13036
13037 BB->addSuccessor(readMBB);
13038 BB = readMBB;
13039
13040 MachineRegisterInfo &RegInfo = F->getRegInfo();
13041 Register ReadAgainReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
13042 Register LoReg = MI.getOperand(0).getReg();
13043 Register HiReg = MI.getOperand(1).getReg();
13044
13045 BuildMI(BB, dl, TII->get(PPC::MFSPR), HiReg).addImm(269);
13046 BuildMI(BB, dl, TII->get(PPC::MFSPR), LoReg).addImm(268);
13047 BuildMI(BB, dl, TII->get(PPC::MFSPR), ReadAgainReg).addImm(269);
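 // SPR 269 is TBU (time base, upper 32 bits) and SPR 268 is TBL (time base, lower 32 bits).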
13048
13049 Register CmpReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
13050
13051 BuildMI(BB, dl, TII->get(PPC::CMPW), CmpReg)
13052 .addReg(HiReg)
13053 .addReg(ReadAgainReg);
13054 BuildMI(BB, dl, TII->get(PPC::BCC))
13055 .addImm(PPC::PRED_NE)
13056 .addReg(CmpReg)
13057 .addMBB(readMBB);
13058
13059 BB->addSuccessor(readMBB);
13060 BB->addSuccessor(sinkMBB);
13061 } else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I8)
13062 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::ADD4);
13063 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I16)
13064 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::ADD4);
13065 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I32)
13066 BB = EmitAtomicBinary(MI, BB, 4, PPC::ADD4);
13067 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I64)
13068 BB = EmitAtomicBinary(MI, BB, 8, PPC::ADD8);
13069
13070 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I8)
13071 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::AND);
13072 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I16)
13073 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::AND);
13074 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I32)
13075 BB = EmitAtomicBinary(MI, BB, 4, PPC::AND);
13076 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I64)
13077 BB = EmitAtomicBinary(MI, BB, 8, PPC::AND8);
13078
13079 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I8)
13080 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::OR);
13081 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I16)
13082 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::OR);
13083 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I32)
13084 BB = EmitAtomicBinary(MI, BB, 4, PPC::OR);
13085 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I64)
13086 BB = EmitAtomicBinary(MI, BB, 8, PPC::OR8);
13087
13088 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I8)
13089 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::XOR);
13090 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I16)
13091 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::XOR);
13092 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I32)
13093 BB = EmitAtomicBinary(MI, BB, 4, PPC::XOR);
13094 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I64)
13095 BB = EmitAtomicBinary(MI, BB, 8, PPC::XOR8);
13096
13097 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I8)
13098 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::NAND);
13099 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I16)
13100 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::NAND);
13101 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I32)
13102 BB = EmitAtomicBinary(MI, BB, 4, PPC::NAND);
13103 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I64)
13104 BB = EmitAtomicBinary(MI, BB, 8, PPC::NAND8);
13105
13106 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I8)
13107 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::SUBF);
13108 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I16)
13109 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::SUBF);
13110 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I32)
13111 BB = EmitAtomicBinary(MI, BB, 4, PPC::SUBF);
13112 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I64)
13113 BB = EmitAtomicBinary(MI, BB, 8, PPC::SUBF8);
13114
13115 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I8)
13116 BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPW, PPC::PRED_LT);
13117 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I16)
13118 BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPW, PPC::PRED_LT);
13119 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I32)
13120 BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPW, PPC::PRED_LT);
13121 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I64)
13122 BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPD, PPC::PRED_LT);
13123
13124 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I8)
13125 BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPW, PPC::PRED_GT);
13126 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I16)
13127 BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPW, PPC::PRED_GT);
13128 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I32)
13129 BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPW, PPC::PRED_GT);
13130 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I64)
13131 BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPD, PPC::PRED_GT);
13132
13133 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I8)
13134 BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPLW, PPC::PRED_LT);
13135 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I16)
13136 BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPLW, PPC::PRED_LT);
13137 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I32)
13138 BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPLW, PPC::PRED_LT);
13139 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I64)
13140 BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPLD, PPC::PRED_LT);
13141
13142 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I8)
13143 BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPLW, PPC::PRED_GT);
13144 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I16)
13145 BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPLW, PPC::PRED_GT);
13146 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I32)
13147 BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPLW, PPC::PRED_GT);
13148 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I64)
13149 BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPLD, PPC::PRED_GT);
13150
13151 else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I8)
13152 BB = EmitPartwordAtomicBinary(MI, BB, true, 0);
13153 else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I16)
13154 BB = EmitPartwordAtomicBinary(MI, BB, false, 0);
13155 else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I32)
13156 BB = EmitAtomicBinary(MI, BB, 4, 0);
13157 else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I64)
13158 BB = EmitAtomicBinary(MI, BB, 8, 0);
13159 else if (MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I32 ||
13160 MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I64 ||
13161 (Subtarget.hasPartwordAtomics() &&
13162 MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8) ||
13163 (Subtarget.hasPartwordAtomics() &&
13164 MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I16)) {
13165 bool is64bit = MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I64;
13166
13167 auto LoadMnemonic = PPC::LDARX;
13168 auto StoreMnemonic = PPC::STDCX;
13169 switch (MI.getOpcode()) {
13170 default:
13171 llvm_unreachable("Compare and swap of unknown size");
13172 case PPC::ATOMIC_CMP_SWAP_I8:
13173 LoadMnemonic = PPC::LBARX;
13174 StoreMnemonic = PPC::STBCX;
13175 assert(Subtarget.hasPartwordAtomics() && "No support partword atomics.");
13176 break;
13177 case PPC::ATOMIC_CMP_SWAP_I16:
13178 LoadMnemonic = PPC::LHARX;
13179 StoreMnemonic = PPC::STHCX;
13180 assert(Subtarget.hasPartwordAtomics() && "No support partword atomics.");
13181 break;
13182 case PPC::ATOMIC_CMP_SWAP_I32:
13183 LoadMnemonic = PPC::LWARX;
13184 StoreMnemonic = PPC::STWCX;
13185 break;
13186 case PPC::ATOMIC_CMP_SWAP_I64:
13187 LoadMnemonic = PPC::LDARX;
13188 StoreMnemonic = PPC::STDCX;
13189 break;
13190 }
13191 MachineRegisterInfo &RegInfo = F->getRegInfo();
13192 Register dest = MI.getOperand(0).getReg();
13193 Register ptrA = MI.getOperand(1).getReg();
13194 Register ptrB = MI.getOperand(2).getReg();
13195 Register CrReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
13196 Register oldval = MI.getOperand(3).getReg();
13197 Register newval = MI.getOperand(4).getReg();
13198 DebugLoc dl = MI.getDebugLoc();
13199
13200 MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB);
13201 MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB);
13202 MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
13203 F->insert(It, loop1MBB);
13204 F->insert(It, loop2MBB);
13205 F->insert(It, exitMBB);
13206 exitMBB->splice(exitMBB->begin(), BB,
13207 std::next(MachineBasicBlock::iterator(MI)), BB->end());
13209
13210 // thisMBB:
13211 // ...
13212 // fallthrough --> loopMBB
13213 BB->addSuccessor(loop1MBB);
13214
13215 // loop1MBB:
13216 // l[bhwd]arx dest, ptr
13217 // cmp[wd] dest, oldval
13218 // bne- exitBB
13219 // loop2MBB:
13220 // st[bhwd]cx. newval, ptr
13221 // bne- loopMBB
13222 // b exitBB
13223 // exitBB:
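 // The st[bhwd]cx. succeeds only if the reservation from the matching l[bhwd]arx is still held; otherwise CR0.EQ is clear and the conditional branch after the store retries from loop1MBB.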
13224 BB = loop1MBB;
13225 BuildMI(BB, dl, TII->get(LoadMnemonic), dest).addReg(ptrA).addReg(ptrB);
13226 BuildMI(BB, dl, TII->get(is64bit ? PPC::CMPD : PPC::CMPW), CrReg)
13227 .addReg(dest)
13228 .addReg(oldval);
13229 BuildMI(BB, dl, TII->get(PPC::BCC))
13230 .addImm(PPC::PRED_NE)
13231 .addReg(CrReg)
13232 .addMBB(exitMBB);
13233 BB->addSuccessor(loop2MBB);
13234 BB->addSuccessor(exitMBB);
13235
13236 BB = loop2MBB;
13237 BuildMI(BB, dl, TII->get(StoreMnemonic))
13238 .addReg(newval)
13239 .addReg(ptrA)
13240 .addReg(ptrB);
13241 BuildMI(BB, dl, TII->get(PPC::BCC))
13242 .addImm(PPC::PRED_NE)
13243 .addReg(PPC::CR0)
13244 .addMBB(loop1MBB);
13245 BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
13246 BB->addSuccessor(loop1MBB);
13247 BB->addSuccessor(exitMBB);
13248
13249 // exitMBB:
13250 // ...
13251 BB = exitMBB;
13252 } else if (MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8 ||
13253 MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I16) {
13254 // We must use 64-bit registers for addresses when targeting 64-bit,
13255 // since we're actually doing arithmetic on them. Other registers
13256 // can be 32-bit.
13257 bool is64bit = Subtarget.isPPC64();
13258 bool isLittleEndian = Subtarget.isLittleEndian();
13259 bool is8bit = MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8;
13260
13261 Register dest = MI.getOperand(0).getReg();
13262 Register ptrA = MI.getOperand(1).getReg();
13263 Register ptrB = MI.getOperand(2).getReg();
13264 Register oldval = MI.getOperand(3).getReg();
13265 Register newval = MI.getOperand(4).getReg();
13266 DebugLoc dl = MI.getDebugLoc();
13267
13268 MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB);
13269 MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB);
13270 MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
13271 F->insert(It, loop1MBB);
13272 F->insert(It, loop2MBB);
13273 F->insert(It, exitMBB);
13274 exitMBB->splice(exitMBB->begin(), BB,
13275 std::next(MachineBasicBlock::iterator(MI)), BB->end());
13277
13278 MachineRegisterInfo &RegInfo = F->getRegInfo();
13279 const TargetRegisterClass *RC =
13280 is64bit ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
13281 const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
13282
13283 Register PtrReg = RegInfo.createVirtualRegister(RC);
13284 Register Shift1Reg = RegInfo.createVirtualRegister(GPRC);
13285 Register ShiftReg =
13286 isLittleEndian ? Shift1Reg : RegInfo.createVirtualRegister(GPRC);
13287 Register NewVal2Reg = RegInfo.createVirtualRegister(GPRC);
13288 Register NewVal3Reg = RegInfo.createVirtualRegister(GPRC);
13289 Register OldVal2Reg = RegInfo.createVirtualRegister(GPRC);
13290 Register OldVal3Reg = RegInfo.createVirtualRegister(GPRC);
13291 Register MaskReg = RegInfo.createVirtualRegister(GPRC);
13292 Register Mask2Reg = RegInfo.createVirtualRegister(GPRC);
13293 Register Mask3Reg = RegInfo.createVirtualRegister(GPRC);
13294 Register Tmp2Reg = RegInfo.createVirtualRegister(GPRC);
13295 Register Tmp4Reg = RegInfo.createVirtualRegister(GPRC);
13296 Register TmpDestReg = RegInfo.createVirtualRegister(GPRC);
13297 Register Ptr1Reg;
13298 Register TmpReg = RegInfo.createVirtualRegister(GPRC);
13299 Register ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;
13300 Register CrReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
13301 // thisMBB:
13302 // ...
13303 // fallthrough --> loopMBB
13304 BB->addSuccessor(loop1MBB);
13305
13306 // The 4-byte load must be aligned, while a char or short may be
13307 // anywhere in the word. Hence all this nasty bookkeeping code.
13308 // add ptr1, ptrA, ptrB [copy if ptrA==0]
13309 // rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27]
13310 // xori shift, shift1, 24 [16]
13311 // rlwinm ptr, ptr1, 0, 0, 29
13312 // slw newval2, newval, shift
13313 // slw oldval2, oldval,shift
13314 // li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535]
13315 // slw mask, mask2, shift
13316 // and newval3, newval2, mask
13317 // and oldval3, oldval2, mask
13318 // loop1MBB:
13319 // lwarx tmpDest, ptr
13320 // and tmp, tmpDest, mask
13321 // cmpw tmp, oldval3
13322 // bne- exitBB
13323 // loop2MBB:
13324 // andc tmp2, tmpDest, mask
13325 // or tmp4, tmp2, newval3
13326 // stwcx. tmp4, ptr
13327 // bne- loop1MBB
13328 // b exitBB
13329 // exitBB:
13330 // srw dest, tmpDest, shift
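 // Example (big-endian i8): a byte at offset 3 within its aligned word gives shift1 = 24 and shift = 24 ^ 24 = 0, since on big-endian that byte already occupies the least significant bits of the loaded word.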
13331 if (ptrA != ZeroReg) {
13332 Ptr1Reg = RegInfo.createVirtualRegister(RC);
13333 BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)
13334 .addReg(ptrA)
13335 .addReg(ptrB);
13336 } else {
13337 Ptr1Reg = ptrB;
13338 }
13339
13340 // We need to use the 32-bit subregister to avoid a register class mismatch
13341 // in 64-bit mode.
13342 BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg)
13343 .addReg(Ptr1Reg, 0, is64bit ? PPC::sub_32 : 0)
13344 .addImm(3)
13345 .addImm(27)
13346 .addImm(is8bit ? 28 : 27);
13347 if (!isLittleEndian)
13348 BuildMI(BB, dl, TII->get(PPC::XORI), ShiftReg)
13349 .addReg(Shift1Reg)
13350 .addImm(is8bit ? 24 : 16);
13351 if (is64bit)
13352 BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg)
13353 .addReg(Ptr1Reg)
13354 .addImm(0)
13355 .addImm(61);
13356 else
13357 BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg)
13358 .addReg(Ptr1Reg)
13359 .addImm(0)
13360 .addImm(0)
13361 .addImm(29);
13362 BuildMI(BB, dl, TII->get(PPC::SLW), NewVal2Reg)
13363 .addReg(newval)
13364 .addReg(ShiftReg);
13365 BuildMI(BB, dl, TII->get(PPC::SLW), OldVal2Reg)
13366 .addReg(oldval)
13367 .addReg(ShiftReg);
13368 if (is8bit)
13369 BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255);
13370 else {
13371 BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0);
13372 BuildMI(BB, dl, TII->get(PPC::ORI), Mask2Reg)
13373 .addReg(Mask3Reg)
13374 .addImm(65535);
13375 }
13376 BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg)
13377 .addReg(Mask2Reg)
13378 .addReg(ShiftReg);
13379 BuildMI(BB, dl, TII->get(PPC::AND), NewVal3Reg)
13380 .addReg(NewVal2Reg)
13381 .addReg(MaskReg);
13382 BuildMI(BB, dl, TII->get(PPC::AND), OldVal3Reg)
13383 .addReg(OldVal2Reg)
13384 .addReg(MaskReg);
13385
13386 BB = loop1MBB;
13387 BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg)
13388 .addReg(ZeroReg)
13389 .addReg(PtrReg);
13390 BuildMI(BB, dl, TII->get(PPC::AND), TmpReg)
13391 .addReg(TmpDestReg)
13392 .addReg(MaskReg);
13393 BuildMI(BB, dl, TII->get(PPC::CMPW), CrReg)
13394 .addReg(TmpReg)
13395 .addReg(OldVal3Reg);
13396 BuildMI(BB, dl, TII->get(PPC::BCC))
13397 .addImm(PPC::PRED_NE)
13398 .addReg(CrReg)
13399 .addMBB(exitMBB);
13400 BB->addSuccessor(loop2MBB);
13401 BB->addSuccessor(exitMBB);
13402
13403 BB = loop2MBB;
13404 BuildMI(BB, dl, TII->get(PPC::ANDC), Tmp2Reg)
13405 .addReg(TmpDestReg)
13406 .addReg(MaskReg);
13407 BuildMI(BB, dl, TII->get(PPC::OR), Tmp4Reg)
13408 .addReg(Tmp2Reg)
13409 .addReg(NewVal3Reg);
13410 BuildMI(BB, dl, TII->get(PPC::STWCX))
13411 .addReg(Tmp4Reg)
13412 .addReg(ZeroReg)
13413 .addReg(PtrReg);
13414 BuildMI(BB, dl, TII->get(PPC::BCC))
13415 .addImm(PPC::PRED_NE)
13416 .addReg(PPC::CR0)
13417 .addMBB(loop1MBB);
13418 BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
13419 BB->addSuccessor(loop1MBB);
13420 BB->addSuccessor(exitMBB);
13421
13422 // exitMBB:
13423 // ...
13424 BB = exitMBB;
13425 BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), dest)
13426 .addReg(TmpReg)
13427 .addReg(ShiftReg);
13428 } else if (MI.getOpcode() == PPC::FADDrtz) {
13429 // This pseudo performs an FADD with rounding mode temporarily forced
13430 // to round-to-zero. We emit this via custom inserter since the FPSCR
13431 // is not modeled at the SelectionDAG level.
13432 Register Dest = MI.getOperand(0).getReg();
13433 Register Src1 = MI.getOperand(1).getReg();
13434 Register Src2 = MI.getOperand(2).getReg();
13435 DebugLoc dl = MI.getDebugLoc();
13436
13437 MachineRegisterInfo &RegInfo = F->getRegInfo();
13438 Register MFFSReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass);
13439
13440 // Save FPSCR value.
13441 BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), MFFSReg);
13442
13443 // Set rounding mode to round-to-zero.
13444 BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB1))
13445 .addImm(31)
13446 .addReg(PPC::RM, RegState::ImplicitDefine);
13447
13448 BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB0))
13449 .addImm(30)
13450 .addReg(PPC::RM, RegState::ImplicitDefine);
13451
13452 // Perform addition.
13453 auto MIB = BuildMI(*BB, MI, dl, TII->get(PPC::FADD), Dest)
13454 .addReg(Src1)
13455 .addReg(Src2);
13456 if (MI.getFlag(MachineInstr::NoFPExcept))
13457 MIB.setFlag(MachineInstr::NoFPExcept);
13458
13459 // Restore FPSCR value.
13460 BuildMI(*BB, MI, dl, TII->get(PPC::MTFSFb)).addImm(1).addReg(MFFSReg);
13461 } else if (MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT ||
13462 MI.getOpcode() == PPC::ANDI_rec_1_GT_BIT ||
13463 MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT8 ||
13464 MI.getOpcode() == PPC::ANDI_rec_1_GT_BIT8) {
13465 unsigned Opcode = (MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT8 ||
13466 MI.getOpcode() == PPC::ANDI_rec_1_GT_BIT8)
13467 ? PPC::ANDI8_rec
13468 : PPC::ANDI_rec;
13469 bool IsEQ = (MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT ||
13470 MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT8);
13471
13472 MachineRegisterInfo &RegInfo = F->getRegInfo();
13473 Register Dest = RegInfo.createVirtualRegister(
13474 Opcode == PPC::ANDI_rec ? &PPC::GPRCRegClass : &PPC::G8RCRegClass);
13475
13476 DebugLoc Dl = MI.getDebugLoc();
13477 BuildMI(*BB, MI, Dl, TII->get(Opcode), Dest)
13478 .addReg(MI.getOperand(1).getReg())
13479 .addImm(1);
13480 BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::COPY),
13481 MI.getOperand(0).getReg())
13482 .addReg(IsEQ ? PPC::CR0EQ : PPC::CR0GT);
13483 } else if (MI.getOpcode() == PPC::TCHECK_RET) {
13484 DebugLoc Dl = MI.getDebugLoc();
13485 MachineRegisterInfo &RegInfo = F->getRegInfo();
13486 Register CRReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
13487 BuildMI(*BB, MI, Dl, TII->get(PPC::TCHECK), CRReg);
13488 BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::COPY),
13489 MI.getOperand(0).getReg())
13490 .addReg(CRReg);
13491 } else if (MI.getOpcode() == PPC::TBEGIN_RET) {
13492 DebugLoc Dl = MI.getDebugLoc();
13493 unsigned Imm = MI.getOperand(1).getImm();
13494 BuildMI(*BB, MI, Dl, TII->get(PPC::TBEGIN)).addImm(Imm);
13495 BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::COPY),
13496 MI.getOperand(0).getReg())
13497 .addReg(PPC::CR0EQ);
13498 } else if (MI.getOpcode() == PPC::SETRNDi) {
13499 DebugLoc dl = MI.getDebugLoc();
13500 Register OldFPSCRReg = MI.getOperand(0).getReg();
13501
13502 // Save FPSCR value.
13503 if (MRI.use_empty(OldFPSCRReg))
13504 BuildMI(*BB, MI, dl, TII->get(TargetOpcode::IMPLICIT_DEF), OldFPSCRReg);
13505 else
13506 BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), OldFPSCRReg);
13507
13508 // The floating-point rounding mode is in bits 62:63 of the FPSCR, and has
13509 // the following settings:
13510 // 00 Round to nearest
13511 // 01 Round to 0
13512 // 10 Round to +inf
13513 // 11 Round to -inf
13514
13515 // When the operand is an immediate, use its two least significant bits to
13516 // set bits 62:63 of the FPSCR.
13517 unsigned Mode = MI.getOperand(1).getImm();
13518 BuildMI(*BB, MI, dl, TII->get((Mode & 1) ? PPC::MTFSB1 : PPC::MTFSB0))
13519 .addImm(31)
13520 .addReg(PPC::RM, RegState::ImplicitDefine);
13521
13522 BuildMI(*BB, MI, dl, TII->get((Mode & 2) ? PPC::MTFSB1 : PPC::MTFSB0))
13523 .addImm(30)
13524 .addReg(PPC::RM, RegState::ImplicitDefine);
13525 } else if (MI.getOpcode() == PPC::SETRND) {
13526 DebugLoc dl = MI.getDebugLoc();
13527
13528 // Copy register from F8RCRegClass::SrcReg to G8RCRegClass::DestReg
13529 // or copy register from G8RCRegClass::SrcReg to F8RCRegClass::DestReg.
13530 // If the target doesn't have DirectMove, we should use stack to do the
13531 // conversion, because the target doesn't have the instructions like mtvsrd
13532 // or mfvsrd to do this conversion directly.
13533 auto copyRegFromG8RCOrF8RC = [&] (unsigned DestReg, unsigned SrcReg) {
13534 if (Subtarget.hasDirectMove()) {
13535 BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), DestReg)
13536 .addReg(SrcReg);
13537 } else {
13538 // Use stack to do the register copy.
13539 unsigned StoreOp = PPC::STD, LoadOp = PPC::LFD;
13540 MachineRegisterInfo &RegInfo = F->getRegInfo();
13541 const TargetRegisterClass *RC = RegInfo.getRegClass(SrcReg);
13542 if (RC == &PPC::F8RCRegClass) {
13543 // Copy register from F8RCRegClass to G8RCRegclass.
13544 assert((RegInfo.getRegClass(DestReg) == &PPC::G8RCRegClass) &&
13545 "Unsupported RegClass.");
13546
13547 StoreOp = PPC::STFD;
13548 LoadOp = PPC::LD;
13549 } else {
13550 // Copy register from G8RCRegClass to F8RCRegclass.
13551 assert((RegInfo.getRegClass(SrcReg) == &PPC::G8RCRegClass) &&
13552 (RegInfo.getRegClass(DestReg) == &PPC::F8RCRegClass) &&
13553 "Unsupported RegClass.");
13554 }
13555
13556 MachineFrameInfo &MFI = F->getFrameInfo();
13557 int FrameIdx = MFI.CreateStackObject(8, Align(8), false);
13558
13559 MachineMemOperand *MMOStore = F->getMachineMemOperand(
13560 MachinePointerInfo::getFixedStack(*F, FrameIdx, 0),
13562 MFI.getObjectAlign(FrameIdx));
13563
13564 // Store the SrcReg into the stack.
13565 BuildMI(*BB, MI, dl, TII->get(StoreOp))
13566 .addReg(SrcReg)
13567 .addImm(0)
13568 .addFrameIndex(FrameIdx)
13569 .addMemOperand(MMOStore);
13570
13571 MachineMemOperand *MMOLoad = F->getMachineMemOperand(
13572 MachinePointerInfo::getFixedStack(*F, FrameIdx, 0),
13574 MFI.getObjectAlign(FrameIdx));
13575
13576 // Load from the stack where SrcReg is stored, and save to DestReg,
13577 // so we have done the RegClass conversion from RegClass::SrcReg to
13578 // RegClass::DestReg.
13579 BuildMI(*BB, MI, dl, TII->get(LoadOp), DestReg)
13580 .addImm(0)
13581 .addFrameIndex(FrameIdx)
13582 .addMemOperand(MMOLoad);
13583 }
13584 };
13585
13586 Register OldFPSCRReg = MI.getOperand(0).getReg();
13587
13588 // Save FPSCR value.
13589 BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), OldFPSCRReg);
13590
13591 // When the operand is a gprc register, use its two least significant bits
13592 // and the mtfsf instruction to set bits 62:63 of the FPSCR.
13593 //
13594 // copy OldFPSCRTmpReg, OldFPSCRReg
13595 // (INSERT_SUBREG ExtSrcReg, (IMPLICIT_DEF ImDefReg), SrcOp, 1)
13596 // rldimi NewFPSCRTmpReg, ExtSrcReg, OldFPSCRReg, 0, 62
13597 // copy NewFPSCRReg, NewFPSCRTmpReg
13598 // mtfsf 255, NewFPSCRReg
13599 MachineOperand SrcOp = MI.getOperand(1);
13600 MachineRegisterInfo &RegInfo = F->getRegInfo();
13601 Register OldFPSCRTmpReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
13602
13603 copyRegFromG8RCOrF8RC(OldFPSCRTmpReg, OldFPSCRReg);
13604
13605 Register ImDefReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
13606 Register ExtSrcReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
13607
13608 // The first operand of INSERT_SUBREG should be a register that has
13609 // subregisters; since we only care about its register class, an
13610 // IMPLICIT_DEF register suffices.
13611 BuildMI(*BB, MI, dl, TII->get(TargetOpcode::IMPLICIT_DEF), ImDefReg);
13612 BuildMI(*BB, MI, dl, TII->get(PPC::INSERT_SUBREG), ExtSrcReg)
13613 .addReg(ImDefReg)
13614 .add(SrcOp)
13615 .addImm(1);
13616
13617 Register NewFPSCRTmpReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
13618 BuildMI(*BB, MI, dl, TII->get(PPC::RLDIMI), NewFPSCRTmpReg)
13619 .addReg(OldFPSCRTmpReg)
13620 .addReg(ExtSrcReg)
13621 .addImm(0)
13622 .addImm(62);
13623
13624 Register NewFPSCRReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass);
13625 copyRegFromG8RCOrF8RC(NewFPSCRReg, NewFPSCRTmpReg);
13626
13627 // The mask 255 means that bits 32:63 of NewFPSCRReg are written into bits
13628 // 32:63 of the FPSCR.
13629 BuildMI(*BB, MI, dl, TII->get(PPC::MTFSF))
13630 .addImm(255)
13631 .addReg(NewFPSCRReg)
13632 .addImm(0)
13633 .addImm(0);
13634 } else if (MI.getOpcode() == PPC::SETFLM) {
13635 DebugLoc Dl = MI.getDebugLoc();
13636
13637 // Result of setflm is previous FPSCR content, so we need to save it first.
13638 Register OldFPSCRReg = MI.getOperand(0).getReg();
13639 if (MRI.use_empty(OldFPSCRReg))
13640 BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::IMPLICIT_DEF), OldFPSCRReg);
13641 else
13642 BuildMI(*BB, MI, Dl, TII->get(PPC::MFFS), OldFPSCRReg);
13643
13644 // Put bits in 32:63 to FPSCR.
13645 Register NewFPSCRReg = MI.getOperand(1).getReg();
13646 BuildMI(*BB, MI, Dl, TII->get(PPC::MTFSF))
13647 .addImm(255)
13648 .addReg(NewFPSCRReg)
13649 .addImm(0)
13650 .addImm(0);
13651 } else if (MI.getOpcode() == PPC::PROBED_ALLOCA_32 ||
13652 MI.getOpcode() == PPC::PROBED_ALLOCA_64) {
13653 return emitProbedAlloca(MI, BB);
13654 } else if (MI.getOpcode() == PPC::SPLIT_QUADWORD) {
13655 DebugLoc DL = MI.getDebugLoc();
13656 Register Src = MI.getOperand(2).getReg();
13657 Register Lo = MI.getOperand(0).getReg();
13658 Register Hi = MI.getOperand(1).getReg();
13659 BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY))
13660 .addDef(Lo)
13661 .addUse(Src, 0, PPC::sub_gp8_x1);
13662 BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY))
13663 .addDef(Hi)
13664 .addUse(Src, 0, PPC::sub_gp8_x0);
13665 } else if (MI.getOpcode() == PPC::LQX_PSEUDO ||
13666 MI.getOpcode() == PPC::STQX_PSEUDO) {
13667 DebugLoc DL = MI.getDebugLoc();
13668 // Ptr is used as the ptr_rc_no_r0 part of LQ/STQ's memory operand and
13669 // holds the result of adding RA and RB, so it has to be
13670 // g8rc_and_g8rc_nox0.
13671 Register Ptr =
13672 F->getRegInfo().createVirtualRegister(&PPC::G8RC_and_G8RC_NOX0RegClass);
13673 Register Val = MI.getOperand(0).getReg();
13674 Register RA = MI.getOperand(1).getReg();
13675 Register RB = MI.getOperand(2).getReg();
13676 BuildMI(*BB, MI, DL, TII->get(PPC::ADD8), Ptr).addReg(RA).addReg(RB);
13677 BuildMI(*BB, MI, DL,
13678 MI.getOpcode() == PPC::LQX_PSEUDO ? TII->get(PPC::LQ)
13679 : TII->get(PPC::STQ))
13680 .addReg(Val, MI.getOpcode() == PPC::LQX_PSEUDO ? RegState::Define : 0)
13681 .addImm(0)
13682 .addReg(Ptr);
13683 } else {
13684 llvm_unreachable("Unexpected instr type to insert");
13685 }
13686
13687 MI.eraseFromParent(); // The pseudo instruction is gone now.
13688 return BB;
13689}
13690
13691//===----------------------------------------------------------------------===//
13692// Target Optimization Hooks
13693//===----------------------------------------------------------------------===//
13694
13695static int getEstimateRefinementSteps(EVT VT, const PPCSubtarget &Subtarget) {
13696 // For the estimates, convergence is quadratic, so we essentially double the
13697 // number of digits correct after every iteration. For both FRE and FRSQRTE,
13698 // the minimum architected relative accuracy is 2^-5. When hasRecipPrec(),
13699 // this is 2^-14. IEEE float has 23 digits and double has 52 digits.
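 // Starting from 2^-5 the correct bits go 5 -> 10 -> 20 -> 40, so three steps cover an f32 and four an f64; starting from 2^-14 they go 14 -> 28 -> 56, so one and two steps suffice.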
13700 int RefinementSteps = Subtarget.hasRecipPrec() ? 1 : 3;
13701 if (VT.getScalarType() == MVT::f64)
13702 RefinementSteps++;
13703 return RefinementSteps;
13704}
13705
13706SDValue PPCTargetLowering::getSqrtInputTest(SDValue Op, SelectionDAG &DAG,
13707 const DenormalMode &Mode) const {
13708 // We only have VSX Vector Test for software Square Root.
13709 EVT VT = Op.getValueType();
13710 if (!isTypeLegal(MVT::i1) ||
13711 (VT != MVT::f64 &&
13712 ((VT != MVT::v2f64 && VT != MVT::v4f32) || !Subtarget.hasVSX())))
13713 return TargetLowering::getSqrtInputTest(Op, DAG, Mode);
13714
13715 SDLoc DL(Op);
13716 // The output register of FTSQRT is CR field.
13717 SDValue FTSQRT = DAG.getNode(PPCISD::FTSQRT, DL, MVT::i32, Op);
13718 // ftsqrt BF,FRB
13719 // Let e_b be the unbiased exponent of the double-precision
13720 // floating-point operand in register FRB.
13721 // fe_flag is set to 1 if either of the following conditions occurs.
13722 // - The double-precision floating-point operand in register FRB is a zero,
13723 // a NaN, or an infinity, or a negative value.
13724 // - e_b is less than or equal to -970.
13725 // Otherwise fe_flag is set to 0.
13726 // Both VSX and non-VSX versions would set EQ bit in the CR if the number is
13727 // not eligible for iteration. (zero/negative/infinity/nan or unbiased
13728 // exponent is less than -970)
13729 SDValue SRIdxVal = DAG.getTargetConstant(PPC::sub_eq, DL, MVT::i32);
13730 return SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, MVT::i1,
13731 FTSQRT, SRIdxVal),
13732 0);
13733}
13734
13735SDValue
13736PPCTargetLowering::getSqrtResultForDenormInput(SDValue Op,
13737 SelectionDAG &DAG) const {
13738 // We only have VSX Vector Square Root.
13739 EVT VT = Op.getValueType();
13740 if (VT != MVT::f64 &&
13741 ((VT != MVT::v2f64 && VT != MVT::v4f32) || !Subtarget.hasVSX()))
13742 return TargetLowering::getSqrtResultForDenormInput(Op, DAG);
13743
13744 return DAG.getNode(PPCISD::FSQRT, SDLoc(Op), VT, Op);
13745}
13746
13747SDValue PPCTargetLowering::getSqrtEstimate(SDValue Operand, SelectionDAG &DAG,
13748 int Enabled, int &RefinementSteps,
13749 bool &UseOneConstNR,
13750 bool Reciprocal) const {
13751 EVT VT = Operand.getValueType();
13752 if ((VT == MVT::f32 && Subtarget.hasFRSQRTES()) ||
13753 (VT == MVT::f64 && Subtarget.hasFRSQRTE()) ||
13754 (VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
13755 (VT == MVT::v2f64 && Subtarget.hasVSX())) {
13756 if (RefinementSteps == ReciprocalEstimate::Unspecified)
13757 RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
13758
13759 // The Newton-Raphson computation with a single constant does not provide
13760 // enough accuracy on some CPUs.
13761 UseOneConstNR = !Subtarget.needsTwoConstNR();
13762 return DAG.getNode(PPCISD::FRSQRTE, SDLoc(Operand), VT, Operand);
13763 }
13764 return SDValue();
13765}
13766
13767SDValue PPCTargetLowering::getRecipEstimate(SDValue Operand, SelectionDAG &DAG,
13768 int Enabled,
13769 int &RefinementSteps) const {
13770 EVT VT = Operand.getValueType();
13771 if ((VT == MVT::f32 && Subtarget.hasFRES()) ||
13772 (VT == MVT::f64 && Subtarget.hasFRE()) ||
13773 (VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
13774 (VT == MVT::v2f64 && Subtarget.hasVSX())) {
13775 if (RefinementSteps == ReciprocalEstimate::Unspecified)
13776 RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
13777 return DAG.getNode(PPCISD::FRE, SDLoc(Operand), VT, Operand);
13778 }
13779 return SDValue();
13780}
13781
13782unsigned PPCTargetLowering::combineRepeatedFPDivisors() const {
13783 // Note: This functionality is used only when unsafe-fp-math is enabled, and
13784 // on cores with reciprocal estimates (which are used when unsafe-fp-math is
13785 // enabled for division), this functionality is redundant with the default
13786 // combiner logic (once the division -> reciprocal/multiply transformation
13787 // has taken place). As a result, this matters more for older cores than for
13788 // newer ones.
13789
13790 // Combine multiple FDIVs with the same divisor into multiple FMULs by the
13791 // reciprocal if there are two or more FDIVs (for embedded cores with only
13792 // one FP pipeline) or three or more FDIVs (for generic OOO cores).
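 // For example, x/d and y/d become r = 1.0/d; x*r; y*r once the count returned below is reached.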
13793 switch (Subtarget.getCPUDirective()) {
13794 default:
13795 return 3;
13796 case PPC::DIR_440:
13797 case PPC::DIR_A2:
13798 case PPC::DIR_E500:
13799 case PPC::DIR_E500mc:
13800 case PPC::DIR_E5500:
13801 return 2;
13802 }
13803}
13804
13805// isConsecutiveLSLoc needs to work even if all adds have not yet been
13806// collapsed, and so we need to look through chains of them.
13807static void getBaseWithConstantOffset(SDValue Loc, SDValue &Base,
13808 int64_t& Offset, SelectionDAG &DAG) {
13809 if (DAG.isBaseWithConstantOffset(Loc)) {
13810 Base = Loc.getOperand(0);
13811 Offset += cast<ConstantSDNode>(Loc.getOperand(1))->getSExtValue();
13812
13813 // The base might itself be a base plus an offset, and if so, accumulate
13814 // that as well.
13816 }
13817}
13818
13819static bool isConsecutiveLSLoc(SDValue Loc, EVT VT, LSBaseSDNode *Base,
13820 unsigned Bytes, int Dist,
13821 SelectionDAG &DAG) {
13822 if (VT.getSizeInBits() / 8 != Bytes)
13823 return false;
13824
13825 SDValue BaseLoc = Base->getBasePtr();
13826 if (Loc.getOpcode() == ISD::FrameIndex) {
13827 if (BaseLoc.getOpcode() != ISD::FrameIndex)
13828 return false;
13830 int FI = cast<FrameIndexSDNode>(Loc)->getIndex();
13831 int BFI = cast<FrameIndexSDNode>(BaseLoc)->getIndex();
13832 int FS = MFI.getObjectSize(FI);
13833 int BFS = MFI.getObjectSize(BFI);
13834 if (FS != BFS || FS != (int)Bytes) return false;
13835 return MFI.getObjectOffset(FI) == (MFI.getObjectOffset(BFI) + Dist*Bytes);
13836 }
13837
13838 SDValue Base1 = Loc, Base2 = BaseLoc;
13839 int64_t Offset1 = 0, Offset2 = 0;
13840 getBaseWithConstantOffset(Loc, Base1, Offset1, DAG);
13841 getBaseWithConstantOffset(BaseLoc, Base2, Offset2, DAG);
13842 if (Base1 == Base2 && Offset1 == (Offset2 + Dist * Bytes))
13843 return true;
13844
13845 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
13846 const GlobalValue *GV1 = nullptr;
13847 const GlobalValue *GV2 = nullptr;
13848 Offset1 = 0;
13849 Offset2 = 0;
13850 bool isGA1 = TLI.isGAPlusOffset(Loc.getNode(), GV1, Offset1);
13851 bool isGA2 = TLI.isGAPlusOffset(BaseLoc.getNode(), GV2, Offset2);
13852 if (isGA1 && isGA2 && GV1 == GV2)
13853 return Offset1 == (Offset2 + Dist*Bytes);
13854 return false;
13855}
13856
13857// Like SelectionDAG::isConsecutiveLoad, but also works for stores, and does
13858// not enforce equality of the chain operands.
13859static bool isConsecutiveLS(SDNode *N, LSBaseSDNode *Base,
13860 unsigned Bytes, int Dist,
13861 SelectionDAG &DAG) {
13862 if (LSBaseSDNode *LS = dyn_cast<LSBaseSDNode>(N)) {
13863 EVT VT = LS->getMemoryVT();
13864 SDValue Loc = LS->getBasePtr();
13865 return isConsecutiveLSLoc(Loc, VT, Base, Bytes, Dist, DAG);
13866 }
13867
13868 if (N->getOpcode() == ISD::INTRINSIC_W_CHAIN) {
13869 EVT VT;
13870 switch (N->getConstantOperandVal(1)) {
13871 default: return false;
13872 case Intrinsic::ppc_altivec_lvx:
13873 case Intrinsic::ppc_altivec_lvxl:
13874 case Intrinsic::ppc_vsx_lxvw4x:
13875 case Intrinsic::ppc_vsx_lxvw4x_be:
13876 VT = MVT::v4i32;
13877 break;
13878 case Intrinsic::ppc_vsx_lxvd2x:
13879 case Intrinsic::ppc_vsx_lxvd2x_be:
13880 VT = MVT::v2f64;
13881 break;
13882 case Intrinsic::ppc_altivec_lvebx:
13883 VT = MVT::i8;
13884 break;
13885 case Intrinsic::ppc_altivec_lvehx:
13886 VT = MVT::i16;
13887 break;
13888 case Intrinsic::ppc_altivec_lvewx:
13889 VT = MVT::i32;
13890 break;
13891 }
13892
13893 return isConsecutiveLSLoc(N->getOperand(2), VT, Base, Bytes, Dist, DAG);
13894 }
13895
13896 if (N->getOpcode() == ISD::INTRINSIC_VOID) {
13897 EVT VT;
13898 switch (N->getConstantOperandVal(1)) {
13899 default: return false;
13900 case Intrinsic::ppc_altivec_stvx:
13901 case Intrinsic::ppc_altivec_stvxl:
13902 case Intrinsic::ppc_vsx_stxvw4x:
13903 VT = MVT::v4i32;
13904 break;
13905 case Intrinsic::ppc_vsx_stxvd2x:
13906 VT = MVT::v2f64;
13907 break;
13908 case Intrinsic::ppc_vsx_stxvw4x_be:
13909 VT = MVT::v4i32;
13910 break;
13911 case Intrinsic::ppc_vsx_stxvd2x_be:
13912 VT = MVT::v2f64;
13913 break;
13914 case Intrinsic::ppc_altivec_stvebx:
13915 VT = MVT::i8;
13916 break;
13917 case Intrinsic::ppc_altivec_stvehx:
13918 VT = MVT::i16;
13919 break;
13920 case Intrinsic::ppc_altivec_stvewx:
13921 VT = MVT::i32;
13922 break;
13923 }
13924
13925 return isConsecutiveLSLoc(N->getOperand(3), VT, Base, Bytes, Dist, DAG);
13926 }
13927
13928 return false;
13929}
13930
13931// Return true if there is a nearby consecutive load to the one provided
13932// (regardless of alignment). We search up and down the chain, looking through
13933// token factors and other loads (but nothing else). As a result, a true result
13934// indicates that it is safe to create a new consecutive load adjacent to the
13935// load provided.
13936static bool findConsecutiveLoad(LoadSDNode *LD, SelectionDAG &DAG) {
13937 SDValue Chain = LD->getChain();
13938 EVT VT = LD->getMemoryVT();
13939
13940 SmallSet<SDNode *, 16> LoadRoots;
13941 SmallVector<SDNode *, 8> Queue(1, Chain.getNode());
13942 SmallSet<SDNode *, 16> Visited;
13943
13944 // First, search up the chain, branching to follow all token-factor operands.
13945 // If we find a consecutive load, then we're done, otherwise, record all
13946 // nodes just above the top-level loads and token factors.
13947 while (!Queue.empty()) {
13948 SDNode *ChainNext = Queue.pop_back_val();
13949 if (!Visited.insert(ChainNext).second)
13950 continue;
13951
13952 if (MemSDNode *ChainLD = dyn_cast<MemSDNode>(ChainNext)) {
13953 if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG))
13954 return true;
13955
13956 if (!Visited.count(ChainLD->getChain().getNode()))
13957 Queue.push_back(ChainLD->getChain().getNode());
13958 } else if (ChainNext->getOpcode() == ISD::TokenFactor) {
13959 for (const SDUse &O : ChainNext->ops())
13960 if (!Visited.count(O.getNode()))
13961 Queue.push_back(O.getNode());
13962 } else
13963 LoadRoots.insert(ChainNext);
13964 }
13965
13966 // Second, search down the chain, starting from the top-level nodes recorded
13967 // in the first phase. These top-level nodes are the nodes just above all
13968 // loads and token factors. Starting with their uses, recursively look through
13969 // all loads (just the chain uses) and token factors to find a consecutive
13970 // load.
13971 Visited.clear();
13972 Queue.clear();
13973
13974 for (SDNode *I : LoadRoots) {
13975 Queue.push_back(I);
13976
13977 while (!Queue.empty()) {
13978 SDNode *LoadRoot = Queue.pop_back_val();
13979 if (!Visited.insert(LoadRoot).second)
13980 continue;
13981
13982 if (MemSDNode *ChainLD = dyn_cast<MemSDNode>(LoadRoot))
13983 if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG))
13984 return true;
13985
13986 for (SDNode *U : LoadRoot->uses())
13987 if (((isa<MemSDNode>(U) &&
13988 cast<MemSDNode>(U)->getChain().getNode() == LoadRoot) ||
13989 U->getOpcode() == ISD::TokenFactor) &&
13990 !Visited.count(U))
13991 Queue.push_back(U);
13992 }
13993 }
13994
13995 return false;
13996}
13997
13998/// This function is called when we have proved that a SETCC node can be replaced
13999/// by subtraction (and other supporting instructions) so that the result of
14000/// the comparison is kept in a GPR instead of a CR field. This function is purely for
14001/// codegen purposes and has some flags to guide the codegen process.
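/// For example, on a 64-bit target with i32 operands a and b and SETULT, both are zero-extended to i64 and (a - b) >> 63 is 1 exactly when a < b; the Swap and Complement flags adapt the same pattern to the other unsigned predicates.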
14002static SDValue generateEquivalentSub(SDNode *N, int Size, bool Complement,
14003 bool Swap, SDLoc &DL, SelectionDAG &DAG) {
14004 assert(N->getOpcode() == ISD::SETCC && "ISD::SETCC Expected.");
14005
14006 // Zero extend the operands to the largest legal integer. Originally, they
14007 // must be of a strictly smaller size.
14008 auto Op0 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(0),
14009 DAG.getConstant(Size, DL, MVT::i32));
14010 auto Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(1),
14011 DAG.getConstant(Size, DL, MVT::i32));
14012
14013 // Swap if needed. Depends on the condition code.
14014 if (Swap)
14015 std::swap(Op0, Op1);
14016
14017 // Subtract extended integers.
14018 auto SubNode = DAG.getNode(ISD::SUB, DL, MVT::i64, Op0, Op1);
14019
14020 // Move the sign bit to the least significant position and zero out the rest.
14021 // Now the least significant bit carries the result of original comparison.
14022 auto Shifted = DAG.getNode(ISD::SRL, DL, MVT::i64, SubNode,
14023 DAG.getConstant(Size - 1, DL, MVT::i32));
14024 auto Final = Shifted;
14025
14026 // Complement the result if needed. Based on the condition code.
14027 if (Complement)
14028 Final = DAG.getNode(ISD::XOR, DL, MVT::i64, Shifted,
14029 DAG.getConstant(1, DL, MVT::i64));
14030
14031 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, Final);
14032}
14033
14034SDValue PPCTargetLowering::ConvertSETCCToSubtract(SDNode *N,
14035 DAGCombinerInfo &DCI) const {
14036 assert(N->getOpcode() == ISD::SETCC && "ISD::SETCC Expected.");
14037
14038 SelectionDAG &DAG = DCI.DAG;
14039 SDLoc DL(N);
14040
14041 // Size of integers being compared has a critical role in the following
14042 // analysis, so we prefer to do this when all types are legal.
14043 if (!DCI.isAfterLegalizeDAG())
14044 return SDValue();
14045
14046 // If all users of SETCC extend its value to a legal integer type
14047 // then we replace SETCC with a subtraction
14048 for (const SDNode *U : N->uses())
14049 if (U->getOpcode() != ISD::ZERO_EXTEND)
14050 return SDValue();
14051
14052 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
14053 auto OpSize = N->getOperand(0).getValueSizeInBits();
14054
14056
14057 if (OpSize < Size) {
14058 switch (CC) {
14059 default: break;
14060 case ISD::SETULT:
14061 return generateEquivalentSub(N, Size, false, false, DL, DAG);
14062 case ISD::SETULE:
14063 return generateEquivalentSub(N, Size, true, true, DL, DAG);
14064 case ISD::SETUGT:
14065 return generateEquivalentSub(N, Size, false, true, DL, DAG);
14066 case ISD::SETUGE:
14067 return generateEquivalentSub(N, Size, true, false, DL, DAG);
14068 }
14069 }
14070
14071 return SDValue();
14072}
14073
14074SDValue PPCTargetLowering::DAGCombineTruncBoolExt(SDNode *N,
14075 DAGCombinerInfo &DCI) const {
14076 SelectionDAG &DAG = DCI.DAG;
14077 SDLoc dl(N);
14078
14079 assert(Subtarget.useCRBits() && "Expecting to be tracking CR bits");
14080 // If we're tracking CR bits, we need to be careful that we don't have:
14081 // trunc(binary-ops(zext(x), zext(y)))
14082 // or
14083 // trunc(binary-ops(binary-ops(zext(x), zext(y)), ...)
14084 // such that we're unnecessarily moving things into GPRs when it would be
14085 // better to keep them in CR bits.
14086
14087 // Note that trunc here can be an actual i1 trunc, or can be the effective
14088 // truncation that comes from a setcc or select_cc.
14089 if (N->getOpcode() == ISD::TRUNCATE &&
14090 N->getValueType(0) != MVT::i1)
14091 return SDValue();
14092
14093 if (N->getOperand(0).getValueType() != MVT::i32 &&
14094 N->getOperand(0).getValueType() != MVT::i64)
14095 return SDValue();
14096
14097 if (N->getOpcode() == ISD::SETCC ||
14098 N->getOpcode() == ISD::SELECT_CC) {
14099 // If we're looking at a comparison, then we need to make sure that the
14100 // high bits (all except for the first) don't affect the result.
14101 ISD::CondCode CC =
14102 cast<CondCodeSDNode>(N->getOperand(
14103 N->getOpcode() == ISD::SETCC ? 2 : 4))->get();
14104 unsigned OpBits = N->getOperand(0).getValueSizeInBits();
14105
14106 if (ISD::isSignedIntSetCC(CC)) {
14107 if (DAG.ComputeNumSignBits(N->getOperand(0)) != OpBits ||
14108 DAG.ComputeNumSignBits(N->getOperand(1)) != OpBits)
14109 return SDValue();
14110 } else if (ISD::isUnsignedIntSetCC(CC)) {
14111 if (!DAG.MaskedValueIsZero(N->getOperand(0),
14112 APInt::getHighBitsSet(OpBits, OpBits-1)) ||
14113 !DAG.MaskedValueIsZero(N->getOperand(1),
14114 APInt::getHighBitsSet(OpBits, OpBits-1)))
14115 return (N->getOpcode() == ISD::SETCC ? ConvertSETCCToSubtract(N, DCI)
14116 : SDValue());
14117 } else {
14118 // This is neither a signed nor an unsigned comparison, just make sure
14119 // that the high bits are equal.
14120 KnownBits Op1Known = DAG.computeKnownBits(N->getOperand(0));
14121 KnownBits Op2Known = DAG.computeKnownBits(N->getOperand(1));
14122
14123 // We don't really care about what is known about the first bit (if
14124 // anything), so pretend that it is known zero for both to ensure they can
14125 // be compared as constants.
14126 Op1Known.Zero.setBit(0); Op1Known.One.clearBit(0);
14127 Op2Known.Zero.setBit(0); Op2Known.One.clearBit(0);
14128
14129 if (!Op1Known.isConstant() || !Op2Known.isConstant() ||
14130 Op1Known.getConstant() != Op2Known.getConstant())
14131 return SDValue();
14132 }
14133 }
14134
14135 // We now know that the higher-order bits are irrelevant; we just need to
14136 // make sure that all of the intermediate operations are bit operations, and
14137 // all inputs are extensions.
14138 if (N->getOperand(0).getOpcode() != ISD::AND &&
14139 N->getOperand(0).getOpcode() != ISD::OR &&
14140 N->getOperand(0).getOpcode() != ISD::XOR &&
14141 N->getOperand(0).getOpcode() != ISD::SELECT &&
14142 N->getOperand(0).getOpcode() != ISD::SELECT_CC &&
14143 N->getOperand(0).getOpcode() != ISD::TRUNCATE &&
14144 N->getOperand(0).getOpcode() != ISD::SIGN_EXTEND &&
14145 N->getOperand(0).getOpcode() != ISD::ZERO_EXTEND &&
14146 N->getOperand(0).getOpcode() != ISD::ANY_EXTEND)
14147 return SDValue();
14148
14149 if ((N->getOpcode() == ISD::SETCC || N->getOpcode() == ISD::SELECT_CC) &&
14150 N->getOperand(1).getOpcode() != ISD::AND &&
14151 N->getOperand(1).getOpcode() != ISD::OR &&
14152 N->getOperand(1).getOpcode() != ISD::XOR &&
14153 N->getOperand(1).getOpcode() != ISD::SELECT &&
14154 N->getOperand(1).getOpcode() != ISD::SELECT_CC &&
14155 N->getOperand(1).getOpcode() != ISD::TRUNCATE &&
14156 N->getOperand(1).getOpcode() != ISD::SIGN_EXTEND &&
14157 N->getOperand(1).getOpcode() != ISD::ZERO_EXTEND &&
14158 N->getOperand(1).getOpcode() != ISD::ANY_EXTEND)
14159 return SDValue();
14160
14161 SmallVector<SDValue, 4> Inputs;
14162 SmallVector<SDValue, 8> BinOps, PromOps;
14163 SmallPtrSet<SDNode *, 16> Visited;
14164
14165 for (unsigned i = 0; i < 2; ++i) {
14166 if (((N->getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
14167 N->getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
14168 N->getOperand(i).getOpcode() == ISD::ANY_EXTEND) &&
14169 N->getOperand(i).getOperand(0).getValueType() == MVT::i1) ||
14170 isa<ConstantSDNode>(N->getOperand(i)))
14171 Inputs.push_back(N->getOperand(i));
14172 else
14173 BinOps.push_back(N->getOperand(i));
14174
14175 if (N->getOpcode() == ISD::TRUNCATE)
14176 break;
14177 }
14178
14179 // Visit all inputs, collect all binary operations (and, or, xor and
14180 // select) that are all fed by extensions.
14181 while (!BinOps.empty()) {
14182 SDValue BinOp = BinOps.pop_back_val();
14183
14184 if (!Visited.insert(BinOp.getNode()).second)
14185 continue;
14186
14187 PromOps.push_back(BinOp);
14188
14189 for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) {
14190 // The condition of the select is not promoted.
14191 if (BinOp.getOpcode() == ISD::SELECT && i == 0)
14192 continue;
14193 if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3)
14194 continue;
14195
14196 if (((BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
14197 BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
14198 BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) &&
14199 BinOp.getOperand(i).getOperand(0).getValueType() == MVT::i1) ||
14200 isa<ConstantSDNode>(BinOp.getOperand(i))) {
14201 Inputs.push_back(BinOp.getOperand(i));
14202 } else if (BinOp.getOperand(i).getOpcode() == ISD::AND ||
14203 BinOp.getOperand(i).getOpcode() == ISD::OR ||
14204 BinOp.getOperand(i).getOpcode() == ISD::XOR ||
14205 BinOp.getOperand(i).getOpcode() == ISD::SELECT ||
14206 BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC ||
14207 BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE ||
14208 BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
14209 BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
14210 BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) {
14211 BinOps.push_back(BinOp.getOperand(i));
14212 } else {
14213 // We have an input that is not an extension or another binary
14214 // operation; we'll abort this transformation.
14215 return SDValue();
14216 }
14217 }
14218 }
14219
14220 // Make sure that this is a self-contained cluster of operations (which
14221 // is not quite the same thing as saying that everything has only one
14222 // use).
14223 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
14224 if (isa<ConstantSDNode>(Inputs[i]))
14225 continue;
14226
14227 for (const SDNode *User : Inputs[i].getNode()->uses()) {
14228 if (User != N && !Visited.count(User))
14229 return SDValue();
14230
14231 // Make sure that we're not going to promote the non-output-value
14232 // operand(s) of SELECT or SELECT_CC.
14233 // FIXME: Although we could sometimes handle this, and it does occur in
14234 // practice that one of the condition inputs to the select is also one of
14235 // the outputs, we currently can't deal with this.
14236 if (User->getOpcode() == ISD::SELECT) {
14237 if (User->getOperand(0) == Inputs[i])
14238 return SDValue();
14239 } else if (User->getOpcode() == ISD::SELECT_CC) {
14240 if (User->getOperand(0) == Inputs[i] ||
14241 User->getOperand(1) == Inputs[i])
14242 return SDValue();
14243 }
14244 }
14245 }
14246
14247 for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) {
14248 for (const SDNode *User : PromOps[i].getNode()->uses()) {
14249 if (User != N && !Visited.count(User))
14250 return SDValue();
14251
14252 // Make sure that we're not going to promote the non-output-value
14253 // operand(s) of SELECT or SELECT_CC.
14254 // FIXME: Although we could sometimes handle this, and it does occur in
14255 // practice that one of the condition inputs to the select is also one of
14256 // the outputs, we currently can't deal with this.
14257 if (User->getOpcode() == ISD::SELECT) {
14258 if (User->getOperand(0) == PromOps[i])
14259 return SDValue();
14260 } else if (User->getOpcode() == ISD::SELECT_CC) {
14261 if (User->getOperand(0) == PromOps[i] ||
14262 User->getOperand(1) == PromOps[i])
14263 return SDValue();
14264 }
14265 }
14266 }
14267
14268 // Replace all inputs with the extension operand.
14269 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
14270 // Constants may have users outside the cluster of to-be-promoted nodes,
14271 // and so we need to replace those as we do the promotions.
14272 if (isa<ConstantSDNode>(Inputs[i]))
14273 continue;
14274 else
14275 DAG.ReplaceAllUsesOfValueWith(Inputs[i], Inputs[i].getOperand(0));
14276 }
14277
14278 std::list<HandleSDNode> PromOpHandles;
14279 for (auto &PromOp : PromOps)
14280 PromOpHandles.emplace_back(PromOp);
14281
14282 // Replace all operations (these are all the same, but have a different
14283 // (i1) return type). DAG.getNode will validate that the types of
14284 // a binary operator match, so go through the list in reverse so that
14285 // we've likely promoted both operands first. Any intermediate truncations or
14286 // extensions disappear.
14287 while (!PromOpHandles.empty()) {
14288 SDValue PromOp = PromOpHandles.back().getValue();
14289 PromOpHandles.pop_back();
14290
14291 if (PromOp.getOpcode() == ISD::TRUNCATE ||
14292 PromOp.getOpcode() == ISD::SIGN_EXTEND ||
14293 PromOp.getOpcode() == ISD::ZERO_EXTEND ||
14294 PromOp.getOpcode() == ISD::ANY_EXTEND) {
14295 if (!isa<ConstantSDNode>(PromOp.getOperand(0)) &&
14296 PromOp.getOperand(0).getValueType() != MVT::i1) {
14297 // The operand is not yet ready (see comment below).
14298 PromOpHandles.emplace_front(PromOp);
14299 continue;
14300 }
14301
14302 SDValue RepValue = PromOp.getOperand(0);
14303 if (isa<ConstantSDNode>(RepValue))
14304 RepValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, RepValue);
14305
14306 DAG.ReplaceAllUsesOfValueWith(PromOp, RepValue);
14307 continue;
14308 }
14309
14310 unsigned C;
14311 switch (PromOp.getOpcode()) {
14312 default: C = 0; break;
14313 case ISD::SELECT: C = 1; break;
14314 case ISD::SELECT_CC: C = 2; break;
14315 }
14316
14317 if ((!isa<ConstantSDNode>(PromOp.getOperand(C)) &&
14318 PromOp.getOperand(C).getValueType() != MVT::i1) ||
14319 (!isa<ConstantSDNode>(PromOp.getOperand(C+1)) &&
14320 PromOp.getOperand(C+1).getValueType() != MVT::i1)) {
14321 // The to-be-promoted operands of this node have not yet been
14322 // promoted (this should be rare because we're going through the
14323 // list backward, but if one of the operands has several users in
14324 // this cluster of to-be-promoted nodes, it is possible).
14325 PromOpHandles.emplace_front(PromOp);
14326 continue;
14327 }
14328
14329 SmallVector<SDValue, 3> Ops(PromOp.getNode()->op_begin(),
14330 PromOp.getNode()->op_end());
14331
14332 // If there are any constant inputs, make sure they're replaced now.
14333 for (unsigned i = 0; i < 2; ++i)
14334 if (isa<ConstantSDNode>(Ops[C+i]))
14335 Ops[C+i] = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, Ops[C+i]);
14336
14337 DAG.ReplaceAllUsesOfValueWith(PromOp,
14338 DAG.getNode(PromOp.getOpcode(), dl, MVT::i1, Ops));
14339 }
14340
14341 // Now we're left with the initial truncation itself.
14342 if (N->getOpcode() == ISD::TRUNCATE)
14343 return N->getOperand(0);
14344
14345 // Otherwise, this is a comparison. The operands to be compared have just
14346 // changed type (to i1), but everything else is the same.
14347 return SDValue(N, 0);
14348}
14349
14350SDValue PPCTargetLowering::DAGCombineExtBoolTrunc(SDNode *N,
14351 DAGCombinerInfo &DCI) const {
14352 SelectionDAG &DAG = DCI.DAG;
14353 SDLoc dl(N);
14354
14355 // If we're tracking CR bits, we need to be careful that we don't have:
14356 // zext(binary-ops(trunc(x), trunc(y)))
14357 // or
14358 // zext(binary-ops(binary-ops(trunc(x), trunc(y)), ...)
14359 // such that we're unnecessarily moving things into CR bits that can more
14360 // efficiently stay in GPRs. Note that if we're not certain that the high
14361 // bits are set as required by the final extension, we still may need to do
14362 // some masking to get the proper behavior.
14363
14364 // This same functionality is important on PPC64 when dealing with
14365 // 32-to-64-bit extensions; these occur often when 32-bit values are used as
14366 // the return values of functions. Because it is so similar, it is handled
14367 // here as well.
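// For example, on a CR-bit subtarget
//   zext (xor (trunc i32 %a to i1), (trunc i32 %b to i1)) to i32
// can become (and (xor %a, %b), 1): the xor stays in GPRs and only a final
// mask is needed when the high bits are not already known to be zero.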
14368
14369 if (N->getValueType(0) != MVT::i32 &&
14370 N->getValueType(0) != MVT::i64)
14371 return SDValue();
14372
14373 if (!((N->getOperand(0).getValueType() == MVT::i1 && Subtarget.useCRBits()) ||
14374 (N->getOperand(0).getValueType() == MVT::i32 && Subtarget.isPPC64())))
14375 return SDValue();
14376
14377 if (N->getOperand(0).getOpcode() != ISD::AND &&
14378 N->getOperand(0).getOpcode() != ISD::OR &&
14379 N->getOperand(0).getOpcode() != ISD::XOR &&
14380 N->getOperand(0).getOpcode() != ISD::SELECT &&
14381 N->getOperand(0).getOpcode() != ISD::SELECT_CC)
14382 return SDValue();
14383
14384 SmallVector<SDValue, 4> Inputs;
14385 SmallVector<SDValue, 8> BinOps(1, N->getOperand(0)), PromOps;
14386 SmallPtrSet<SDNode *, 16> Visited;
14387
14388 // Visit all inputs, collect all binary operations (and, or, xor and
14389 // select) that are all fed by truncations.
14390 while (!BinOps.empty()) {
14391 SDValue BinOp = BinOps.pop_back_val();
14392
14393 if (!Visited.insert(BinOp.getNode()).second)
14394 continue;
14395
14396 PromOps.push_back(BinOp);
14397
14398 for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) {
14399 // The condition of the select is not promoted.
14400 if (BinOp.getOpcode() == ISD::SELECT && i == 0)
14401 continue;
14402 if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3)
14403 continue;
14404
14405 if (BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE ||
14406 isa<ConstantSDNode>(BinOp.getOperand(i))) {
14407 Inputs.push_back(BinOp.getOperand(i));
14408 } else if (BinOp.getOperand(i).getOpcode() == ISD::AND ||
14409 BinOp.getOperand(i).getOpcode() == ISD::OR ||
14410 BinOp.getOperand(i).getOpcode() == ISD::XOR ||
14411 BinOp.getOperand(i).getOpcode() == ISD::SELECT ||
14412 BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC) {
14413 BinOps.push_back(BinOp.getOperand(i));
14414 } else {
14415 // We have an input that is not a truncation or another binary
14416 // operation; we'll abort this transformation.
14417 return SDValue();
14418 }
14419 }
14420 }
14421
14422 // The operands of a select that must be truncated when the select is
14423 // promoted because the operand is actually part of the to-be-promoted set.
14424 DenseMap<SDNode *, EVT> SelectTruncOp[2];
14425
14426 // Make sure that this is a self-contained cluster of operations (which
14427 // is not quite the same thing as saying that everything has only one
14428 // use).
14429 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
14430 if (isa<ConstantSDNode>(Inputs[i]))
14431 continue;
14432
14433 for (SDNode *User : Inputs[i].getNode()->uses()) {
14434 if (User != N && !Visited.count(User))
14435 return SDValue();
14436
14437 // If we're going to promote the non-output-value operand(s) of SELECT or
14438 // SELECT_CC, record them for truncation.
14439 if (User->getOpcode() == ISD::SELECT) {
14440 if (User->getOperand(0) == Inputs[i])
14441 SelectTruncOp[0].insert(std::make_pair(User,
14442 User->getOperand(0).getValueType()));
14443 } else if (User->getOpcode() == ISD::SELECT_CC) {
14444 if (User->getOperand(0) == Inputs[i])
14445 SelectTruncOp[0].insert(std::make_pair(User,
14446 User->getOperand(0).getValueType()));
14447 if (User->getOperand(1) == Inputs[i])
14448 SelectTruncOp[1].insert(std::make_pair(User,
14449 User->getOperand(1).getValueType()));
14450 }
14451 }
14452 }
14453
14454 for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) {
14455 for (SDNode *User : PromOps[i].getNode()->uses()) {
14456 if (User != N && !Visited.count(User))
14457 return SDValue();
14458
14459 // If we're going to promote the non-output-value operand(s) of SELECT or
14460 // SELECT_CC, record them for truncation.
14461 if (User->getOpcode() == ISD::SELECT) {
14462 if (User->getOperand(0) == PromOps[i])
14463 SelectTruncOp[0].insert(std::make_pair(User,
14464 User->getOperand(0).getValueType()));
14465 } else if (User->getOpcode() == ISD::SELECT_CC) {
14466 if (User->getOperand(0) == PromOps[i])
14467 SelectTruncOp[0].insert(std::make_pair(User,
14468 User->getOperand(0).getValueType()));
14469 if (User->getOperand(1) == PromOps[i])
14470 SelectTruncOp[1].insert(std::make_pair(User,
14471 User->getOperand(1).getValueType()));
14472 }
14473 }
14474 }
14475
14476 unsigned PromBits = N->getOperand(0).getValueSizeInBits();
14477 bool ReallyNeedsExt = false;
14478 if (N->getOpcode() != ISD::ANY_EXTEND) {
14479 // If not all of the inputs are already sign/zero extended, then
14480 // we'll still need to do that at the end.
14481 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
14482 if (isa<ConstantSDNode>(Inputs[i]))
14483 continue;
14484
14485 unsigned OpBits =
14486 Inputs[i].getOperand(0).getValueSizeInBits();
14487 assert(PromBits < OpBits && "Truncation not to a smaller bit count?");
14488
14489 if ((N->getOpcode() == ISD::ZERO_EXTEND &&
14490 !DAG.MaskedValueIsZero(Inputs[i].getOperand(0),
14491 APInt::getHighBitsSet(OpBits,
14492 OpBits-PromBits))) ||
14493 (N->getOpcode() == ISD::SIGN_EXTEND &&
14494 DAG.ComputeNumSignBits(Inputs[i].getOperand(0)) <
14495 (OpBits-(PromBits-1)))) {
14496 ReallyNeedsExt = true;
14497 break;
14498 }
14499 }
14500 }
14501
14502 // Replace all inputs, either with the truncation operand, or a
14503 // truncation or extension to the final output type.
14504 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
14505 // Constant inputs need to be replaced with the to-be-promoted nodes that
14506 // use them because they might have users outside of the cluster of
14507 // promoted nodes.
14508 if (isa<ConstantSDNode>(Inputs[i]))
14509 continue;
14510
14511 SDValue InSrc = Inputs[i].getOperand(0);
14512 if (Inputs[i].getValueType() == N->getValueType(0))
14513 DAG.ReplaceAllUsesOfValueWith(Inputs[i], InSrc);
14514 else if (N->getOpcode() == ISD::SIGN_EXTEND)
14515 DAG.ReplaceAllUsesOfValueWith(Inputs[i],
14516 DAG.getSExtOrTrunc(InSrc, dl, N->getValueType(0)));
14517 else if (N->getOpcode() == ISD::ZERO_EXTEND)
14518 DAG.ReplaceAllUsesOfValueWith(Inputs[i],
14519 DAG.getZExtOrTrunc(InSrc, dl, N->getValueType(0)));
14520 else
14521 DAG.ReplaceAllUsesOfValueWith(Inputs[i],
14522 DAG.getAnyExtOrTrunc(InSrc, dl, N->getValueType(0)));
14523 }
14524
14525 std::list<HandleSDNode> PromOpHandles;
14526 for (auto &PromOp : PromOps)
14527 PromOpHandles.emplace_back(PromOp);
14528
14529 // Replace all operations (these are all the same, but have a different
14530 // (promoted) return type). DAG.getNode will validate that the types of
14531 // a binary operator match, so go through the list in reverse so that
14532 // we've likely promoted both operands first.
14533 while (!PromOpHandles.empty()) {
14534 SDValue PromOp = PromOpHandles.back().getValue();
14535 PromOpHandles.pop_back();
14536
14537 unsigned C;
14538 switch (PromOp.getOpcode()) {
14539 default: C = 0; break;
14540 case ISD::SELECT: C = 1; break;
14541 case ISD::SELECT_CC: C = 2; break;
14542 }
14543
14544 if ((!isa<ConstantSDNode>(PromOp.getOperand(C)) &&
14545 PromOp.getOperand(C).getValueType() != N->getValueType(0)) ||
14546 (!isa<ConstantSDNode>(PromOp.getOperand(C+1)) &&
14547 PromOp.getOperand(C+1).getValueType() != N->getValueType(0))) {
14548 // The to-be-promoted operands of this node have not yet been
14549 // promoted (this should be rare because we're going through the
14550 // list backward, but if one of the operands has several users in
14551 // this cluster of to-be-promoted nodes, it is possible).
14552 PromOpHandles.emplace_front(PromOp);
14553 continue;
14554 }
14555
14556 // For SELECT and SELECT_CC nodes, we do a similar check for any
14557 // to-be-promoted comparison inputs.
14558 if (PromOp.getOpcode() == ISD::SELECT ||
14559 PromOp.getOpcode() == ISD::SELECT_CC) {
14560 if ((SelectTruncOp[0].count(PromOp.getNode()) &&
14561 PromOp.getOperand(0).getValueType() != N->getValueType(0)) ||
14562 (SelectTruncOp[1].count(PromOp.getNode()) &&
14563 PromOp.getOperand(1).getValueType() != N->getValueType(0))) {
14564 PromOpHandles.emplace_front(PromOp);
14565 continue;
14566 }
14567 }
14568
14569 SmallVector<SDValue, 3> Ops(PromOp.getNode()->op_begin(),
14570 PromOp.getNode()->op_end());
14571
14572 // If this node has constant inputs, then they'll need to be promoted here.
14573 for (unsigned i = 0; i < 2; ++i) {
14574 if (!isa<ConstantSDNode>(Ops[C+i]))
14575 continue;
14576 if (Ops[C+i].getValueType() == N->getValueType(0))
14577 continue;
14578
14579 if (N->getOpcode() == ISD::SIGN_EXTEND)
14580 Ops[C+i] = DAG.getSExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
14581 else if (N->getOpcode() == ISD::ZERO_EXTEND)
14582 Ops[C+i] = DAG.getZExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
14583 else
14584 Ops[C+i] = DAG.getAnyExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
14585 }
14586
14587 // If we've promoted the comparison inputs of a SELECT or SELECT_CC,
14588 // truncate them again to the original value type.
14589 if (PromOp.getOpcode() == ISD::SELECT ||
14590 PromOp.getOpcode() == ISD::SELECT_CC) {
14591 auto SI0 = SelectTruncOp[0].find(PromOp.getNode());
14592 if (SI0 != SelectTruncOp[0].end())
14593 Ops[0] = DAG.getNode(ISD::TRUNCATE, dl, SI0->second, Ops[0]);
14594 auto SI1 = SelectTruncOp[1].find(PromOp.getNode());
14595 if (SI1 != SelectTruncOp[1].end())
14596 Ops[1] = DAG.getNode(ISD::TRUNCATE, dl, SI1->second, Ops[1]);
14597 }
14598
14599 DAG.ReplaceAllUsesOfValueWith(PromOp,
14600 DAG.getNode(PromOp.getOpcode(), dl, N->getValueType(0), Ops));
14601 }
14602
14603 // Now we're left with the initial extension itself.
14604 if (!ReallyNeedsExt)
14605 return N->getOperand(0);
14606
14607 // To zero extend, just mask off everything except for the first bit (in the
14608 // i1 case).
14609 if (N->getOpcode() == ISD::ZERO_EXTEND)
14610 return DAG.getNode(ISD::AND, dl, N->getValueType(0), N->getOperand(0),
14611 DAG.getConstant(APInt::getLowBitsSet(
14612 N->getValueSizeInBits(0), PromBits),
14613 dl, N->getValueType(0)));
14614
14615 assert(N->getOpcode() == ISD::SIGN_EXTEND &&
14616 "Invalid extension type");
14617 EVT ShiftAmountTy = getShiftAmountTy(N->getValueType(0), DAG.getDataLayout());
14618 SDValue ShiftCst =
14619 DAG.getConstant(N->getValueSizeInBits(0) - PromBits, dl, ShiftAmountTy);
14620 return DAG.getNode(
14621 ISD::SRA, dl, N->getValueType(0),
14622 DAG.getNode(ISD::SHL, dl, N->getValueType(0), N->getOperand(0), ShiftCst),
14623 ShiftCst);
14624}
14625
14626SDValue PPCTargetLowering::combineSetCC(SDNode *N,
14627 DAGCombinerInfo &DCI) const {
14628 assert(N->getOpcode() == ISD::SETCC &&
14629 "Should be called with a SETCC node");
14630
14631 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
14632 if (CC == ISD::SETNE || CC == ISD::SETEQ) {
14633 SDValue LHS = N->getOperand(0);
14634 SDValue RHS = N->getOperand(1);
14635
14636 // If there is a '0 - y' pattern, canonicalize the pattern to the RHS.
14637 if (LHS.getOpcode() == ISD::SUB && isNullConstant(LHS.getOperand(0)) &&
14638 LHS.hasOneUse())
14639 std::swap(LHS, RHS);
14640
14641 // x == 0-y --> x+y == 0
14642 // x != 0-y --> x+y != 0
14643 if (RHS.getOpcode() == ISD::SUB && isNullConstant(RHS.getOperand(0)) &&
14644 RHS.hasOneUse()) {
14645 SDLoc DL(N);
14646 SelectionDAG &DAG = DCI.DAG;
14647 EVT VT = N->getValueType(0);
14648 EVT OpVT = LHS.getValueType();
14649 SDValue Add = DAG.getNode(ISD::ADD, DL, OpVT, LHS, RHS.getOperand(1));
14650 return DAG.getSetCC(DL, VT, Add, DAG.getConstant(0, DL, OpVT), CC);
14651 }
14652 }
14653
14654 return DAGCombineTruncBoolExt(N, DCI);
14655}
14656
14657// Is this an extending load from an f32 to an f64?
14658static bool isFPExtLoad(SDValue Op) {
14659 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Op.getNode()))
14660 return LD->getExtensionType() == ISD::EXTLOAD &&
14661 Op.getValueType() == MVT::f64;
14662 return false;
14663}
14664
14665/// Reduces the number of fp-to-int conversion when building a vector.
14666///
14667/// If this vector is built out of floating to integer conversions,
14668/// transform it to a vector built out of floating point values followed by a
14669/// single floating to integer conversion of the vector.
14670/// Namely (build_vector (fptosi $A), (fptosi $B), ...)
14671/// becomes (fptosi (build_vector ($A, $B, ...)))
14672SDValue PPCTargetLowering::
14673combineElementTruncationToVectorTruncation(SDNode *N,
14674 DAGCombinerInfo &DCI) const {
14675 assert(N->getOpcode() == ISD::BUILD_VECTOR &&
14676 "Should be called with a BUILD_VECTOR node");
14677
14678 SelectionDAG &DAG = DCI.DAG;
14679 SDLoc dl(N);
14680
14681 SDValue FirstInput = N->getOperand(0);
14682 assert(FirstInput.getOpcode() == PPCISD::MFVSR &&
14683 "The input operand must be an fp-to-int conversion.");
14684
14685 // This combine happens after legalization so the fp_to_[su]i nodes are
14686 // already converted to PPCISD nodes.
14687 unsigned FirstConversion = FirstInput.getOperand(0).getOpcode();
14688 if (FirstConversion == PPCISD::FCTIDZ ||
14689 FirstConversion == PPCISD::FCTIDUZ ||
14690 FirstConversion == PPCISD::FCTIWZ ||
14691 FirstConversion == PPCISD::FCTIWUZ) {
14692 bool IsSplat = true;
14693 bool Is32Bit = FirstConversion == PPCISD::FCTIWZ ||
14694 FirstConversion == PPCISD::FCTIWUZ;
14695 EVT SrcVT = FirstInput.getOperand(0).getValueType();
14696 SmallVector<SDValue, 4> Ops;
14697 EVT TargetVT = N->getValueType(0);
14698 for (int i = 0, e = N->getNumOperands(); i < e; ++i) {
14699 SDValue NextOp = N->getOperand(i);
14700 if (NextOp.getOpcode() != PPCISD::MFVSR)
14701 return SDValue();
14702 unsigned NextConversion = NextOp.getOperand(0).getOpcode();
14703 if (NextConversion != FirstConversion)
14704 return SDValue();
14705 // If we are converting to 32-bit integers, we need to add an FP_ROUND.
14706 // This is not valid if the input was originally double precision. It is
14707 // also not profitable to do unless this is an extending load in which
14708 // case doing this combine will allow us to combine consecutive loads.
14709 if (Is32Bit && !isFPExtLoad(NextOp.getOperand(0).getOperand(0)))
14710 return SDValue();
14711 if (N->getOperand(i) != FirstInput)
14712 IsSplat = false;
14713 }
14714
14715 // If this is a splat, we leave it as-is since there will be only a single
14716 // fp-to-int conversion followed by a splat of the integer. This is better
14717 // for 32-bit and smaller ints and neutral for 64-bit ints.
14718 if (IsSplat)
14719 return SDValue();
14720
14721 // Now that we know we have the right type of node, get its operands
14722 for (int i = 0, e = N->getNumOperands(); i < e; ++i) {
14723 SDValue In = N->getOperand(i).getOperand(0);
14724 if (Is32Bit) {
14725 // For 32-bit values, we need to add an FP_ROUND node (if we made it
14726 // here, we know that all inputs are extending loads so this is safe).
14727 if (In.isUndef())
14728 Ops.push_back(DAG.getUNDEF(SrcVT));
14729 else {
14730 SDValue Trunc =
14731 DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, In.getOperand(0),
14732 DAG.getIntPtrConstant(1, dl, /*isTarget=*/true));
14733 Ops.push_back(Trunc);
14734 }
14735 } else
14736 Ops.push_back(In.isUndef() ? DAG.getUNDEF(SrcVT) : In.getOperand(0));
14737 }
14738
14739 unsigned Opcode;
14740 if (FirstConversion == PPCISD::FCTIDZ ||
14741 FirstConversion == PPCISD::FCTIWZ)
14742 Opcode = ISD::FP_TO_SINT;
14743 else
14744 Opcode = ISD::FP_TO_UINT;
14745
14746 EVT NewVT = TargetVT == MVT::v2i64 ? MVT::v2f64 : MVT::v4f32;
14747 SDValue BV = DAG.getBuildVector(NewVT, dl, Ops);
14748 return DAG.getNode(Opcode, dl, TargetVT, BV);
14749 }
14750 return SDValue();
14751}
14752
14753/// Reduce the number of loads when building a vector.
14754///
14755/// Building a vector out of multiple loads can be converted to a load
14756/// of the vector type if the loads are consecutive. If the loads are
14757/// consecutive but in descending order, a shuffle is added at the end
14758/// to reorder the vector.
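/// For example, assuming four i32 loads from addresses %p, %p+4, %p+8 and
/// %p+12:
///   (build_vector (load %p), (load %p+4), (load %p+8), (load %p+12))
/// becomes a single (v4i32 (load %p)); if the loads instead appear in
/// descending address order, the wide load is followed by a reversing
/// vector_shuffle.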
14759 static SDValue combineBVOfConsecutiveLoads(SDNode *N, SelectionDAG &DAG) {
14760 assert(N->getOpcode() == ISD::BUILD_VECTOR &&
14761 "Should be called with a BUILD_VECTOR node");
14762
14763 SDLoc dl(N);
14764
14766 // Return early for non-byte-sized types, as they can't be consecutive.
14766 if (!N->getValueType(0).getVectorElementType().isByteSized())
14767 return SDValue();
14768
14769 bool InputsAreConsecutiveLoads = true;
14770 bool InputsAreReverseConsecutive = true;
14771 unsigned ElemSize = N->getValueType(0).getScalarType().getStoreSize();
14772 SDValue FirstInput = N->getOperand(0);
14773 bool IsRoundOfExtLoad = false;
14774 LoadSDNode *FirstLoad = nullptr;
14775
14776 if (FirstInput.getOpcode() == ISD::FP_ROUND &&
14777 FirstInput.getOperand(0).getOpcode() == ISD::LOAD) {
14778 FirstLoad = cast<LoadSDNode>(FirstInput.getOperand(0));
14779 IsRoundOfExtLoad = FirstLoad->getExtensionType() == ISD::EXTLOAD;
14780 }
14781 // Not a build vector of (possibly fp_rounded) loads.
14782 if ((!IsRoundOfExtLoad && FirstInput.getOpcode() != ISD::LOAD) ||
14783 N->getNumOperands() == 1)
14784 return SDValue();
14785
14786 if (!IsRoundOfExtLoad)
14787 FirstLoad = cast<LoadSDNode>(FirstInput);
14788
14789 SmallVector<LoadSDNode *, 4> InputLoads;
14790 InputLoads.push_back(FirstLoad);
14791 for (int i = 1, e = N->getNumOperands(); i < e; ++i) {
14792 // If any inputs are fp_round(extload), they all must be.
14793 if (IsRoundOfExtLoad && N->getOperand(i).getOpcode() != ISD::FP_ROUND)
14794 return SDValue();
14795
14796 SDValue NextInput = IsRoundOfExtLoad ? N->getOperand(i).getOperand(0) :
14797 N->getOperand(i);
14798 if (NextInput.getOpcode() != ISD::LOAD)
14799 return SDValue();
14800
14801 SDValue PreviousInput =
14802 IsRoundOfExtLoad ? N->getOperand(i-1).getOperand(0) : N->getOperand(i-1);
14803 LoadSDNode *LD1 = cast<LoadSDNode>(PreviousInput);
14804 LoadSDNode *LD2 = cast<LoadSDNode>(NextInput);
14805
14806 // If any inputs are fp_round(extload), they all must be.
14807 if (IsRoundOfExtLoad && LD2->getExtensionType() != ISD::EXTLOAD)
14808 return SDValue();
14809
14810 // We only care about regular loads. The PPC-specific load intrinsics
14811 // will not lead to a merge opportunity.
14812 if (!DAG.areNonVolatileConsecutiveLoads(LD2, LD1, ElemSize, 1))
14813 InputsAreConsecutiveLoads = false;
14814 if (!DAG.areNonVolatileConsecutiveLoads(LD1, LD2, ElemSize, 1))
14815 InputsAreReverseConsecutive = false;
14816
14817 // Exit early if the loads are neither consecutive nor reverse consecutive.
14818 if (!InputsAreConsecutiveLoads && !InputsAreReverseConsecutive)
14819 return SDValue();
14820 InputLoads.push_back(LD2);
14821 }
14822
14823 assert(!(InputsAreConsecutiveLoads && InputsAreReverseConsecutive) &&
14824 "The loads cannot be both consecutive and reverse consecutive.");
14825
14826 SDValue WideLoad;
14827 SDValue ReturnSDVal;
14828 if (InputsAreConsecutiveLoads) {
14829 assert(FirstLoad && "Input needs to be a LoadSDNode.");
14830 WideLoad = DAG.getLoad(N->getValueType(0), dl, FirstLoad->getChain(),
14831 FirstLoad->getBasePtr(), FirstLoad->getPointerInfo(),
14832 FirstLoad->getAlign());
14833 ReturnSDVal = WideLoad;
14834 } else if (InputsAreReverseConsecutive) {
14835 LoadSDNode *LastLoad = InputLoads.back();
14836 assert(LastLoad && "Input needs to be a LoadSDNode.");
14837 WideLoad = DAG.getLoad(N->getValueType(0), dl, LastLoad->getChain(),
14838 LastLoad->getBasePtr(), LastLoad->getPointerInfo(),
14839 LastLoad->getAlign());
14840 SmallVector<int, 16> Ops;
14841 for (int i = N->getNumOperands() - 1; i >= 0; i--)
14842 Ops.push_back(i);
14843
14844 ReturnSDVal = DAG.getVectorShuffle(N->getValueType(0), dl, WideLoad,
14845 DAG.getUNDEF(N->getValueType(0)), Ops);
14846 } else
14847 return SDValue();
14848
14849 for (auto *LD : InputLoads)
14850 DAG.makeEquivalentMemoryOrdering(LD, WideLoad);
14851 return ReturnSDVal;
14852}
14853
14854// This function adds the required vector_shuffle needed to get
14855// the elements of the vector extract in the correct position
14856// as specified by the CorrectElems encoding.
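// For example, a byte-to-word extend on little endian expects its inputs in
// byte lanes 0, 4, 8 and 12; if the build_vector instead extracts bytes
// 1, 5, 9 and 13, a vector_shuffle is emitted first to move those bytes into
// the expected lanes.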
14857 static SDValue addShuffleForVecExtend(SDNode *N, SelectionDAG &DAG,
14858 SDValue Input, uint64_t Elems,
14859 uint64_t CorrectElems) {
14860 SDLoc dl(N);
14861
14862 unsigned NumElems = Input.getValueType().getVectorNumElements();
14863 SmallVector<int, 16> ShuffleMask(NumElems, -1);
14864
14865 // Knowing the element indices being extracted from the original
14866 // vector and the order in which they're being inserted, just put
14867 // them at element indices required for the instruction.
14868 for (unsigned i = 0; i < N->getNumOperands(); i++) {
14869 if (DAG.getDataLayout().isLittleEndian())
14870 ShuffleMask[CorrectElems & 0xF] = Elems & 0xF;
14871 else
14872 ShuffleMask[(CorrectElems & 0xF0) >> 4] = (Elems & 0xF0) >> 4;
14873 CorrectElems = CorrectElems >> 8;
14874 Elems = Elems >> 8;
14875 }
14876
14877 SDValue Shuffle =
14878 DAG.getVectorShuffle(Input.getValueType(), dl, Input,
14879 DAG.getUNDEF(Input.getValueType()), ShuffleMask);
14880
14881 EVT VT = N->getValueType(0);
14882 SDValue Conv = DAG.getBitcast(VT, Shuffle);
14883
14884 EVT ExtVT = EVT::getVectorVT(*DAG.getContext(),
14885 Input.getValueType().getVectorElementType(),
14886 VT.getVectorNumElements());
14887 return DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, VT, Conv,
14888 DAG.getValueType(ExtVT));
14889}
14890
14891// Look for build vector patterns where input operands come from sign
14892// extended vector_extract elements of specific indices. If the correct indices
14893// aren't used, add a vector shuffle to fix up the indices and create
14894// SIGN_EXTEND_INREG node which selects the vector sign extend instructions
14895// during instruction selection.
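// For example, on little endian
//   (v4i32 (build_vector (sext (extractelt v16i8 %v, 0)),
//                        (sext (extractelt %v, 4)),
//                        (sext (extractelt %v, 8)),
//                        (sext (extractelt %v, 12))))
// already uses the lanes the byte-to-word extend expects, so it maps directly
// onto the vector sign-extend; other index patterns get a fixup shuffle first.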
14896 static SDValue combineBVOfVecSExt(SDNode *N, SelectionDAG &DAG) {
14897 // This array encodes the indices that the vector sign extend instructions
14898 // extract from when extending from one type to another for both BE and LE.
14899 // The right nibble of each byte corresponds to the LE indices,
14900 // and the left nibble of each byte corresponds to the BE indices.
14901 // For example: 0x3074B8FC byte->word
14902 // For LE: the allowed indices are: 0x0,0x4,0x8,0xC
14903 // For BE: the allowed indices are: 0x3,0x7,0xB,0xF
14904 // For example: 0x000070F8 byte->double word
14905 // For LE: the allowed indices are: 0x0,0x8
14906 // For BE: the allowed indices are: 0x7,0xF
14907 uint64_t TargetElems[] = {
14908 0x3074B8FC, // b->w
14909 0x000070F8, // b->d
14910 0x10325476, // h->w
14911 0x00003074, // h->d
14912 0x00001032, // w->d
14913 };
14914
14915 uint64_t Elems = 0;
14916 int Index;
14917 SDValue Input;
14918
14919 auto isSExtOfVecExtract = [&](SDValue Op) -> bool {
14920 if (!Op)
14921 return false;
14922 if (Op.getOpcode() != ISD::SIGN_EXTEND &&
14923 Op.getOpcode() != ISD::SIGN_EXTEND_INREG)
14924 return false;
14925
14926 // A SIGN_EXTEND_INREG might be fed by an ANY_EXTEND to produce a value
14927 // of the right width.
14928 SDValue Extract = Op.getOperand(0);
14929 if (Extract.getOpcode() == ISD::ANY_EXTEND)
14930 Extract = Extract.getOperand(0);
14931 if (Extract.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
14932 return false;
14933
14934 ConstantSDNode *ExtOp = dyn_cast<ConstantSDNode>(Extract.getOperand(1));
14935 if (!ExtOp)
14936 return false;
14937
14938 Index = ExtOp->getZExtValue();
14939 if (Input && Input != Extract.getOperand(0))
14940 return false;
14941
14942 if (!Input)
14943 Input = Extract.getOperand(0);
14944
14945 Elems = Elems << 8;
14946 Index = DAG.getDataLayout().isLittleEndian() ? Index : Index << 4;
14947 Elems |= Index;
14948
14949 return true;
14950 };
14951
14952 // If the build vector operands aren't sign extended vector extracts
14953 // of the same input vector, then return.
14954 for (unsigned i = 0; i < N->getNumOperands(); i++) {
14955 if (!isSExtOfVecExtract(N->getOperand(i))) {
14956 return SDValue();
14957 }
14958 }
14959
14960 // If the vector extract indices are not correct, add the appropriate
14961 // vector_shuffle.
14962 int TgtElemArrayIdx;
14963 int InputSize = Input.getValueType().getScalarSizeInBits();
14964 int OutputSize = N->getValueType(0).getScalarSizeInBits();
14965 if (InputSize + OutputSize == 40)
14966 TgtElemArrayIdx = 0;
14967 else if (InputSize + OutputSize == 72)
14968 TgtElemArrayIdx = 1;
14969 else if (InputSize + OutputSize == 48)
14970 TgtElemArrayIdx = 2;
14971 else if (InputSize + OutputSize == 80)
14972 TgtElemArrayIdx = 3;
14973 else if (InputSize + OutputSize == 96)
14974 TgtElemArrayIdx = 4;
14975 else
14976 return SDValue();
14977
14978 uint64_t CorrectElems = TargetElems[TgtElemArrayIdx];
14979 CorrectElems = DAG.getDataLayout().isLittleEndian()
14980 ? CorrectElems & 0x0F0F0F0F0F0F0F0F
14981 : CorrectElems & 0xF0F0F0F0F0F0F0F0;
14982 if (Elems != CorrectElems) {
14983 return addShuffleForVecExtend(N, DAG, Input, Elems, CorrectElems);
14984 }
14985
14986 // Regular lowering will catch cases where a shuffle is not needed.
14987 return SDValue();
14988}
14989
14990// Look for the pattern of a load from a narrow width to i128, feeding
14991// into a BUILD_VECTOR of v1i128. Replace this sequence with a PPCISD node
14992// (LXVRZX). This node represents a zero extending load that will be matched
14993// to the Load VSX Vector Rightmost instructions.
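// For example:
//   (v1i128 (build_vector (i128 (zextload i32 %ptr))))
// becomes a single LXVRZX node that loads the word and zeroes the remaining
// bits of the vector register.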
14994 static SDValue combineBVZEXTLOAD(SDNode *N, SelectionDAG &DAG) {
14995 SDLoc DL(N);
14996
14997 // This combine is only eligible for a BUILD_VECTOR of v1i128.
14998 if (N->getValueType(0) != MVT::v1i128)
14999 return SDValue();
15000
15001 SDValue Operand = N->getOperand(0);
15002 // Proceed with the transformation if the operand to the BUILD_VECTOR
15003 // is a load instruction.
15004 if (Operand.getOpcode() != ISD::LOAD)
15005 return SDValue();
15006
15007 auto *LD = cast<LoadSDNode>(Operand);
15008 EVT MemoryType = LD->getMemoryVT();
15009
15010 // This transformation is only valid if we are loading either a byte,
15011 // halfword, word, or doubleword.
15012 bool ValidLDType = MemoryType == MVT::i8 || MemoryType == MVT::i16 ||
15013 MemoryType == MVT::i32 || MemoryType == MVT::i64;
15014
15015 // Ensure that the load from the narrow width is being zero extended to i128.
15016 if (!ValidLDType ||
15017 (LD->getExtensionType() != ISD::ZEXTLOAD &&
15018 LD->getExtensionType() != ISD::EXTLOAD))
15019 return SDValue();
15020
15021 SDValue LoadOps[] = {
15022 LD->getChain(), LD->getBasePtr(),
15023 DAG.getIntPtrConstant(MemoryType.getScalarSizeInBits(), DL)};
15024
15025 return DAG.getMemIntrinsicNode(PPCISD::LXVRZX, DL,
15026 DAG.getVTList(MVT::v1i128, MVT::Other),
15027 LoadOps, MemoryType, LD->getMemOperand());
15028}
15029
15030SDValue PPCTargetLowering::DAGCombineBuildVector(SDNode *N,
15031 DAGCombinerInfo &DCI) const {
15032 assert(N->getOpcode() == ISD::BUILD_VECTOR &&
15033 "Should be called with a BUILD_VECTOR node");
15034
15035 SelectionDAG &DAG = DCI.DAG;
15036 SDLoc dl(N);
15037
15038 if (!Subtarget.hasVSX())
15039 return SDValue();
15040
15041 // The target independent DAG combiner will leave a build_vector of
15042 // float-to-int conversions intact. We can generate MUCH better code for
15043 // a float-to-int conversion of a vector of floats.
15044 SDValue FirstInput = N->getOperand(0);
15045 if (FirstInput.getOpcode() == PPCISD::MFVSR) {
15046 SDValue Reduced = combineElementTruncationToVectorTruncation(N, DCI);
15047 if (Reduced)
15048 return Reduced;
15049 }
15050
15051 // If we're building a vector out of consecutive loads, just load that
15052 // vector type.
15053 SDValue Reduced = combineBVOfConsecutiveLoads(N, DAG);
15054 if (Reduced)
15055 return Reduced;
15056
15057 // If we're building a vector out of extended elements from another vector
15058 // we have P9 vector integer extend instructions. The code assumes legal
15059 // input types (i.e. it can't handle things like v4i16) so do not run before
15060 // legalization.
15061 if (Subtarget.hasP9Altivec() && !DCI.isBeforeLegalize()) {
15062 Reduced = combineBVOfVecSExt(N, DAG);
15063 if (Reduced)
15064 return Reduced;
15065 }
15066
15067 // On Power10, the Load VSX Vector Rightmost instructions can be utilized
15068 // if this is a BUILD_VECTOR of v1i128, and if the operand to the BUILD_VECTOR
15069 // is a load from <valid narrow width> to i128.
15070 if (Subtarget.isISA3_1()) {
15071 SDValue BVOfZLoad = combineBVZEXTLOAD(N, DAG);
15072 if (BVOfZLoad)
15073 return BVOfZLoad;
15074 }
15075
15076 if (N->getValueType(0) != MVT::v2f64)
15077 return SDValue();
15078
15079 // Looking for:
15080 // (build_vector ([su]int_to_fp (extractelt 0)), [su]int_to_fp (extractelt 1))
15081 if (FirstInput.getOpcode() != ISD::SINT_TO_FP &&
15082 FirstInput.getOpcode() != ISD::UINT_TO_FP)
15083 return SDValue();
15084 if (N->getOperand(1).getOpcode() != ISD::SINT_TO_FP &&
15085 N->getOperand(1).getOpcode() != ISD::UINT_TO_FP)
15086 return SDValue();
15087 if (FirstInput.getOpcode() != N->getOperand(1).getOpcode())
15088 return SDValue();
15089
15090 SDValue Ext1 = FirstInput.getOperand(0);
15091 SDValue Ext2 = N->getOperand(1).getOperand(0);
15092 if (Ext1.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
15093 Ext2.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
15094 return SDValue();
15095
15096 ConstantSDNode *Ext1Op = dyn_cast<ConstantSDNode>(Ext1.getOperand(1));
15097 ConstantSDNode *Ext2Op = dyn_cast<ConstantSDNode>(Ext2.getOperand(1));
15098 if (!Ext1Op || !Ext2Op)
15099 return SDValue();
15100 if (Ext1.getOperand(0).getValueType() != MVT::v4i32 ||
15101 Ext1.getOperand(0) != Ext2.getOperand(0))
15102 return SDValue();
15103
15104 int FirstElem = Ext1Op->getZExtValue();
15105 int SecondElem = Ext2Op->getZExtValue();
15106 int SubvecIdx;
15107 if (FirstElem == 0 && SecondElem == 1)
15108 SubvecIdx = Subtarget.isLittleEndian() ? 1 : 0;
15109 else if (FirstElem == 2 && SecondElem == 3)
15110 SubvecIdx = Subtarget.isLittleEndian() ? 0 : 1;
15111 else
15112 return SDValue();
15113
15114 SDValue SrcVec = Ext1.getOperand(0);
15115 auto NodeType = (N->getOperand(1).getOpcode() == ISD::SINT_TO_FP) ?
15116 PPCISD::SINT_VEC_TO_FP : PPCISD::UINT_VEC_TO_FP;
15117 return DAG.getNode(NodeType, dl, MVT::v2f64,
15118 SrcVec, DAG.getIntPtrConstant(SubvecIdx, dl));
15119}
15120
15121SDValue PPCTargetLowering::combineFPToIntToFP(SDNode *N,
15122 DAGCombinerInfo &DCI) const {
15123 assert((N->getOpcode() == ISD::SINT_TO_FP ||
15124 N->getOpcode() == ISD::UINT_TO_FP) &&
15125 "Need an int -> FP conversion node here");
15126
15127 if (useSoftFloat() || !Subtarget.has64BitSupport())
15128 return SDValue();
15129
15130 SelectionDAG &DAG = DCI.DAG;
15131 SDLoc dl(N);
15132 SDValue Op(N, 0);
15133
15134 // Don't handle ppc_fp128 here or conversions that are out-of-range capable
15135 // from the hardware.
15136 if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
15137 return SDValue();
15138 if (!Op.getOperand(0).getValueType().isSimple())
15139 return SDValue();
15140 if (Op.getOperand(0).getValueType().getSimpleVT() <= MVT(MVT::i1) ||
15141 Op.getOperand(0).getValueType().getSimpleVT() > MVT(MVT::i64))
15142 return SDValue();
15143
15144 SDValue FirstOperand(Op.getOperand(0));
15145 bool SubWordLoad = FirstOperand.getOpcode() == ISD::LOAD &&
15146 (FirstOperand.getValueType() == MVT::i8 ||
15147 FirstOperand.getValueType() == MVT::i16);
15148 if (Subtarget.hasP9Vector() && Subtarget.hasP9Altivec() && SubWordLoad) {
15149 bool Signed = N->getOpcode() == ISD::SINT_TO_FP;
15150 bool DstDouble = Op.getValueType() == MVT::f64;
15151 unsigned ConvOp = Signed ?
15152 (DstDouble ? PPCISD::FCFID : PPCISD::FCFIDS) :
15153 (DstDouble ? PPCISD::FCFIDU : PPCISD::FCFIDUS);
15154 SDValue WidthConst =
15155 DAG.getIntPtrConstant(FirstOperand.getValueType() == MVT::i8 ? 1 : 2,
15156 dl, false);
15157 LoadSDNode *LDN = cast<LoadSDNode>(FirstOperand.getNode());
15158 SDValue Ops[] = { LDN->getChain(), LDN->getBasePtr(), WidthConst };
15159 SDValue Ld = DAG.getMemIntrinsicNode(PPCISD::LXSIZX, dl,
15160 DAG.getVTList(MVT::f64, MVT::Other),
15161 Ops, MVT::i8, LDN->getMemOperand());
15162 DAG.makeEquivalentMemoryOrdering(LDN, Ld);
15163
15164 // For signed conversion, we need to sign-extend the value in the VSR
15165 if (Signed) {
15166 SDValue ExtOps[] = { Ld, WidthConst };
15167 SDValue Ext = DAG.getNode(PPCISD::VEXTS, dl, MVT::f64, ExtOps);
15168 return DAG.getNode(ConvOp, dl, DstDouble ? MVT::f64 : MVT::f32, Ext);
15169 } else
15170 return DAG.getNode(ConvOp, dl, DstDouble ? MVT::f64 : MVT::f32, Ld);
15171 }
15172
15173
15174 // For i32 intermediate values, unfortunately, the conversion functions
15175 // leave the upper 32 bits of the value undefined. Within the set of
15176 // scalar instructions, we have no method for zero- or sign-extending the
15177 // value. Thus, we cannot handle i32 intermediate values here.
15178 if (Op.getOperand(0).getValueType() == MVT::i32)
15179 return SDValue();
15180
15181 assert((Op.getOpcode() == ISD::SINT_TO_FP || Subtarget.hasFPCVT()) &&
15182 "UINT_TO_FP is supported only with FPCVT");
15183
15184 // If we have FCFIDS, then use it when converting to single-precision.
15185 // Otherwise, convert to double-precision and then round.
15186 unsigned FCFOp = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
15187 ? (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDUS
15188 : PPCISD::FCFIDS)
15189 : (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDU
15190 : PPCISD::FCFID);
15191 MVT FCFTy = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
15192 ? MVT::f32
15193 : MVT::f64;
15194
15195 // If we're converting from a float to an int and back to a float again,
15196 // then we don't need the store/load pair at all.
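// For example, for f64 %x
//   (sint_to_fp (fp_to_sint %x))
// becomes fcfid(fctidz(%x)), with only a final fp_round needed when the
// result type is f32 and FPCVT is unavailable.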
15197 if ((Op.getOperand(0).getOpcode() == ISD::FP_TO_UINT &&
15198 Subtarget.hasFPCVT()) ||
15199 (Op.getOperand(0).getOpcode() == ISD::FP_TO_SINT)) {
15200 SDValue Src = Op.getOperand(0).getOperand(0);
15201 if (Src.getValueType() == MVT::f32) {
15202 Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);
15203 DCI.AddToWorklist(Src.getNode());
15204 } else if (Src.getValueType() != MVT::f64) {
15205 // Make sure that we don't pick up a ppc_fp128 source value.
15206 return SDValue();
15207 }
15208
15209 unsigned FCTOp =
15210 Op.getOperand(0).getOpcode() == ISD::FP_TO_SINT ? PPCISD::FCTIDZ :
15211 PPCISD::FCTIDUZ;
15212
15213 SDValue Tmp = DAG.getNode(FCTOp, dl, MVT::f64, Src);
15214 SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Tmp);
15215
15216 if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) {
15217 FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP,
15218 DAG.getIntPtrConstant(0, dl, /*isTarget=*/true));
15219 DCI.AddToWorklist(FP.getNode());
15220 }
15221
15222 return FP;
15223 }
15224
15225 return SDValue();
15226}
15227
15228// expandVSXLoadForLE - Convert VSX loads (which may be intrinsics for
15229// builtins) into loads with swaps.
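// For example, on little endian
//   (v4i32 (load %ptr))
// becomes (bitcast (xxswapd (lxvd2x %ptr))), so the element order seen by the
// rest of the DAG is unchanged while using the big-endian-ordered lxvd2x.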
15230 SDValue PPCTargetLowering::expandVSXLoadForLE(SDNode *N,
15231 DAGCombinerInfo &DCI) const {
15232 // Delay VSX load for LE combine until after LegalizeOps to prioritize other
15233 // load combines.
15234 if (DCI.isBeforeLegalizeOps())
15235 return SDValue();
15236
15237 SelectionDAG &DAG = DCI.DAG;
15238 SDLoc dl(N);
15239 SDValue Chain;
15240 SDValue Base;
15241 MachineMemOperand *MMO;
15242
15243 switch (N->getOpcode()) {
15244 default:
15245 llvm_unreachable("Unexpected opcode for little endian VSX load");
15246 case ISD::LOAD: {
15247 LoadSDNode *LD = cast<LoadSDNode>(N);
15248 Chain = LD->getChain();
15249 Base = LD->getBasePtr();
15250 MMO = LD->getMemOperand();
15251 // If the MMO suggests this isn't a load of a full vector, leave
15252 // things alone. For a built-in, we have to make the change for
15253 // correctness, so if there is a size problem that will be a bug.
15254 if (!MMO->getSize().hasValue() || MMO->getSize().getValue() < 16)
15255 return SDValue();
15256 break;
15257 }
15258 case ISD::INTRINSIC_W_CHAIN: {
15259 MemIntrinsicSDNode *Intrin = cast<MemIntrinsicSDNode>(N);
15260 Chain = Intrin->getChain();
15261 // Similarly to the store case below, Intrin->getBasePtr() doesn't get
15262 // us what we want. Get operand 2 instead.
15263 Base = Intrin->getOperand(2);
15264 MMO = Intrin->getMemOperand();
15265 break;
15266 }
15267 }
15268
15269 MVT VecTy = N->getValueType(0).getSimpleVT();
15270
15271 SDValue LoadOps[] = { Chain, Base };
15272 SDValue Load = DAG.getMemIntrinsicNode(PPCISD::LXVD2X, dl,
15273 DAG.getVTList(MVT::v2f64, MVT::Other),
15274 LoadOps, MVT::v2f64, MMO);
15275
15276 DCI.AddToWorklist(Load.getNode());
15277 Chain = Load.getValue(1);
15278 SDValue Swap = DAG.getNode(
15279 PPCISD::XXSWAPD, dl, DAG.getVTList(MVT::v2f64, MVT::Other), Chain, Load);
15280 DCI.AddToWorklist(Swap.getNode());
15281
15282 // Add a bitcast if the resulting load type doesn't match v2f64.
15283 if (VecTy != MVT::v2f64) {
15284 SDValue N = DAG.getNode(ISD::BITCAST, dl, VecTy, Swap);
15285 DCI.AddToWorklist(N.getNode());
15286 // Package {bitcast value, swap's chain} to match Load's shape.
15287 return DAG.getNode(ISD::MERGE_VALUES, dl, DAG.getVTList(VecTy, MVT::Other),
15288 N, Swap.getValue(1));
15289 }
15290
15291 return Swap;
15292}
15293
15294// expandVSXStoreForLE - Convert VSX stores (which may be intrinsics for
15295// builtins) into stores with swaps.
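// For example, on little endian
//   (store v4i32 %v, %ptr)
// becomes (stxvd2x (xxswapd (bitcast v2f64 %v)), %ptr), mirroring the load
// expansion above.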
15296 SDValue PPCTargetLowering::expandVSXStoreForLE(SDNode *N,
15297 DAGCombinerInfo &DCI) const {
15298 // Delay VSX store for LE combine until after LegalizeOps to prioritize other
15299 // store combines.
15300 if (DCI.isBeforeLegalizeOps())
15301 return SDValue();
15302
15303 SelectionDAG &DAG = DCI.DAG;
15304 SDLoc dl(N);
15305 SDValue Chain;
15306 SDValue Base;
15307 unsigned SrcOpnd;
15308 MachineMemOperand *MMO;
15309
15310 switch (N->getOpcode()) {
15311 default:
15312 llvm_unreachable("Unexpected opcode for little endian VSX store");
15313 case ISD::STORE: {
15314 StoreSDNode *ST = cast<StoreSDNode>(N);
15315 Chain = ST->getChain();
15316 Base = ST->getBasePtr();
15317 MMO = ST->getMemOperand();
15318 SrcOpnd = 1;
15319 // If the MMO suggests this isn't a store of a full vector, leave
15320 // things alone. For a built-in, we have to make the change for
15321 // correctness, so if there is a size problem that will be a bug.
15322 if (!MMO->getSize().hasValue() || MMO->getSize().getValue() < 16)
15323 return SDValue();
15324 break;
15325 }
15326 case ISD::INTRINSIC_VOID: {
15327 MemIntrinsicSDNode *Intrin = cast<MemIntrinsicSDNode>(N);
15328 Chain = Intrin->getChain();
15329 // Intrin->getBasePtr() oddly does not get what we want.
15330 Base = Intrin->getOperand(3);
15331 MMO = Intrin->getMemOperand();
15332 SrcOpnd = 2;
15333 break;
15334 }
15335 }
15336
15337 SDValue Src = N->getOperand(SrcOpnd);
15338 MVT VecTy = Src.getValueType().getSimpleVT();
15339
15340 // All stores are done as v2f64 and possible bit cast.
15341 if (VecTy != MVT::v2f64) {
15342 Src = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, Src);
15343 DCI.AddToWorklist(Src.getNode());
15344 }
15345
15346 SDValue Swap = DAG.getNode(PPCISD::XXSWAPD, dl,
15347 DAG.getVTList(MVT::v2f64, MVT::Other), Chain, Src);
15348 DCI.AddToWorklist(Swap.getNode());
15349 Chain = Swap.getValue(1);
15350 SDValue StoreOps[] = { Chain, Swap, Base };
15351 SDValue Store = DAG.getMemIntrinsicNode(PPCISD::STXVD2X, dl,
15352 DAG.getVTList(MVT::Other),
15353 StoreOps, VecTy, MMO);
15354 DCI.AddToWorklist(Store.getNode());
15355 return Store;
15356}
15357
15358// Handle DAG combine for STORE (FP_TO_INT F).
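// For example, with VSX and FPCVT available,
//   (store (i32 (fp_to_sint f64 %f)), %ptr)
// becomes a single convert-and-store memory node, so the converted integer is
// stored directly from a VSR rather than being moved to a GPR first.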
15359SDValue PPCTargetLowering::combineStoreFPToInt(SDNode *N,
15360 DAGCombinerInfo &DCI) const {
15361 SelectionDAG &DAG = DCI.DAG;
15362 SDLoc dl(N);
15363 unsigned Opcode = N->getOperand(1).getOpcode();
15364 (void)Opcode;
15365 bool Strict = N->getOperand(1)->isStrictFPOpcode();
15366
15367 assert((Opcode == ISD::FP_TO_SINT || Opcode == ISD::FP_TO_UINT ||
15368 Opcode == ISD::STRICT_FP_TO_SINT || Opcode == ISD::STRICT_FP_TO_UINT)
15369 && "Not a FP_TO_INT Instruction!");
15370
15371 SDValue Val = N->getOperand(1).getOperand(Strict ? 1 : 0);
15372 EVT Op1VT = N->getOperand(1).getValueType();
15373 EVT ResVT = Val.getValueType();
15374
15375 if (!Subtarget.hasVSX() || !Subtarget.hasFPCVT() || !isTypeLegal(ResVT))
15376 return SDValue();
15377
15378 // Only perform combine for conversion to i64/i32 or power9 i16/i8.
15379 bool ValidTypeForStoreFltAsInt =
15380 (Op1VT == MVT::i32 || (Op1VT == MVT::i64 && Subtarget.isPPC64()) ||
15381 (Subtarget.hasP9Vector() && (Op1VT == MVT::i16 || Op1VT == MVT::i8)));
15382
15383 // TODO: Lower conversion from f128 on all VSX targets
15384 if (ResVT == MVT::ppcf128 || (ResVT == MVT::f128 && !Subtarget.hasP9Vector()))
15385 return SDValue();
15386
15387 if ((Op1VT != MVT::i64 && !Subtarget.hasP8Vector()) ||
15388 cast<StoreSDNode>(N)->isTruncatingStore() || !ValidTypeForStoreFltAsInt)
15389 return SDValue();
15390
15391 Val = convertFPToInt(N->getOperand(1), DAG, Subtarget);
15392
15393 // Set number of bytes being converted.
15394 unsigned ByteSize = Op1VT.getScalarSizeInBits() / 8;
15395 SDValue Ops[] = {N->getOperand(0), Val, N->getOperand(2),
15396 DAG.getIntPtrConstant(ByteSize, dl, false),
15397 DAG.getValueType(Op1VT)};
15398
15399 Val = DAG.getMemIntrinsicNode(PPCISD::ST_VSR_SCAL_INT, dl,
15400 DAG.getVTList(MVT::Other), Ops,
15401 cast<StoreSDNode>(N)->getMemoryVT(),
15402 cast<StoreSDNode>(N)->getMemOperand());
15403
15404 return Val;
15405}
15406
15407static bool isAlternatingShuffMask(const ArrayRef<int> &Mask, int NumElts) {
15408 // Check that the source of the element keeps flipping
15409 // (i.e. Mask[i] < NumElts -> Mask[i+i] >= NumElts).
15410 bool PrevElemFromFirstVec = Mask[0] < NumElts;
15411 for (int i = 1, e = Mask.size(); i < e; i++) {
15412 if (PrevElemFromFirstVec && Mask[i] < NumElts)
15413 return false;
15414 if (!PrevElemFromFirstVec && Mask[i] >= NumElts)
15415 return false;
15416 PrevElemFromFirstVec = !PrevElemFromFirstVec;
15417 }
15418 return true;
15419}
15420
15421static bool isSplatBV(SDValue Op) {
15422 if (Op.getOpcode() != ISD::BUILD_VECTOR)
15423 return false;
15424 SDValue FirstOp;
15425
15426 // Find first non-undef input.
15427 for (int i = 0, e = Op.getNumOperands(); i < e; i++) {
15428 FirstOp = Op.getOperand(i);
15429 if (!FirstOp.isUndef())
15430 break;
15431 }
15432
15433 // All inputs are undef or the same as the first non-undef input.
15434 for (int i = 1, e = Op.getNumOperands(); i < e; i++)
15435 if (Op.getOperand(i) != FirstOp && !Op.getOperand(i).isUndef())
15436 return false;
15437 return true;
15438}
15439
15440 static SDValue isScalarToVec(SDValue Op) {
15441 if (Op.getOpcode() == ISD::SCALAR_TO_VECTOR)
15442 return Op;
15443 if (Op.getOpcode() != ISD::BITCAST)
15444 return SDValue();
15445 Op = Op.getOperand(0);
15446 if (Op.getOpcode() == ISD::SCALAR_TO_VECTOR)
15447 return Op;
15448 return SDValue();
15449}
15450
15451// Fix up the shuffle mask to account for the fact that the result of
15452// scalar_to_vector is not in lane zero. This just takes all values in
15453// the ranges specified by the min/max indices and adds the number of
15454// elements required to ensure each element comes from the respective
15455// position in the valid lane.
15456// On little endian, that's just the corresponding element in the other
15457// half of the vector. On big endian, it is in the same half but right
15458// justified rather than left justified in that half.
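// For example, with v4i32 inputs on little endian, HalfVec is 2; a mask
// element that referred to lane 0 of a scalar_to_vector operand is rebased
// by 2 because the scalar actually lives in the other doubleword half of the
// permuted vector.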
15459 static void fixupShuffleMaskForPermutedSToV(SmallVectorImpl<int> &ShuffV,
15460 int LHSMaxIdx, int RHSMinIdx,
15461 int RHSMaxIdx, int HalfVec,
15462 unsigned ValidLaneWidth,
15463 const PPCSubtarget &Subtarget) {
15464 for (int i = 0, e = ShuffV.size(); i < e; i++) {
15465 int Idx = ShuffV[i];
15466 if ((Idx >= 0 && Idx < LHSMaxIdx) || (Idx >= RHSMinIdx && Idx < RHSMaxIdx))
15467 ShuffV[i] +=
15468 Subtarget.isLittleEndian() ? HalfVec : HalfVec - ValidLaneWidth;
15469 }
15470}
15471
15472// Replace a SCALAR_TO_VECTOR with a SCALAR_TO_VECTOR_PERMUTED except if
15473// the original is:
15474// (<n x Ty> (scalar_to_vector (Ty (extract_elt <n x Ty> %a, C))))
15475// In such a case, just change the shuffle mask to extract the element
15476// from the permuted index.
15477 static SDValue getSToVPermuted(SDValue OrigSToV, SelectionDAG &DAG,
15478 const PPCSubtarget &Subtarget) {
15479 SDLoc dl(OrigSToV);
15480 EVT VT = OrigSToV.getValueType();
15481 assert(OrigSToV.getOpcode() == ISD::SCALAR_TO_VECTOR &&
15482 "Expecting a SCALAR_TO_VECTOR here");
15483 SDValue Input = OrigSToV.getOperand(0);
15484
15485 if (Input.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
15486 ConstantSDNode *Idx = dyn_cast<ConstantSDNode>(Input.getOperand(1));
15487 SDValue OrigVector = Input.getOperand(0);
15488
15489 // Can't handle non-const element indices or different vector types
15490 // for the input to the extract and the output of the scalar_to_vector.
15491 if (Idx && VT == OrigVector.getValueType()) {
15492 unsigned NumElts = VT.getVectorNumElements();
15493 assert(
15494 NumElts > 1 &&
15495 "Cannot produce a permuted scalar_to_vector for one element vector");
15496 SmallVector<int, 16> NewMask(NumElts, -1);
15497 unsigned ResultInElt = NumElts / 2;
15498 ResultInElt -= Subtarget.isLittleEndian() ? 0 : 1;
15499 NewMask[ResultInElt] = Idx->getZExtValue();
15500 return DAG.getVectorShuffle(VT, dl, OrigVector, OrigVector, NewMask);
15501 }
15502 }
15503 return DAG.getNode(PPCISD::SCALAR_TO_VECTOR_PERMUTED, dl, VT,
15504 OrigSToV.getOperand(0));
15505}
15506
15507// On little endian subtargets, combine shuffles such as:
15508// vector_shuffle<16,1,17,3,18,5,19,7,20,9,21,11,22,13,23,15>, <zero>, %b
15509// into:
15510// vector_shuffle<16,0,17,1,18,2,19,3,20,4,21,5,22,6,23,7>, <zero>, %b
15511// because the latter can be matched to a single instruction merge.
15512// Furthermore, SCALAR_TO_VECTOR on little endian always involves a permute
15513// to put the value into element zero. Adjust the shuffle mask so that the
15514// vector can remain in permuted form (to prevent a swap prior to a shuffle).
15515// On big endian targets, this is still useful for SCALAR_TO_VECTOR
15516// nodes with elements smaller than doubleword because all the ways
15517// of getting scalar data into a vector register put the value in the
15518// rightmost element of the left half of the vector.
15519SDValue PPCTargetLowering::combineVectorShuffle(ShuffleVectorSDNode *SVN,
15520 SelectionDAG &DAG) const {
15521 SDValue LHS = SVN->getOperand(0);
15522 SDValue RHS = SVN->getOperand(1);
15523 auto Mask = SVN->getMask();
15524 int NumElts = LHS.getValueType().getVectorNumElements();
15525 SDValue Res(SVN, 0);
15526 SDLoc dl(SVN);
15527 bool IsLittleEndian = Subtarget.isLittleEndian();
15528
15529 // On big endian targets this is only useful for subtargets with direct moves.
15530 // On little endian targets it would be useful for all subtargets with VSX.
15531 // However adding special handling for LE subtargets without direct moves
15532 // would be wasted effort since the minimum arch for LE is ISA 2.07 (Power8)
15533 // which includes direct moves.
15534 if (!Subtarget.hasDirectMove())
15535 return Res;
15536
15537 // If this is not a shuffle of a shuffle and the first element comes from
15538 // the second vector, canonicalize to the commuted form. This will make it
15539 // more likely to match one of the single instruction patterns.
15540 if (Mask[0] >= NumElts && LHS.getOpcode() != ISD::VECTOR_SHUFFLE &&
15541 RHS.getOpcode() != ISD::VECTOR_SHUFFLE) {
15542 std::swap(LHS, RHS);
15543 Res = DAG.getCommutedVectorShuffle(*SVN);
15544 Mask = cast<ShuffleVectorSDNode>(Res)->getMask();
15545 }
15546
15547 // Adjust the shuffle mask if either input vector comes from a
15548 // SCALAR_TO_VECTOR and keep the respective input vector in permuted
15549 // form (to prevent the need for a swap).
15550 SmallVector<int, 16> ShuffV(Mask);
15551 SDValue SToVLHS = isScalarToVec(LHS);
15552 SDValue SToVRHS = isScalarToVec(RHS);
15553 if (SToVLHS || SToVRHS) {
15554 // FIXME: If both LHS and RHS are SCALAR_TO_VECTOR, but are not the
15555 // same type and have differing element sizes, then do not perform
15556 // the following transformation. The current transformation for
15557 // SCALAR_TO_VECTOR assumes that both input vectors have the same
15558 // element size. This will be updated in the future to account for
15559 // differing sizes of the LHS and RHS.
15560 if (SToVLHS && SToVRHS &&
15561 (SToVLHS.getValueType().getScalarSizeInBits() !=
15562 SToVRHS.getValueType().getScalarSizeInBits()))
15563 return Res;
15564
15565 int NumEltsIn = SToVLHS ? SToVLHS.getValueType().getVectorNumElements()
15566 : SToVRHS.getValueType().getVectorNumElements();
15567 int NumEltsOut = ShuffV.size();
15568 // The width of the "valid lane" (i.e. the lane that contains the value that
15569 // is vectorized) needs to be expressed in terms of the number of elements
15570 // of the shuffle. It is therefore the ratio of the scalar sizes before and
15571 // after any bitcast.
15572 unsigned ValidLaneWidth =
15573 SToVLHS ? SToVLHS.getValueType().getScalarSizeInBits() /
15574 LHS.getValueType().getScalarSizeInBits()
15575 : SToVRHS.getValueType().getScalarSizeInBits() /
15576 RHS.getValueType().getScalarSizeInBits();
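    // For instance, if the scalar_to_vector produced a v2i64 but the shuffle
    // operates on v16i8 after a bitcast, ValidLaneWidth is 64 / 8 = 8 shuffle
    // elements.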
15577
15578 // Initially assume that neither input is permuted. These will be adjusted
15579 // accordingly if either input is.
15580 int LHSMaxIdx = -1;
15581 int RHSMinIdx = -1;
15582 int RHSMaxIdx = -1;
15583 int HalfVec = LHS.getValueType().getVectorNumElements() / 2;
15584
15585 // Get the permuted scalar to vector nodes for the source(s) that come from
15586 // ISD::SCALAR_TO_VECTOR.
15587 // On big endian systems, this only makes sense for element sizes smaller
15588 // than 64 bits since for 64-bit elements, all instructions already put
15589 // the value into element zero. Since the scalar sizes of LHS and RHS may
15590 // differ after isScalarToVec, each is checked using its own size.
15591 if (SToVLHS) {
15592 if (!IsLittleEndian && SToVLHS.getValueType().getScalarSizeInBits() >= 64)
15593 return Res;
15594 // Set up the values for the shuffle vector fixup.
15595 LHSMaxIdx = NumEltsOut / NumEltsIn;
15596 SToVLHS = getSToVPermuted(SToVLHS, DAG, Subtarget);
15597 if (SToVLHS.getValueType() != LHS.getValueType())
15598 SToVLHS = DAG.getBitcast(LHS.getValueType(), SToVLHS);
15599 LHS = SToVLHS;
15600 }
15601 if (SToVRHS) {
15602 if (!IsLittleEndian && SToVRHS.getValueType().getScalarSizeInBits() >= 64)
15603 return Res;
15604 RHSMinIdx = NumEltsOut;
15605 RHSMaxIdx = NumEltsOut / NumEltsIn + RHSMinIdx;
15606 SToVRHS = getSToVPermuted(SToVRHS, DAG, Subtarget);
15607 if (SToVRHS.getValueType() != RHS.getValueType())
15608 SToVRHS = DAG.getBitcast(RHS.getValueType(), SToVRHS);
15609 RHS = SToVRHS;
15610 }
15611
15612 // Fix up the shuffle mask to reflect where the desired element actually is.
15613 // The minimum and maximum indices that correspond to element zero for both
15614 // the LHS and RHS are computed and will control which shuffle mask entries
15615 // are to be changed. For example, if the RHS is permuted, any shuffle mask
15616 // entries in the range [RHSMinIdx,RHSMaxIdx) will be adjusted.
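    // For example, with a v16i8 shuffle whose RHS came from a v4i32
    // scalar_to_vector, RHSMinIdx is 16 and RHSMaxIdx is 16/4 + 16 = 20, so only
    // mask entries in [16, 20) are remapped.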
15617 fixupShuffleMaskForPermutedSToV(ShuffV, LHSMaxIdx, RHSMinIdx, RHSMaxIdx,
15618 HalfVec, ValidLaneWidth, Subtarget);
15619 Res = DAG.getVectorShuffle(SVN->getValueType(0), dl, LHS, RHS, ShuffV);
15620
15621 // We may have simplified away the shuffle. We won't be able to do anything
15622 // further with it here.
15623 if (!isa<ShuffleVectorSDNode>(Res))
15624 return Res;
15625 Mask = cast<ShuffleVectorSDNode>(Res)->getMask();
15626 }
15627
15628 SDValue TheSplat = IsLittleEndian ? RHS : LHS;
15629 // The common case after we commuted the shuffle is that the RHS is a splat
15630 // and we have elements coming in from the splat at indices that are not
15631 // conducive to using a merge.
15632 // Example:
15633 // vector_shuffle<0,17,1,19,2,21,3,23,4,25,5,27,6,29,7,31> t1, <zero>
15634 if (!isSplatBV(TheSplat))
15635 return Res;
15636
15637 // We are looking for a mask such that all even elements are from
15638 // one vector and all odd elements from the other.
15639 if (!isAlternatingShuffMask(Mask, NumElts))
15640 return Res;
15641
15642 // Adjust the mask so we are pulling in the same index from the splat
15643 // as the index from the interesting vector in consecutive elements.
15644 if (IsLittleEndian) {
15645 // Example (even elements from first vector):
15646 // vector_shuffle<0,16,1,17,2,18,3,19,4,20,5,21,6,22,7,23> t1, <zero>
15647 if (Mask[0] < NumElts)
15648 for (int i = 1, e = Mask.size(); i < e; i += 2) {
15649 if (ShuffV[i] < 0)
15650 continue;
15651 // If element from non-splat is undef, pick first element from splat.
15652 ShuffV[i] = (ShuffV[i - 1] >= 0 ? ShuffV[i - 1] : 0) + NumElts;
15653 }
15654 // Example (odd elements from first vector):
15655 // vector_shuffle<16,0,17,1,18,2,19,3,20,4,21,5,22,6,23,7> t1, <zero>
15656 else
15657 for (int i = 0, e = Mask.size(); i < e; i += 2) {
15658 if (ShuffV[i] < 0)
15659 continue;
15660 // If element from non-splat is undef, pick first element from splat.
15661 ShuffV[i] = (ShuffV[i + 1] >= 0 ? ShuffV[i + 1] : 0) + NumElts;
15662 }
15663 } else {
15664 // Example (even elements from first vector):
15665 // vector_shuffle<0,16,1,17,2,18,3,19,4,20,5,21,6,22,7,23> <zero>, t1
15666 if (Mask[0] < NumElts)
15667 for (int i = 0, e = Mask.size(); i < e; i += 2) {
15668 if (ShuffV[i] < 0)
15669 continue;
15670 // If element from non-splat is undef, pick first element from splat.
15671 ShuffV[i] = ShuffV[i + 1] >= 0 ? ShuffV[i + 1] - NumElts : 0;
15672 }
15673 // Example (odd elements from first vector):
15674 // vector_shuffle<16,0,17,1,18,2,19,3,20,4,21,5,22,6,23,7> <zero>, t1
15675 else
15676 for (int i = 1, e = Mask.size(); i < e; i += 2) {
15677 if (ShuffV[i] < 0)
15678 continue;
15679 // If element from non-splat is undef, pick first element from splat.
15680 ShuffV[i] = ShuffV[i - 1] >= 0 ? ShuffV[i - 1] - NumElts : 0;
15681 }
15682 }
15683
15684 // If the RHS has undefs, we need to remove them since we may have created
15685 // a shuffle that adds those instead of the splat value.
15686 SDValue SplatVal =
15687 cast<BuildVectorSDNode>(TheSplat.getNode())->getSplatValue();
15688 TheSplat = DAG.getSplatBuildVector(TheSplat.getValueType(), dl, SplatVal);
15689
15690 if (IsLittleEndian)
15691 RHS = TheSplat;
15692 else
15693 LHS = TheSplat;
15694 return DAG.getVectorShuffle(SVN->getValueType(0), dl, LHS, RHS, ShuffV);
15695}
15696
15697SDValue PPCTargetLowering::combineVReverseMemOP(ShuffleVectorSDNode *SVN,
15698 LSBaseSDNode *LSBase,
15699 DAGCombinerInfo &DCI) const {
15700 assert((ISD::isNormalLoad(LSBase) || ISD::isNormalStore(LSBase)) &&
15701 "Not a reverse memop pattern!");
15702
15703 auto IsElementReverse = [](const ShuffleVectorSDNode *SVN) -> bool {
15704 auto Mask = SVN->getMask();
15705 int i = 0;
15706 auto I = Mask.rbegin();
15707 auto E = Mask.rend();
15708
15709 for (; I != E; ++I) {
15710 if (*I != i)
15711 return false;
15712 i++;
15713 }
15714 return true;
15715 };
15716
15717 SelectionDAG &DAG = DCI.DAG;
15718 EVT VT = SVN->getValueType(0);
15719
15720 if (!isTypeLegal(VT) || !Subtarget.isLittleEndian() || !Subtarget.hasVSX())
15721 return SDValue();
15722
15723 // Before Power9, the PPCVSXSwapRemoval pass rewrites the element order.
15724 // See the comment in PPCVSXSwapRemoval.cpp.
15725 // This combine conflicts with that optimization, so we don't do it here.
15726 if (!Subtarget.hasP9Vector())
15727 return SDValue();
15728
15729 if (!IsElementReverse(SVN))
15730 return SDValue();
15731
15732 if (LSBase->getOpcode() == ISD::LOAD) {
15733 // If result 0 of the load has any user other than the shufflevector
15734 // instruction, it is not profitable to replace the shufflevector with
15735 // a reverse load.
15736 for (SDNode::use_iterator UI = LSBase->use_begin(), UE = LSBase->use_end();
15737 UI != UE; ++UI)
15738 if (UI.getUse().getResNo() == 0 && UI->getOpcode() != ISD::VECTOR_SHUFFLE)
15739 return SDValue();
15740
15741 SDLoc dl(LSBase);
15742 SDValue LoadOps[] = {LSBase->getChain(), LSBase->getBasePtr()};
15743 return DAG.getMemIntrinsicNode(
15744 PPCISD::LOAD_VEC_BE, dl, DAG.getVTList(VT, MVT::Other), LoadOps,
15745 LSBase->getMemoryVT(), LSBase->getMemOperand());
15746 }
15747
15748 if (LSBase->getOpcode() == ISD::STORE) {
15749 // If there are other uses of the shuffle, the swap cannot be avoided.
15750 // Forcing the use of an X-Form (since swapped stores only have
15751 // X-Forms) without removing the swap is unprofitable.
15752 if (!SVN->hasOneUse())
15753 return SDValue();
15754
15755 SDLoc dl(LSBase);
15756 SDValue StoreOps[] = {LSBase->getChain(), SVN->getOperand(0),
15757 LSBase->getBasePtr()};
15758 return DAG.getMemIntrinsicNode(
15759 PPCISD::STORE_VEC_BE, dl, DAG.getVTList(MVT::Other), StoreOps,
15760 LSBase->getMemoryVT(), LSBase->getMemOperand());
15761 }
15762
15763 llvm_unreachable("Expected a load or store node here");
15764}
15765
15766static bool isStoreConditional(SDValue Intrin, unsigned &StoreWidth) {
15767 unsigned IntrinsicID = Intrin.getConstantOperandVal(1);
15768 if (IntrinsicID == Intrinsic::ppc_stdcx)
15769 StoreWidth = 8;
15770 else if (IntrinsicID == Intrinsic::ppc_stwcx)
15771 StoreWidth = 4;
15772 else if (IntrinsicID == Intrinsic::ppc_sthcx)
15773 StoreWidth = 2;
15774 else if (IntrinsicID == Intrinsic::ppc_stbcx)
15775 StoreWidth = 1;
15776 else
15777 return false;
15778 return true;
15779}
15780
15781SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
15782 DAGCombinerInfo &DCI) const {
15783 SelectionDAG &DAG = DCI.DAG;
15784 SDLoc dl(N);
15785 switch (N->getOpcode()) {
15786 default: break;
15787 case ISD::ADD:
15788 return combineADD(N, DCI);
15789 case ISD::AND: {
15790 // We don't want (and (zext (shift...)), C) if C fits in the width of the
15791 // original input as that will prevent us from selecting optimal rotates.
15792 // This only matters if the input to the extend is i32 widened to i64.
15793 SDValue Op1 = N->getOperand(0);
15794 SDValue Op2 = N->getOperand(1);
15795 if ((Op1.getOpcode() != ISD::ZERO_EXTEND &&
15796 Op1.getOpcode() != ISD::ANY_EXTEND) ||
15797 !isa<ConstantSDNode>(Op2) || N->getValueType(0) != MVT::i64 ||
15798 Op1.getOperand(0).getValueType() != MVT::i32)
15799 break;
15800 SDValue NarrowOp = Op1.getOperand(0);
15801 if (NarrowOp.getOpcode() != ISD::SHL && NarrowOp.getOpcode() != ISD::SRL &&
15802 NarrowOp.getOpcode() != ISD::ROTL && NarrowOp.getOpcode() != ISD::ROTR)
15803 break;
15804
15805 uint64_t Imm = Op2->getAsZExtVal();
15806 // Make sure that the constant is narrow enough to fit in the narrow type.
15807 if (!isUInt<32>(Imm))
15808 break;
15809 SDValue ConstOp = DAG.getConstant(Imm, dl, MVT::i32);
15810 SDValue NarrowAnd = DAG.getNode(ISD::AND, dl, MVT::i32, NarrowOp, ConstOp);
15811 return DAG.getZExtOrTrunc(NarrowAnd, dl, N->getValueType(0));
15812 }
15813 case ISD::SHL:
15814 return combineSHL(N, DCI);
15815 case ISD::SRA:
15816 return combineSRA(N, DCI);
15817 case ISD::SRL:
15818 return combineSRL(N, DCI);
15819 case ISD::MUL:
15820 return combineMUL(N, DCI);
15821 case ISD::FMA:
15822 case PPCISD::FNMSUB:
15823 return combineFMALike(N, DCI);
15824 case PPCISD::SHL:
15825 if (isNullConstant(N->getOperand(0))) // 0 << V -> 0.
15826 return N->getOperand(0);
15827 break;
15828 case PPCISD::SRL:
15829 if (isNullConstant(N->getOperand(0))) // 0 >>u V -> 0.
15830 return N->getOperand(0);
15831 break;
15832 case PPCISD::SRA:
15833 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) {
15834 if (C->isZero() || // 0 >>s V -> 0.
15835 C->isAllOnes()) // -1 >>s V -> -1.
15836 return N->getOperand(0);
15837 }
15838 break;
15839 case ISD::SIGN_EXTEND:
15840 case ISD::ZERO_EXTEND:
15841 case ISD::ANY_EXTEND:
15842 return DAGCombineExtBoolTrunc(N, DCI);
15843 case ISD::TRUNCATE:
15844 return combineTRUNCATE(N, DCI);
15845 case ISD::SETCC:
15846 if (SDValue CSCC = combineSetCC(N, DCI))
15847 return CSCC;
15848 [[fallthrough]];
15849 case ISD::SELECT_CC:
15850 return DAGCombineTruncBoolExt(N, DCI);
15851 case ISD::SINT_TO_FP:
15852 case ISD::UINT_TO_FP:
15853 return combineFPToIntToFP(N, DCI);
15854 case ISD::VECTOR_SHUFFLE:
15855 if (ISD::isNormalLoad(N->getOperand(0).getNode())) {
15856 LSBaseSDNode* LSBase = cast<LSBaseSDNode>(N->getOperand(0));
15857 return combineVReverseMemOP(cast<ShuffleVectorSDNode>(N), LSBase, DCI);
15858 }
15859 return combineVectorShuffle(cast<ShuffleVectorSDNode>(N), DCI.DAG);
15860 case ISD::STORE: {
15861
15862 EVT Op1VT = N->getOperand(1).getValueType();
15863 unsigned Opcode = N->getOperand(1).getOpcode();
15864
15865 if (Opcode == ISD::FP_TO_SINT || Opcode == ISD::FP_TO_UINT ||
15866 Opcode == ISD::STRICT_FP_TO_SINT || Opcode == ISD::STRICT_FP_TO_UINT) {
15867 SDValue Val = combineStoreFPToInt(N, DCI);
15868 if (Val)
15869 return Val;
15870 }
15871
15872 if (Opcode == ISD::VECTOR_SHUFFLE && ISD::isNormalStore(N)) {
15873 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N->getOperand(1));
15874 SDValue Val = combineVReverseMemOP(SVN, cast<LSBaseSDNode>(N), DCI);
15875 if (Val)
15876 return Val;
15877 }
15878
15879 // Turn STORE (BSWAP) -> sthbrx/stwbrx.
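    // e.g. (store (bswap i32 %val), %ptr) can then be selected as a single
    // stwbrx rather than a separate byte swap followed by a store.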
15880 if (cast<StoreSDNode>(N)->isUnindexed() && Opcode == ISD::BSWAP &&
15881 N->getOperand(1).getNode()->hasOneUse() &&
15882 (Op1VT == MVT::i32 || Op1VT == MVT::i16 ||
15883 (Subtarget.hasLDBRX() && Subtarget.isPPC64() && Op1VT == MVT::i64))) {
15884
15885 // STBRX can only handle simple types, and it makes no sense to store fewer
15886 // than two bytes in byte-reversed order.
15887 EVT mVT = cast<StoreSDNode>(N)->getMemoryVT();
15888 if (mVT.isExtended() || mVT.getSizeInBits() < 16)
15889 break;
15890
15891 SDValue BSwapOp = N->getOperand(1).getOperand(0);
15892 // Do an any-extend to 32-bits if this is a half-word input.
15893 if (BSwapOp.getValueType() == MVT::i16)
15894 BSwapOp = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, BSwapOp);
15895
15896 // If the type of the BSWAP operand is wider than the stored memory width,
15897 // it needs to be shifted right before the STBRX.
15898 if (Op1VT.bitsGT(mVT)) {
15899 int Shift = Op1VT.getSizeInBits() - mVT.getSizeInBits();
15900 BSwapOp = DAG.getNode(ISD::SRL, dl, Op1VT, BSwapOp,
15901 DAG.getConstant(Shift, dl, MVT::i32));
15902 // Need to truncate if this is a bswap of i64 stored as i32/i16.
15903 if (Op1VT == MVT::i64)
15904 BSwapOp = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, BSwapOp);
15905 }
15906
15907 SDValue Ops[] = {
15908 N->getOperand(0), BSwapOp, N->getOperand(2), DAG.getValueType(mVT)
15909 };
15910 return
15911 DAG.getMemIntrinsicNode(PPCISD::STBRX, dl, DAG.getVTList(MVT::Other),
15912 Ops, cast<StoreSDNode>(N)->getMemoryVT(),
15913 cast<StoreSDNode>(N)->getMemOperand());
15914 }
15915
15916 // STORE Constant:i32<0> -> STORE<trunc to i32> Constant:i64<0>
15917 // So it can increase the chance of CSE constant construction.
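    // For example, a store of Constant:i32<0> near code that also materializes
    // Constant:i64<0> can then reuse the same zero register.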
15918 if (Subtarget.isPPC64() && !DCI.isBeforeLegalize() &&
15919 isa<ConstantSDNode>(N->getOperand(1)) && Op1VT == MVT::i32) {
15920 // Need to sign-extend to 64 bits to handle negative values.
15921 EVT MemVT = cast<StoreSDNode>(N)->getMemoryVT();
15922 uint64_t Val64 = SignExtend64(N->getConstantOperandVal(1),
15923 MemVT.getSizeInBits());
15924 SDValue Const64 = DAG.getConstant(Val64, dl, MVT::i64);
15925
15926 // DAG.getTruncStore() can't be used here because it doesn't accept
15927 // the general (base + offset) addressing mode.
15928 // So we use UpdateNodeOperands and setTruncatingStore instead.
15929 DAG.UpdateNodeOperands(N, N->getOperand(0), Const64, N->getOperand(2),
15930 N->getOperand(3));
15931 cast<StoreSDNode>(N)->setTruncatingStore(true);
15932 return SDValue(N, 0);
15933 }
15934
15935 // For little endian, VSX stores require generating xxswapd/lxvd2x.
15936 // Not needed on ISA 3.0 based CPUs since we have a non-permuting store.
15937 if (Op1VT.isSimple()) {
15938 MVT StoreVT = Op1VT.getSimpleVT();
15939 if (Subtarget.needsSwapsForVSXMemOps() &&
15940 (StoreVT == MVT::v2f64 || StoreVT == MVT::v2i64 ||
15941 StoreVT == MVT::v4f32 || StoreVT == MVT::v4i32))
15942 return expandVSXStoreForLE(N, DCI);
15943 }
15944 break;
15945 }
15946 case ISD::LOAD: {
15947 LoadSDNode *LD = cast<LoadSDNode>(N);
15948 EVT VT = LD->getValueType(0);
15949
15950 // For little endian, VSX loads require generating lxvd2x/xxswapd.
15951 // Not needed on ISA 3.0 based CPUs since we have a non-permuting load.
15952 if (VT.isSimple()) {
15953 MVT LoadVT = VT.getSimpleVT();
15954 if (Subtarget.needsSwapsForVSXMemOps() &&
15955 (LoadVT == MVT::v2f64 || LoadVT == MVT::v2i64 ||
15956 LoadVT == MVT::v4f32 || LoadVT == MVT::v4i32))
15957 return expandVSXLoadForLE(N, DCI);
15958 }
15959
15960 // We sometimes end up with a 64-bit integer load, from which we extract
15961 // two single-precision floating-point numbers. This happens with
15962 // std::complex<float>, and other similar structures, because of the way we
15963 // canonicalize structure copies. However, if we lack direct moves,
15964 // then the final bitcasts from the extracted integer values to the
15965 // floating-point numbers turn into store/load pairs. Even with direct moves,
15966 // just loading the two floating-point numbers is likely better.
15967 auto ReplaceTwoFloatLoad = [&]() {
15968 if (VT != MVT::i64)
15969 return false;
15970
15971 if (LD->getExtensionType() != ISD::NON_EXTLOAD ||
15972 LD->isVolatile())
15973 return false;
15974
15975 // We're looking for a sequence like this:
15976 // t13: i64,ch = load<LD8[%ref.tmp]> t0, t6, undef:i64
15977 // t16: i64 = srl t13, Constant:i32<32>
15978 // t17: i32 = truncate t16
15979 // t18: f32 = bitcast t17
15980 // t19: i32 = truncate t13
15981 // t20: f32 = bitcast t19
15982
15983 if (!LD->hasNUsesOfValue(2, 0))
15984 return false;
15985
15986 auto UI = LD->use_begin();
15987 while (UI.getUse().getResNo() != 0) ++UI;
15988 SDNode *Trunc = *UI++;
15989 while (UI.getUse().getResNo() != 0) ++UI;
15990 SDNode *RightShift = *UI;
15991 if (Trunc->getOpcode() != ISD::TRUNCATE)
15992 std::swap(Trunc, RightShift);
15993
15994 if (Trunc->getOpcode() != ISD::TRUNCATE ||
15995 Trunc->getValueType(0) != MVT::i32 ||
15996 !Trunc->hasOneUse())
15997 return false;
15998 if (RightShift->getOpcode() != ISD::SRL ||
15999 !isa<ConstantSDNode>(RightShift->getOperand(1)) ||
16000 RightShift->getConstantOperandVal(1) != 32 ||
16001 !RightShift->hasOneUse())
16002 return false;
16003
16004 SDNode *Trunc2 = *RightShift->use_begin();
16005 if (Trunc2->getOpcode() != ISD::TRUNCATE ||
16006 Trunc2->getValueType(0) != MVT::i32 ||
16007 !Trunc2->hasOneUse())
16008 return false;
16009
16010 SDNode *Bitcast = *Trunc->use_begin();
16011 SDNode *Bitcast2 = *Trunc2->use_begin();
16012
16013 if (Bitcast->getOpcode() != ISD::BITCAST ||
16014 Bitcast->getValueType(0) != MVT::f32)
16015 return false;
16016 if (Bitcast2->getOpcode() != ISD::BITCAST ||
16017 Bitcast2->getValueType(0) != MVT::f32)
16018 return false;
16019
16020 if (Subtarget.isLittleEndian())
16021 std::swap(Bitcast, Bitcast2);
16022
16023 // Bitcast has the second float (in memory-layout order) and Bitcast2
16024 // has the first one.
16025
16026 SDValue BasePtr = LD->getBasePtr();
16027 if (LD->isIndexed()) {
16028 assert(LD->getAddressingMode() == ISD::PRE_INC &&
16029 "Non-pre-inc AM on PPC?");
16030 BasePtr =
16031 DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
16032 LD->getOffset());
16033 }
16034
16035 auto MMOFlags =
16036 LD->getMemOperand()->getFlags() & ~MachineMemOperand::MOVolatile;
16037 SDValue FloatLoad = DAG.getLoad(MVT::f32, dl, LD->getChain(), BasePtr,
16038 LD->getPointerInfo(), LD->getAlign(),
16039 MMOFlags, LD->getAAInfo());
16040 SDValue AddPtr =
16041 DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(),
16042 BasePtr, DAG.getIntPtrConstant(4, dl));
16043 SDValue FloatLoad2 = DAG.getLoad(
16044 MVT::f32, dl, SDValue(FloatLoad.getNode(), 1), AddPtr,
16045 LD->getPointerInfo().getWithOffset(4),
16046 commonAlignment(LD->getAlign(), 4), MMOFlags, LD->getAAInfo());
16047
16048 if (LD->isIndexed()) {
16049 // Note that DAGCombine should re-form any pre-increment load(s) from
16050 // what is produced here if that makes sense.
16051 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), BasePtr);
16052 }
16053
16054 DCI.CombineTo(Bitcast2, FloatLoad);
16055 DCI.CombineTo(Bitcast, FloatLoad2);
16056
16057 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, LD->isIndexed() ? 2 : 1),
16058 SDValue(FloatLoad2.getNode(), 1));
16059 return true;
16060 };
16061
16062 if (ReplaceTwoFloatLoad())
16063 return SDValue(N, 0);
16064
16065 EVT MemVT = LD->getMemoryVT();
16066 Type *Ty = MemVT.getTypeForEVT(*DAG.getContext());
16067 Align ABIAlignment = DAG.getDataLayout().getABITypeAlign(Ty);
16068 if (LD->isUnindexed() && VT.isVector() &&
16069 ((Subtarget.hasAltivec() && ISD::isNON_EXTLoad(N) &&
16070 // P8 and later hardware should just use LOAD.
16071 !Subtarget.hasP8Vector() &&
16072 (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32 ||
16073 VT == MVT::v4f32))) &&
16074 LD->getAlign() < ABIAlignment) {
16075 // This is a type-legal unaligned Altivec load.
16076 SDValue Chain = LD->getChain();
16077 SDValue Ptr = LD->getBasePtr();
16078 bool isLittleEndian = Subtarget.isLittleEndian();
16079
16080 // This implements the loading of unaligned vectors as described in
16081 // the venerable Apple Velocity Engine overview. Specifically:
16082 // https://developer.apple.com/hardwaredrivers/ve/alignment.html
16083 // https://developer.apple.com/hardwaredrivers/ve/code_optimization.html
16084 //
16085 // The general idea is to expand a sequence of one or more unaligned
16086 // loads into an alignment-based permutation-control instruction (lvsl
16087 // or lvsr), a series of regular vector loads (which always truncate
16088 // their input address to an aligned address), and a series of
16089 // permutations. The results of these permutations are the requested
16090 // loaded values. The trick is that the last "extra" load is not taken
16091 // from the address you might suspect (sizeof(vector) bytes after the
16092 // last requested load), but rather sizeof(vector) - 1 bytes after the
16093 // last requested vector. The point of this is to avoid a page fault if
16094 // the base address happened to be aligned. This works because if the
16095 // base address is aligned, then adding less than a full vector length
16096 // will cause the last vector in the sequence to be (re)loaded.
16097 // Otherwise, the next vector will be fetched as you might suspect was
16098 // necessary.
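      // As a rough illustration: for a v4i32 load from 0x1008, lvsl/lvsr derives
      // the permute control from the low address bits, the first lvx truncates to
      // 0x1000, the second loads at 0x1008 + 15 = 0x1017 (truncating to 0x1010),
      // and vperm then picks out the 16 requested bytes. Had the address already
      // been 0x1000, the second load would land back at 0x1000, never touching
      // the following page.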
16099
16100 // We might be able to reuse the permutation generation from
16101 // a different base address offset from this one by an aligned amount.
16102 // The INTRINSIC_WO_CHAIN DAG combine will attempt to perform this
16103 // optimization later.
16104 Intrinsic::ID Intr, IntrLD, IntrPerm;
16105 MVT PermCntlTy, PermTy, LDTy;
16106 Intr = isLittleEndian ? Intrinsic::ppc_altivec_lvsr
16107 : Intrinsic::ppc_altivec_lvsl;
16108 IntrLD = Intrinsic::ppc_altivec_lvx;
16109 IntrPerm = Intrinsic::ppc_altivec_vperm;
16110 PermCntlTy = MVT::v16i8;
16111 PermTy = MVT::v4i32;
16112 LDTy = MVT::v4i32;
16113
16114 SDValue PermCntl = BuildIntrinsicOp(Intr, Ptr, DAG, dl, PermCntlTy);
16115
16116 // Create the new MMO for the new base load. It is like the original MMO,
16117 // but represents an area in memory almost twice the vector size centered
16118 // on the original address. If the address is unaligned, we might start
16119 // reading up to (sizeof(vector)-1) bytes below the address of the
16120 // original unaligned load.
16121 MachineFunction &MF = DAG.getMachineFunction();
16122 MachineMemOperand *BaseMMO =
16123 MF.getMachineMemOperand(LD->getMemOperand(),
16124 -(int64_t)MemVT.getStoreSize()+1,
16125 2*MemVT.getStoreSize()-1);
16126
16127 // Create the new base load.
16128 SDValue LDXIntID =
16129 DAG.getTargetConstant(IntrLD, dl, getPointerTy(MF.getDataLayout()));
16130 SDValue BaseLoadOps[] = { Chain, LDXIntID, Ptr };
16131 SDValue BaseLoad =
16132 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl,
16133 DAG.getVTList(PermTy, MVT::Other),
16134 BaseLoadOps, LDTy, BaseMMO);
16135
16136 // Note that the value of IncOffset (which is provided to the next
16137 // load's pointer info offset value, and thus used to calculate the
16138 // alignment), and the value of IncValue (which is actually used to
16139 // increment the pointer value) are different! This is because we
16140 // require the next load to appear to be aligned, even though it
16141 // is actually offset from the base pointer by a lesser amount.
16142 int IncOffset = VT.getSizeInBits() / 8;
16143 int IncValue = IncOffset;
16144
16145 // Walk (both up and down) the chain looking for another load at the real
16146 // (aligned) offset (the alignment of the other load does not matter in
16147 // this case). If found, then do not use the offset reduction trick, as
16148 // that will prevent the loads from being later combined (as they would
16149 // otherwise be duplicates).
16150 if (!findConsecutiveLoad(LD, DAG))
16151 --IncValue;
16152
16153 SDValue Increment =
16154 DAG.getConstant(IncValue, dl, getPointerTy(MF.getDataLayout()));
16155 Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment);
16156
16157 MachineMemOperand *ExtraMMO =
16158 MF.getMachineMemOperand(LD->getMemOperand(),
16159 1, 2*MemVT.getStoreSize()-1);
16160 SDValue ExtraLoadOps[] = { Chain, LDXIntID, Ptr };
16161 SDValue ExtraLoad =
16162 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl,
16163 DAG.getVTList(PermTy, MVT::Other),
16164 ExtraLoadOps, LDTy, ExtraMMO);
16165
16166 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
16167 BaseLoad.getValue(1), ExtraLoad.getValue(1));
16168
16169 // Because vperm has a big-endian bias, we must reverse the order
16170 // of the input vectors and complement the permute control vector
16171 // when generating little endian code. We have already handled the
16172 // latter by using lvsr instead of lvsl, so just reverse BaseLoad
16173 // and ExtraLoad here.
16174 SDValue Perm;
16175 if (isLittleEndian)
16176 Perm = BuildIntrinsicOp(IntrPerm,
16177 ExtraLoad, BaseLoad, PermCntl, DAG, dl);
16178 else
16179 Perm = BuildIntrinsicOp(IntrPerm,
16180 BaseLoad, ExtraLoad, PermCntl, DAG, dl);
16181
16182 if (VT != PermTy)
16183 Perm = Subtarget.hasAltivec()
16184 ? DAG.getNode(ISD::BITCAST, dl, VT, Perm)
16185 : DAG.getNode(ISD::FP_ROUND, dl, VT, Perm,
16186 DAG.getTargetConstant(1, dl, MVT::i64));
16187 // second argument is 1 because this rounding
16188 // is always exact.
16189
16190 // The output of the permutation is our loaded result, the TokenFactor is
16191 // our new chain.
16192 DCI.CombineTo(N, Perm, TF);
16193 return SDValue(N, 0);
16194 }
16195 }
16196 break;
16197 case ISD::INTRINSIC_WO_CHAIN: {
16198 bool isLittleEndian = Subtarget.isLittleEndian();
16199 unsigned IID = N->getConstantOperandVal(0);
16200 Intrinsic::ID Intr = (isLittleEndian ? Intrinsic::ppc_altivec_lvsr
16201 : Intrinsic::ppc_altivec_lvsl);
16202 if (IID == Intr && N->getOperand(1)->getOpcode() == ISD::ADD) {
16203 SDValue Add = N->getOperand(1);
16204
16205 int Bits = 4 /* 16 byte alignment */;
16206
16207 if (DAG.MaskedValueIsZero(Add->getOperand(1),
16208 APInt::getAllOnes(Bits /* alignment */)
16209 .zext(Add.getScalarValueSizeInBits()))) {
16210 SDNode *BasePtr = Add->getOperand(0).getNode();
16211 for (SDNode *U : BasePtr->uses()) {
16212 if (U->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
16213 U->getConstantOperandVal(0) == IID) {
16214 // We've found another LVSL/LVSR, and this address is an aligned
16215 // multiple of that one. The results will be the same, so use the
16216 // one we've just found instead.
16217
16218 return SDValue(U, 0);
16219 }
16220 }
16221 }
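      // e.g. if this node is lvsl(%base + 32) and an lvsl(%base) already exists,
      // the low four bits of 32 are zero, so both produce the same permute
      // control and the existing node is reused.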
16222
16223 if (isa<ConstantSDNode>(Add->getOperand(1))) {
16224 SDNode *BasePtr = Add->getOperand(0).getNode();
16225 for (SDNode *U : BasePtr->uses()) {
16226 if (U->getOpcode() == ISD::ADD &&
16227 isa<ConstantSDNode>(U->getOperand(1)) &&
16228 (Add->getConstantOperandVal(1) - U->getConstantOperandVal(1)) %
16229 (1ULL << Bits) ==
16230 0) {
16231 SDNode *OtherAdd = U;
16232 for (SDNode *V : OtherAdd->uses()) {
16233 if (V->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
16234 V->getConstantOperandVal(0) == IID) {
16235 return SDValue(V, 0);
16236 }
16237 }
16238 }
16239 }
16240 }
16241 }
16242
16243 // Combine vmaxsw/h/b(a, negation of a) into abs(a).
16244 // This exposes the vabsduw/h/b opportunity for downstream combines.
16245 if (!DCI.isAfterLegalizeDAG() && Subtarget.hasP9Altivec() &&
16246 (IID == Intrinsic::ppc_altivec_vmaxsw ||
16247 IID == Intrinsic::ppc_altivec_vmaxsh ||
16248 IID == Intrinsic::ppc_altivec_vmaxsb)) {
16249 SDValue V1 = N->getOperand(1);
16250 SDValue V2 = N->getOperand(2);
16251 if ((V1.getSimpleValueType() == MVT::v4i32 ||
16252 V1.getSimpleValueType() == MVT::v8i16 ||
16253 V1.getSimpleValueType() == MVT::v16i8) &&
16254 V1.getSimpleValueType() == V2.getSimpleValueType()) {
16255 // (0-a, a)
16256 if (V1.getOpcode() == ISD::SUB &&
16257 ISD::isBuildVectorAllZeros(V1.getOperand(0).getNode()) &&
16258 V1.getOperand(1) == V2) {
16259 return DAG.getNode(ISD::ABS, dl, V2.getValueType(), V2);
16260 }
16261 // (a, 0-a)
16262 if (V2.getOpcode() == ISD::SUB &&
16263 ISD::isBuildVectorAllZeros(V2.getOperand(0).getNode()) &&
16264 V2.getOperand(1) == V1) {
16265 return DAG.getNode(ISD::ABS, dl, V1.getValueType(), V1);
16266 }
16267 // (x-y, y-x)
16268 if (V1.getOpcode() == ISD::SUB && V2.getOpcode() == ISD::SUB &&
16269 V1.getOperand(0) == V2.getOperand(1) &&
16270 V1.getOperand(1) == V2.getOperand(0)) {
16271 return DAG.getNode(ISD::ABS, dl, V1.getValueType(), V1);
16272 }
16273 }
16274 }
16275 }
16276
16277 break;
16278 case ISD::INTRINSIC_W_CHAIN:
16279 switch (N->getConstantOperandVal(1)) {
16280 default:
16281 break;
16282 case Intrinsic::ppc_altivec_vsum4sbs:
16283 case Intrinsic::ppc_altivec_vsum4shs:
16284 case Intrinsic::ppc_altivec_vsum4ubs: {
16285 // These sum-across intrinsics only have a chain due to the side effect
16286 // that they may set the SAT bit. If we know the SAT bit will not be set
16287 // for some inputs, we can replace any uses of their chain with the
16288 // input chain.
16289 if (BuildVectorSDNode *BVN =
16290 dyn_cast<BuildVectorSDNode>(N->getOperand(3))) {
16291 APInt APSplatBits, APSplatUndef;
16292 unsigned SplatBitSize;
16293 bool HasAnyUndefs;
16294 bool BVNIsConstantSplat = BVN->isConstantSplat(
16295 APSplatBits, APSplatUndef, SplatBitSize, HasAnyUndefs, 0,
16296 !Subtarget.isLittleEndian());
16297 // If the constant splat vector is 0, the SAT bit will not be set.
16298 if (BVNIsConstantSplat && APSplatBits == 0)
16299 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), N->getOperand(0));
16300 }
16301 return SDValue();
16302 }
16303 case Intrinsic::ppc_vsx_lxvw4x:
16304 case Intrinsic::ppc_vsx_lxvd2x:
16305 // For little endian, VSX loads require generating lxvd2x/xxswapd.
16306 // Not needed on ISA 3.0 based CPUs since we have a non-permuting load.
16307 if (Subtarget.needsSwapsForVSXMemOps())
16308 return expandVSXLoadForLE(N, DCI);
16309 break;
16310 }
16311 break;
16312 case ISD::INTRINSIC_VOID:
16313 // For little endian, VSX stores require generating xxswapd/stxvd2x.
16314 // Not needed on ISA 3.0 based CPUs since we have a non-permuting store.
16315 if (Subtarget.needsSwapsForVSXMemOps()) {
16316 switch (N->getConstantOperandVal(1)) {
16317 default:
16318 break;
16319 case Intrinsic::ppc_vsx_stxvw4x:
16320 case Intrinsic::ppc_vsx_stxvd2x:
16321 return expandVSXStoreForLE(N, DCI);
16322 }
16323 }
16324 break;
16325 case ISD::BSWAP: {
16326 // Turn BSWAP (LOAD) -> lhbrx/lwbrx.
16327 // For subtargets without LDBRX, we can still do better than the default
16328 // expansion even for 64-bit BSWAP (LOAD).
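    // e.g. (bswap (load i32 %p)) becomes a single lwbrx, and
    // (bswap (load i16 %p)) becomes lhbrx followed by a truncate to i16.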
16329 bool Is64BitBswapOn64BitTgt =
16330 Subtarget.isPPC64() && N->getValueType(0) == MVT::i64;
16331 bool IsSingleUseNormalLd = ISD::isNormalLoad(N->getOperand(0).getNode()) &&
16332 N->getOperand(0).hasOneUse();
16333 if (IsSingleUseNormalLd &&
16334 (N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i16 ||
16335 (Subtarget.hasLDBRX() && Is64BitBswapOn64BitTgt))) {
16336 SDValue Load = N->getOperand(0);
16337 LoadSDNode *LD = cast<LoadSDNode>(Load);
16338 // Create the byte-swapping load.
16339 SDValue Ops[] = {
16340 LD->getChain(), // Chain
16341 LD->getBasePtr(), // Ptr
16342 DAG.getValueType(N->getValueType(0)) // VT
16343 };
16344 SDValue BSLoad =
16345 DAG.getMemIntrinsicNode(PPCISD::LBRX, dl,
16346 DAG.getVTList(N->getValueType(0) == MVT::i64 ?
16347 MVT::i64 : MVT::i32, MVT::Other),
16348 Ops, LD->getMemoryVT(), LD->getMemOperand());
16349
16350 // If this is an i16 load, insert the truncate.
16351 SDValue ResVal = BSLoad;
16352 if (N->getValueType(0) == MVT::i16)
16353 ResVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i16, BSLoad);
16354
16355 // First, combine the bswap away. This makes the value produced by the
16356 // load dead.
16357 DCI.CombineTo(N, ResVal);
16358
16359 // Next, combine the load away; we give it a bogus result value but a real
16360 // chain result. The result value is dead because the bswap is dead.
16361 DCI.CombineTo(Load.getNode(), ResVal, BSLoad.getValue(1));
16362
16363 // Return N so it doesn't get rechecked!
16364 return SDValue(N, 0);
16365 }
16366 // Convert this to two 32-bit bswap loads and a BUILD_PAIR. Do this only
16367 // before legalization so that the BUILD_PAIR is handled correctly.
16368 if (!DCI.isBeforeLegalize() || !Is64BitBswapOn64BitTgt ||
16369 !IsSingleUseNormalLd)
16370 return SDValue();
16371 LoadSDNode *LD = cast<LoadSDNode>(N->getOperand(0));
16372
16373 // Can't split volatile or atomic loads.
16374 if (!LD->isSimple())
16375 return SDValue();
16376 SDValue BasePtr = LD->getBasePtr();
16377 SDValue Lo = DAG.getLoad(MVT::i32, dl, LD->getChain(), BasePtr,
16378 LD->getPointerInfo(), LD->getAlign());
16379 Lo = DAG.getNode(ISD::BSWAP, dl, MVT::i32, Lo);
16380 BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
16381 DAG.getIntPtrConstant(4, dl));
16382 MachineMemOperand *NewMMO = DAG.getMachineFunction().getMachineMemOperand(
16383 LD->getMemOperand(), 4, 4);
16384 SDValue Hi = DAG.getLoad(MVT::i32, dl, LD->getChain(), BasePtr, NewMMO);
16385 Hi = DAG.getNode(ISD::BSWAP, dl, MVT::i32, Hi);
16386 SDValue Res;
16387 if (Subtarget.isLittleEndian())
16388 Res = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Hi, Lo);
16389 else
16390 Res = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
16391 SDValue TF =
16392 DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
16393 Hi.getOperand(0).getValue(1), Lo.getOperand(0).getValue(1));
16394 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), TF);
16395 return Res;
16396 }
16397 case PPCISD::VCMP:
16398 // If a VCMP_rec node already exists with exactly the same operands as this
16399 // node, use its result instead of this node (VCMP_rec computes both a CR6
16400 // and a normal output).
16401 //
16402 if (!N->getOperand(0).hasOneUse() &&
16403 !N->getOperand(1).hasOneUse() &&
16404 !N->getOperand(2).hasOneUse()) {
16405
16406 // Scan all of the users of the LHS, looking for VCMP_rec's that match.
16407 SDNode *VCMPrecNode = nullptr;
16408
16409 SDNode *LHSN = N->getOperand(0).getNode();
16410 for (SDNode::use_iterator UI = LHSN->use_begin(), E = LHSN->use_end();
16411 UI != E; ++UI)
16412 if (UI->getOpcode() == PPCISD::VCMP_rec &&
16413 UI->getOperand(1) == N->getOperand(1) &&
16414 UI->getOperand(2) == N->getOperand(2) &&
16415 UI->getOperand(0) == N->getOperand(0)) {
16416 VCMPrecNode = *UI;
16417 break;
16418 }
16419
16420 // If there is no VCMP_rec node, or if the flag value has a single use,
16421 // don't transform this.
16422 if (!VCMPrecNode || VCMPrecNode->hasNUsesOfValue(0, 1))
16423 break;
16424
16425 // Look at the (necessarily single) use of the flag value. If it has a
16426 // chain, this transformation is more complex. Note that multiple things
16427 // could use the value result, which we should ignore.
16428 SDNode *FlagUser = nullptr;
16429 for (SDNode::use_iterator UI = VCMPrecNode->use_begin();
16430 FlagUser == nullptr; ++UI) {
16431 assert(UI != VCMPrecNode->use_end() && "Didn't find user!");
16432 SDNode *User = *UI;
16433 for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) {
16434 if (User->getOperand(i) == SDValue(VCMPrecNode, 1)) {
16435 FlagUser = User;
16436 break;
16437 }
16438 }
16439 }
16440
16441 // If the user is a MFOCRF instruction, we know this is safe.
16442 // Otherwise we give up for right now.
16443 if (FlagUser->getOpcode() == PPCISD::MFOCRF)
16444 return SDValue(VCMPrecNode, 0);
16445 }
16446 break;
16447 case ISD::BR_CC: {
16448 // If this is a branch on an altivec predicate comparison, lower this so
16449 // that we don't have to do a MFOCRF: instead, branch directly on CR6. This
16450 // lowering is done pre-legalize, because the legalizer lowers the predicate
16451 // compare down to code that is difficult to reassemble.
16452 // This code also handles branches that depend on the result of a store
16453 // conditional.
16454 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
16455 SDValue LHS = N->getOperand(2), RHS = N->getOperand(3);
16456
16457 int CompareOpc;
16458 bool isDot;
16459
16460 if (!isa<ConstantSDNode>(RHS) || (CC != ISD::SETEQ && CC != ISD::SETNE))
16461 break;
16462
16463 // Since we are doing this pre-legalize, the RHS can be a constant of
16464 // arbitrary bitwidth which may cause issues when trying to get the value
16465 // from the underlying APInt.
16466 auto RHSAPInt = RHS->getAsAPIntVal();
16467 if (!RHSAPInt.isIntN(64))
16468 break;
16469
16470 unsigned Val = RHSAPInt.getZExtValue();
16471 auto isImpossibleCompare = [&]() {
16472 // If this is a comparison against something other than 0/1, then we know
16473 // that the condition is never/always true.
16474 if (Val != 0 && Val != 1) {
16475 if (CC == ISD::SETEQ) // Cond never true, remove branch.
16476 return N->getOperand(0);
16477 // Always !=, turn it into an unconditional branch.
16478 return DAG.getNode(ISD::BR, dl, MVT::Other,
16479 N->getOperand(0), N->getOperand(4));
16480 }
16481 return SDValue();
16482 };
16483 // Combine branches fed by store conditional instructions (st[bhwd]cx).
16484 unsigned StoreWidth = 0;
16485 if (LHS.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
16486 isStoreConditional(LHS, StoreWidth)) {
16487 if (SDValue Impossible = isImpossibleCompare())
16488 return Impossible;
16489 PPC::Predicate CompOpc;
16490 // eq 0 => ne
16491 // ne 0 => eq
16492 // eq 1 => eq
16493 // ne 1 => ne
16494 if (Val == 0)
16495 CompOpc = CC == ISD::SETEQ ? PPC::PRED_NE : PPC::PRED_EQ;
16496 else
16497 CompOpc = CC == ISD::SETEQ ? PPC::PRED_EQ : PPC::PRED_NE;
16498
16499 SDValue Ops[] = {LHS.getOperand(0), LHS.getOperand(2), LHS.getOperand(3),
16500 DAG.getConstant(StoreWidth, dl, MVT::i32)};
16501 auto *MemNode = cast<MemSDNode>(LHS);
16502 SDValue ConstSt = DAG.getMemIntrinsicNode(
16503 PPCISD::STORE_COND, dl,
16504 DAG.getVTList(MVT::i32, MVT::Other, MVT::Glue), Ops,
16505 MemNode->getMemoryVT(), MemNode->getMemOperand());
16506
16507 SDValue InChain;
16508 // Unchain the branch from the original store conditional.
16509 if (N->getOperand(0) == LHS.getValue(1))
16510 InChain = LHS.getOperand(0);
16511 else if (N->getOperand(0).getOpcode() == ISD::TokenFactor) {
16512 SmallVector<SDValue, 4> InChains;
16513 SDValue InTF = N->getOperand(0);
16514 for (int i = 0, e = InTF.getNumOperands(); i < e; i++)
16515 if (InTF.getOperand(i) != LHS.getValue(1))
16516 InChains.push_back(InTF.getOperand(i));
16517 InChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, InChains);
16518 }
16519
16520 return DAG.getNode(PPCISD::COND_BRANCH, dl, MVT::Other, InChain,
16521 DAG.getConstant(CompOpc, dl, MVT::i32),
16522 DAG.getRegister(PPC::CR0, MVT::i32), N->getOperand(4),
16523 ConstSt.getValue(2));
16524 }
16525
16526 if (LHS.getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
16527 getVectorCompareInfo(LHS, CompareOpc, isDot, Subtarget)) {
16528 assert(isDot && "Can't compare against a vector result!");
16529
16530 if (SDValue Impossible = isImpossibleCompare())
16531 return Impossible;
16532
16533 bool BranchOnWhenPredTrue = (CC == ISD::SETEQ) ^ (Val == 0);
16534 // Create the PPCISD altivec 'dot' comparison node.
16535 SDValue Ops[] = {
16536 LHS.getOperand(2), // LHS of compare
16537 LHS.getOperand(3), // RHS of compare
16538 DAG.getConstant(CompareOpc, dl, MVT::i32)
16539 };
16540 EVT VTs[] = { LHS.getOperand(2).getValueType(), MVT::Glue };
16541 SDValue CompNode = DAG.getNode(PPCISD::VCMP_rec, dl, VTs, Ops);
16542
16543 // Unpack the result based on how the target uses it.
16544 PPC::Predicate CompOpc;
16545 switch (LHS.getConstantOperandVal(1)) {
16546 default: // Can't happen, don't crash on invalid number though.
16547 case 0: // Branch on the value of the EQ bit of CR6.
16548 CompOpc = BranchOnWhenPredTrue ? PPC::PRED_EQ : PPC::PRED_NE;
16549 break;
16550 case 1: // Branch on the inverted value of the EQ bit of CR6.
16551 CompOpc = BranchOnWhenPredTrue ? PPC::PRED_NE : PPC::PRED_EQ;
16552 break;
16553 case 2: // Branch on the value of the LT bit of CR6.
16554 CompOpc = BranchOnWhenPredTrue ? PPC::PRED_LT : PPC::PRED_GE;
16555 break;
16556 case 3: // Branch on the inverted value of the LT bit of CR6.
16557 CompOpc = BranchOnWhenPredTrue ? PPC::PRED_GE : PPC::PRED_LT;
16558 break;
16559 }
16560
16561 return DAG.getNode(PPCISD::COND_BRANCH, dl, MVT::Other, N->getOperand(0),
16562 DAG.getConstant(CompOpc, dl, MVT::i32),
16563 DAG.getRegister(PPC::CR6, MVT::i32),
16564 N->getOperand(4), CompNode.getValue(1));
16565 }
16566 break;
16567 }
16568 case ISD::BUILD_VECTOR:
16569 return DAGCombineBuildVector(N, DCI);
16570 }
16571
16572 return SDValue();
16573}
16574
16575SDValue
16576PPCTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
16577 SelectionDAG &DAG,
16578 SmallVectorImpl<SDNode *> &Created) const {
16579 // fold (sdiv X, pow2)
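  // e.g. (sdiv i32 %x, 8) lowers to (SRA_ADDZE %x, 3), i.e. srawi followed by
  // addze, and a divisor of -8 additionally negates the result with a subtract
  // from zero.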
16580 EVT VT = N->getValueType(0);
16581 if (VT == MVT::i64 && !Subtarget.isPPC64())
16582 return SDValue();
16583 if ((VT != MVT::i32 && VT != MVT::i64) ||
16584 !(Divisor.isPowerOf2() || Divisor.isNegatedPowerOf2()))
16585 return SDValue();
16586
16587 SDLoc DL(N);
16588 SDValue N0 = N->getOperand(0);
16589
16590 bool IsNegPow2 = Divisor.isNegatedPowerOf2();
16591 unsigned Lg2 = (IsNegPow2 ? -Divisor : Divisor).countr_zero();
16592 SDValue ShiftAmt = DAG.getConstant(Lg2, DL, VT);
16593
16594 SDValue Op = DAG.getNode(PPCISD::SRA_ADDZE, DL, VT, N0, ShiftAmt);
16595 Created.push_back(Op.getNode());
16596
16597 if (IsNegPow2) {
16598 Op = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Op);
16599 Created.push_back(Op.getNode());
16600 }
16601
16602 return Op;
16603}
16604
16605//===----------------------------------------------------------------------===//
16606// Inline Assembly Support
16607//===----------------------------------------------------------------------===//
16608
16609void PPCTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
16610 KnownBits &Known,
16611 const APInt &DemandedElts,
16612 const SelectionDAG &DAG,
16613 unsigned Depth) const {
16614 Known.resetAll();
16615 switch (Op.getOpcode()) {
16616 default: break;
16617 case PPCISD::LBRX: {
16618 // lhbrx is known to have the top bits cleared out.
16619 if (cast<VTSDNode>(Op.getOperand(2))->getVT() == MVT::i16)
16620 Known.Zero = 0xFFFF0000;
16621 break;
16622 }
16623 case ISD::INTRINSIC_WO_CHAIN: {
16624 switch (Op.getConstantOperandVal(0)) {
16625 default: break;
16626 case Intrinsic::ppc_altivec_vcmpbfp_p:
16627 case Intrinsic::ppc_altivec_vcmpeqfp_p:
16628 case Intrinsic::ppc_altivec_vcmpequb_p:
16629 case Intrinsic::ppc_altivec_vcmpequh_p:
16630 case Intrinsic::ppc_altivec_vcmpequw_p:
16631 case Intrinsic::ppc_altivec_vcmpequd_p:
16632 case Intrinsic::ppc_altivec_vcmpequq_p:
16633 case Intrinsic::ppc_altivec_vcmpgefp_p:
16634 case Intrinsic::ppc_altivec_vcmpgtfp_p:
16635 case Intrinsic::ppc_altivec_vcmpgtsb_p:
16636 case Intrinsic::ppc_altivec_vcmpgtsh_p:
16637 case Intrinsic::ppc_altivec_vcmpgtsw_p:
16638 case Intrinsic::ppc_altivec_vcmpgtsd_p:
16639 case Intrinsic::ppc_altivec_vcmpgtsq_p:
16640 case Intrinsic::ppc_altivec_vcmpgtub_p:
16641 case Intrinsic::ppc_altivec_vcmpgtuh_p:
16642 case Intrinsic::ppc_altivec_vcmpgtuw_p:
16643 case Intrinsic::ppc_altivec_vcmpgtud_p:
16644 case Intrinsic::ppc_altivec_vcmpgtuq_p:
16645 Known.Zero = ~1U; // All bits but the low one are known to be zero.
16646 break;
16647 }
16648 break;
16649 }
16650 case ISD::INTRINSIC_W_CHAIN: {
16651 switch (Op.getConstantOperandVal(1)) {
16652 default:
16653 break;
16654 case Intrinsic::ppc_load2r:
16655 // Top bits are cleared for load2r (which is the same as lhbrx).
16656 Known.Zero = 0xFFFF0000;
16657 break;
16658 }
16659 break;
16660 }
16661 }
16662}
16663
16664Align PPCTargetLowering::getPrefLoopAlignment(MachineLoop *ML) const {
16665 switch (Subtarget.getCPUDirective()) {
16666 default: break;
16667 case PPC::DIR_970:
16668 case PPC::DIR_PWR4:
16669 case PPC::DIR_PWR5:
16670 case PPC::DIR_PWR5X:
16671 case PPC::DIR_PWR6:
16672 case PPC::DIR_PWR6X:
16673 case PPC::DIR_PWR7:
16674 case PPC::DIR_PWR8:
16675 case PPC::DIR_PWR9:
16676 case PPC::DIR_PWR10:
16677 case PPC::DIR_PWR_FUTURE: {
16678 if (!ML)
16679 break;
16680
16681 if (!DisableInnermostLoopAlign32) {
16682 // If the nested loop is an innermost loop, prefer a 32-byte alignment,
16683 // so that we can decrease cache misses and branch-prediction misses.
16684 // Actual alignment of the loop will depend on the hotness check and other
16685 // logic in alignBlocks.
16686 if (ML->getLoopDepth() > 1 && ML->getSubLoops().empty())
16687 return Align(32);
16688 }
16689
16690 const PPCInstrInfo *TII = Subtarget.getInstrInfo();
16691
16692 // For small loops (between 5 and 8 instructions), align to a 32-byte
16693 // boundary so that the entire loop fits in one instruction-cache line.
16694 uint64_t LoopSize = 0;
16695 for (auto I = ML->block_begin(), IE = ML->block_end(); I != IE; ++I)
16696 for (const MachineInstr &J : **I) {
16697 LoopSize += TII->getInstSizeInBytes(J);
16698 if (LoopSize > 32)
16699 break;
16700 }
16701
16702 if (LoopSize > 16 && LoopSize <= 32)
16703 return Align(32);
16704
16705 break;
16706 }
16707 }
16708
16709 return TargetLowering::getPrefLoopAlignment(ML);
16710}
16711
16712/// getConstraintType - Given a constraint, return the type of
16713/// constraint it is for this target.
16714PPCTargetLowering::ConstraintType
16715PPCTargetLowering::getConstraintType(StringRef Constraint) const {
16716 if (Constraint.size() == 1) {
16717 switch (Constraint[0]) {
16718 default: break;
16719 case 'b':
16720 case 'r':
16721 case 'f':
16722 case 'd':
16723 case 'v':
16724 case 'y':
16725 return C_RegisterClass;
16726 case 'Z':
16727 // FIXME: While Z does indicate a memory constraint, it specifically
16728 // indicates an r+r address (used in conjunction with the 'y' modifier
16729 // in the replacement string). Currently, we're forcing the base
16730 // register to be r0 in the asm printer (which is interpreted as zero)
16731 // and forming the complete address in the second register. This is
16732 // suboptimal.
16733 return C_Memory;
16734 }
16735 } else if (Constraint == "wc") { // individual CR bits.
16736 return C_RegisterClass;
16737 } else if (Constraint == "wa" || Constraint == "wd" ||
16738 Constraint == "wf" || Constraint == "ws" ||
16739 Constraint == "wi" || Constraint == "ww") {
16740 return C_RegisterClass; // VSX registers.
16741 }
16742 return TargetLowering::getConstraintType(Constraint);
16743}
16744
16745/// Examine constraint type and operand type and determine a weight value.
16746/// This object must already have been set up with the operand type
16747/// and the current alternative constraint selected.
16748TargetLowering::ConstraintWeight
16749PPCTargetLowering::getSingleConstraintMatchWeight(
16750 AsmOperandInfo &info, const char *constraint) const {
16751 ConstraintWeight weight = CW_Invalid;
16752 Value *CallOperandVal = info.CallOperandVal;
16753 // If we don't have a value, we can't do a match,
16754 // but allow it at the lowest weight.
16755 if (!CallOperandVal)
16756 return CW_Default;
16757 Type *type = CallOperandVal->getType();
16758
16759 // Look at the constraint type.
16760 if (StringRef(constraint) == "wc" && type->isIntegerTy(1))
16761 return CW_Register; // an individual CR bit.
16762 else if ((StringRef(constraint) == "wa" ||
16763 StringRef(constraint) == "wd" ||
16764 StringRef(constraint) == "wf") &&
16765 type->isVectorTy())
16766 return CW_Register;
16767 else if (StringRef(constraint) == "wi" && type->isIntegerTy(64))
16768 return CW_Register; // just holds 64-bit integer data.
16769 else if (StringRef(constraint) == "ws" && type->isDoubleTy())
16770 return CW_Register;
16771 else if (StringRef(constraint) == "ww" && type->isFloatTy())
16772 return CW_Register;
16773
16774 switch (*constraint) {
16775 default:
16776 weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
16777 break;
16778 case 'b':
16779 if (type->isIntegerTy())
16780 weight = CW_Register;
16781 break;
16782 case 'f':
16783 if (type->isFloatTy())
16784 weight = CW_Register;
16785 break;
16786 case 'd':
16787 if (type->isDoubleTy())
16788 weight = CW_Register;
16789 break;
16790 case 'v':
16791 if (type->isVectorTy())
16792 weight = CW_Register;
16793 break;
16794 case 'y':
16795 weight = CW_Register;
16796 break;
16797 case 'Z':
16798 weight = CW_Memory;
16799 break;
16800 }
16801 return weight;
16802}
16803
16804std::pair<unsigned, const TargetRegisterClass *>
16805PPCTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
16806 StringRef Constraint,
16807 MVT VT) const {
16808 if (Constraint.size() == 1) {
16809 // GCC RS6000 Constraint Letters
16810 switch (Constraint[0]) {
16811 case 'b': // R1-R31
16812 if (VT == MVT::i64 && Subtarget.isPPC64())
16813 return std::make_pair(0U, &PPC::G8RC_NOX0RegClass);
16814 return std::make_pair(0U, &PPC::GPRC_NOR0RegClass);
16815 case 'r': // R0-R31
16816 if (VT == MVT::i64 && Subtarget.isPPC64())
16817 return std::make_pair(0U, &PPC::G8RCRegClass);
16818 return std::make_pair(0U, &PPC::GPRCRegClass);
16819 // 'd' and 'f' constraints are both defined to be "the floating point
16820 // registers", where one is for 32-bit and the other for 64-bit. We don't
16821 // care much here, so just give them all the same reg classes.
16822 case 'd':
16823 case 'f':
16824 if (Subtarget.hasSPE()) {
16825 if (VT == MVT::f32 || VT == MVT::i32)
16826 return std::make_pair(0U, &PPC::GPRCRegClass);
16827 if (VT == MVT::f64 || VT == MVT::i64)
16828 return std::make_pair(0U, &PPC::SPERCRegClass);
16829 } else {
16830 if (VT == MVT::f32 || VT == MVT::i32)
16831 return std::make_pair(0U, &PPC::F4RCRegClass);
16832 if (VT == MVT::f64 || VT == MVT::i64)
16833 return std::make_pair(0U, &PPC::F8RCRegClass);
16834 }
16835 break;
16836 case 'v':
16837 if (Subtarget.hasAltivec() && VT.isVector())
16838 return std::make_pair(0U, &PPC::VRRCRegClass);
16839 else if (Subtarget.hasVSX())
16840 // Scalars in Altivec registers only make sense with VSX.
16841 return std::make_pair(0U, &PPC::VFRCRegClass);
16842 break;
16843 case 'y': // crrc
16844 return std::make_pair(0U, &PPC::CRRCRegClass);
16845 }
16846 } else if (Constraint == "wc" && Subtarget.useCRBits()) {
16847 // An individual CR bit.
16848 return std::make_pair(0U, &PPC::CRBITRCRegClass);
16849 } else if ((Constraint == "wa" || Constraint == "wd" ||
16850 Constraint == "wf" || Constraint == "wi") &&
16851 Subtarget.hasVSX()) {
16852 // A VSX register for either a scalar (FP) or vector. There is no
16853 // support for single precision scalars on subtargets prior to Power8.
16854 if (VT.isVector())
16855 return std::make_pair(0U, &PPC::VSRCRegClass);
16856 if (VT == MVT::f32 && Subtarget.hasP8Vector())
16857 return std::make_pair(0U, &PPC::VSSRCRegClass);
16858 return std::make_pair(0U, &PPC::VSFRCRegClass);
16859 } else if ((Constraint == "ws" || Constraint == "ww") && Subtarget.hasVSX()) {
16860 if (VT == MVT::f32 && Subtarget.hasP8Vector())
16861 return std::make_pair(0U, &PPC::VSSRCRegClass);
16862 else
16863 return std::make_pair(0U, &PPC::VSFRCRegClass);
16864 } else if (Constraint == "lr") {
16865 if (VT == MVT::i64)
16866 return std::make_pair(0U, &PPC::LR8RCRegClass);
16867 else
16868 return std::make_pair(0U, &PPC::LRRCRegClass);
16869 }
16870
16871 // Handle special cases of physical registers that are not properly handled
16872 // by the base class.
16873 if (Constraint[0] == '{' && Constraint[Constraint.size() - 1] == '}') {
16874 // If we name a VSX register, we can't defer to the base class because it
16875 // will not recognize the correct register (their names will be VSL{0-31}
16876 // and V{0-31} so they won't match). So we match them here.
16877 if (Constraint.size() > 3 && Constraint[1] == 'v' && Constraint[2] == 's') {
16878 int VSNum = atoi(Constraint.data() + 3);
16879 assert(VSNum >= 0 && VSNum <= 63 &&
16880 "Attempted to access a vsr out of range");
16881 if (VSNum < 32)
16882 return std::make_pair(PPC::VSL0 + VSNum, &PPC::VSRCRegClass);
16883 return std::make_pair(PPC::V0 + VSNum - 32, &PPC::VSRCRegClass);
16884 }
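    // e.g. "{vs3}" maps to VSL3 and "{vs34}" maps to V2, both in VSRCRegClass.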
16885
16886 // For float registers, we can't defer to the base class as it will match
16887 // the SPILLTOVSRRC class.
16888 if (Constraint.size() > 3 && Constraint[1] == 'f') {
16889 int RegNum = atoi(Constraint.data() + 2);
16890 if (RegNum > 31 || RegNum < 0)
16891 report_fatal_error("Invalid floating point register number");
16892 if (VT == MVT::f32 || VT == MVT::i32)
16893 return Subtarget.hasSPE()
16894 ? std::make_pair(PPC::R0 + RegNum, &PPC::GPRCRegClass)
16895 : std::make_pair(PPC::F0 + RegNum, &PPC::F4RCRegClass);
16896 if (VT == MVT::f64 || VT == MVT::i64)
16897 return Subtarget.hasSPE()
16898 ? std::make_pair(PPC::S0 + RegNum, &PPC::SPERCRegClass)
16899 : std::make_pair(PPC::F0 + RegNum, &PPC::F8RCRegClass);
16900 }
16901 }
16902
16903 std::pair<unsigned, const TargetRegisterClass *> R =
16904 TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
16905
16906 // r[0-9]+ are used, on PPC64, to refer to the corresponding 64-bit registers
16907 // (which we call X[0-9]+). If a 64-bit value has been requested, and a
16908 // 32-bit GPR has been selected, then 'upgrade' it to the 64-bit parent
16909 // register.
16910 // FIXME: If TargetLowering::getRegForInlineAsmConstraint could somehow use
16911 // the AsmName field from *RegisterInfo.td, then this would not be necessary.
16912 if (R.first && VT == MVT::i64 && Subtarget.isPPC64() &&
16913 PPC::GPRCRegClass.contains(R.first))
16914 return std::make_pair(TRI->getMatchingSuperReg(R.first,
16915 PPC::sub_32, &PPC::G8RCRegClass),
16916 &PPC::G8RCRegClass);
16917
16918 // GCC accepts 'cc' as an alias for 'cr0', and we need to do the same.
16919 if (!R.second && StringRef("{cc}").equals_insensitive(Constraint)) {
16920 R.first = PPC::CR0;
16921 R.second = &PPC::CRRCRegClass;
16922 }
16923 // FIXME: This warning should ideally be emitted in the front end.
16924 const auto &TM = getTargetMachine();
16925 if (Subtarget.isAIXABI() && !TM.getAIXExtendedAltivecABI()) {
16926 if (((R.first >= PPC::V20 && R.first <= PPC::V31) ||
16927 (R.first >= PPC::VF20 && R.first <= PPC::VF31)) &&
16928 (R.second == &PPC::VSRCRegClass || R.second == &PPC::VSFRCRegClass))
16929 errs() << "warning: vector registers 20 to 32 are reserved in the "
16930 "default AIX AltiVec ABI and cannot be used\n";
16931 }
16932
16933 return R;
16934}
16935
16936/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
16937/// vector. If it is invalid, don't add anything to Ops.
16938void PPCTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
16939 StringRef Constraint,
16940 std::vector<SDValue> &Ops,
16941 SelectionDAG &DAG) const {
16942 SDValue Result;
16943
16944 // Only support length 1 constraints.
16945 if (Constraint.size() > 1)
16946 return;
16947
16948 char Letter = Constraint[0];
16949 switch (Letter) {
16950 default: break;
16951 case 'I':
16952 case 'J':
16953 case 'K':
16954 case 'L':
16955 case 'M':
16956 case 'N':
16957 case 'O':
16958 case 'P': {
16959 ConstantSDNode *CST = dyn_cast<ConstantSDNode>(Op);
16960 if (!CST) return; // Must be an immediate to match.
16961 SDLoc dl(Op);
16962 int64_t Value = CST->getSExtValue();
16963 EVT TCVT = MVT::i64; // All constants taken to be 64 bits so that negative
16964 // numbers are printed as such.
16965 switch (Letter) {
16966 default: llvm_unreachable("Unknown constraint letter!");
16967 case 'I': // "I" is a signed 16-bit constant.
16968 if (isInt<16>(Value))
16969 Result = DAG.getTargetConstant(Value, dl, TCVT);
16970 break;
16971 case 'J': // "J" is a constant with only the high-order 16 bits nonzero.
16972 if (isShiftedUInt<16, 16>(Value))
16973 Result = DAG.getTargetConstant(Value, dl, TCVT);
16974 break;
16975 case 'L': // "L" is a signed 16-bit constant shifted left 16 bits.
16976 if (isShiftedInt<16, 16>(Value))
16977 Result = DAG.getTargetConstant(Value, dl, TCVT);
16978 break;
16979 case 'K': // "K" is a constant with only the low-order 16 bits nonzero.
16980 if (isUInt<16>(Value))
16981 Result = DAG.getTargetConstant(Value, dl, TCVT);
16982 break;
16983 case 'M': // "M" is a constant that is greater than 31.
16984 if (Value > 31)
16985 Result = DAG.getTargetConstant(Value, dl, TCVT);
16986 break;
16987 case 'N': // "N" is a positive constant that is an exact power of two.
16988 if (Value > 0 && isPowerOf2_64(Value))
16989 Result = DAG.getTargetConstant(Value, dl, TCVT);
16990 break;
16991 case 'O': // "O" is the constant zero.
16992 if (Value == 0)
16993 Result = DAG.getTargetConstant(Value, dl, TCVT);
16994 break;
16995 case 'P': // "P" is a constant whose negation is a signed 16-bit constant.
16996 if (isInt<16>(-Value))
16997 Result = DAG.getTargetConstant(Value, dl, TCVT);
16998 break;
16999 }
17000 break;
17001 }
17002 }
17003
17004 if (Result.getNode()) {
17005 Ops.push_back(Result);
17006 return;
17007 }
17008
17009 // Handle standard constraint letters.
17010 TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
17011}
17012
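// Illustrative sketch (not relied on by the lowering code): how the immediate
// constraint letters handled above surface in GCC-style inline assembly. Here
// "I" requires a signed 16-bit constant, so the operand is only materialized
// by the switch above when isInt<16>(42) holds:
//
//   int add_const(int x) {
//     int r;
//     asm("addi %0,%1,%2" : "=r"(r) : "r"(x), "I"(42));
//     return r;
//   }
//
// A constraint such as "P" would also accept 42, because its negation (-42)
// is a signed 16-bit constant as well.
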
17013void PPCTargetLowering::CollectTargetIntrinsicOperands(const CallInst &I,
17014 SmallVectorImpl<SDValue> &Ops,
17015 SelectionDAG &DAG) const {
17016 if (I.getNumOperands() <= 1)
17017 return;
17018 if (!isa<ConstantSDNode>(Ops[1].getNode()))
17019 return;
17020 auto IntrinsicID = Ops[1].getNode()->getAsZExtVal();
17021 if (IntrinsicID != Intrinsic::ppc_tdw && IntrinsicID != Intrinsic::ppc_tw &&
17022 IntrinsicID != Intrinsic::ppc_trapd && IntrinsicID != Intrinsic::ppc_trap)
17023 return;
17024
17025 if (MDNode *MDN = I.getMetadata(LLVMContext::MD_annotation))
17026 Ops.push_back(DAG.getMDNode(MDN));
17027}
17028
17029// isLegalAddressingMode - Return true if the addressing mode represented
17030// by AM is legal for this target, for a load/store of the specified type.
17031bool PPCTargetLowering::isLegalAddressingMode(const DataLayout &DL,
17032 const AddrMode &AM, Type *Ty,
17033 unsigned AS,
17034 Instruction *I) const {
17035 // Vector type r+i form is supported since power9 as DQ form. We don't check
17036 // that the offset meets the DQ-form requirement (off % 16 == 0), because on
17037 // PowerPC the imm form is preferred and the offset can be adjusted to use the
17038 // imm form later, in the PPCLoopInstrFormPrep pass. Also, LSR checks the legal
17039 // addressing mode of an LSRUse using only its min and max offsets, so we
17040 // should be a little aggressive and accept the other offsets for that LSRUse.
17041 if (Ty->isVectorTy() && AM.BaseOffs != 0 && !Subtarget.hasP9Vector())
17042 return false;
17043
17044 // PPC allows a sign-extended 16-bit immediate field.
17045 if (AM.BaseOffs <= -(1LL << 16) || AM.BaseOffs >= (1LL << 16)-1)
17046 return false;
17047
17048 // No global is ever allowed as a base.
17049 if (AM.BaseGV)
17050 return false;
17051
17052 // PPC only supports r+r,
17053 switch (AM.Scale) {
17054 case 0: // "r+i" or just "i", depending on HasBaseReg.
17055 break;
17056 case 1:
17057 if (AM.HasBaseReg && AM.BaseOffs) // "r+r+i" is not allowed.
17058 return false;
17059 // Otherwise we have r+r or r+i.
17060 break;
17061 case 2:
17062 if (AM.HasBaseReg || AM.BaseOffs) // 2*r+r or 2*r+i is not allowed.
17063 return false;
17064 // Allow 2*r as r+r.
17065 break;
17066 default:
17067 // No other scales are supported.
17068 return false;
17069 }
17070
17071 return true;
17072}
17073
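// A rough summary of the cases accepted above (illustrative only):
//   reg + 32760           -> legal   (signed 16-bit displacement)
//   reg + 0x12345         -> illegal (displacement does not fit in 16 bits)
//   reg + reg             -> legal   (Scale == 1, no displacement)
//   reg + reg + imm       -> illegal (no r+r+i form)
//   2*reg                 -> legal   (treated as reg + reg)
//   global + anything     -> illegal (no global bases)
//   vector reg + imm      -> illegal before Power9 (no DQ form)
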
17074SDValue PPCTargetLowering::LowerRETURNADDR(SDValue Op,
17075 SelectionDAG &DAG) const {
17076 MachineFunction &MF = DAG.getMachineFunction();
17077 MachineFrameInfo &MFI = MF.getFrameInfo();
17078 MFI.setReturnAddressIsTaken(true);
17079
17080 if (verifyReturnAddressArgumentIsConstant(Op, DAG))
17081 return SDValue();
17082
17083 SDLoc dl(Op);
17084 unsigned Depth = Op.getConstantOperandVal(0);
17085
17086 // Make sure the function does not optimize away the store of the RA to
17087 // the stack.
17088 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
17089 FuncInfo->setLRStoreRequired();
17090 bool isPPC64 = Subtarget.isPPC64();
17091 auto PtrVT = getPointerTy(MF.getDataLayout());
17092
17093 if (Depth > 0) {
17094 // The link register (return address) is saved in the caller's frame
17095 // not the callee's stack frame. So we must get the caller's frame
17096 // address and load the return address at the LR offset from there.
17097 SDValue FrameAddr =
17098 DAG.getLoad(Op.getValueType(), dl, DAG.getEntryNode(),
17099 LowerFRAMEADDR(Op, DAG), MachinePointerInfo());
17100 SDValue Offset =
17101 DAG.getConstant(Subtarget.getFrameLowering()->getReturnSaveOffset(), dl,
17102 isPPC64 ? MVT::i64 : MVT::i32);
17103 return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(),
17104 DAG.getNode(ISD::ADD, dl, PtrVT, FrameAddr, Offset),
17105 MachinePointerInfo());
17106 }
17107
17108 // Just load the return address off the stack.
17109 SDValue RetAddrFI = getReturnAddrFrameIndex(DAG);
17110 return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), RetAddrFI,
17111 MachinePointerInfo());
17112}
17113
17114SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op,
17115 SelectionDAG &DAG) const {
17116 SDLoc dl(Op);
17117 unsigned Depth = Op.getConstantOperandVal(0);
17118
17119 MachineFunction &MF = DAG.getMachineFunction();
17120 MachineFrameInfo &MFI = MF.getFrameInfo();
17121 MFI.setFrameAddressIsTaken(true);
17122
17123 EVT PtrVT = getPointerTy(MF.getDataLayout());
17124 bool isPPC64 = PtrVT == MVT::i64;
17125
17126 // Naked functions never have a frame pointer, and so we use r1. For all
17127 // other functions, this decision is deferred until PEI (prologue/epilogue insertion).
17128 unsigned FrameReg;
17129 if (MF.getFunction().hasFnAttribute(Attribute::Naked))
17130 FrameReg = isPPC64 ? PPC::X1 : PPC::R1;
17131 else
17132 FrameReg = isPPC64 ? PPC::FP8 : PPC::FP;
17133
17134 SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg,
17135 PtrVT);
17136 while (Depth--)
17137 FrameAddr = DAG.getLoad(Op.getValueType(), dl, DAG.getEntryNode(),
17138 FrameAddr, MachinePointerInfo());
17139 return FrameAddr;
17140}
17141
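// For reference (illustrative): __builtin_return_address(0) lowers through
// LowerRETURNADDR above to a load from the LR save slot of the current frame,
// while __builtin_return_address(N > 0) first loads the caller's frame via
// LowerFRAMEADDR and then loads at getReturnSaveOffset() from that frame.
// __builtin_frame_address(N) simply walks the back-chain N times, starting
// from r1/x1 in naked functions or the FP pseudo-register otherwise.
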
17142// FIXME? Maybe this could be a TableGen attribute on some registers and
17143// this table could be generated automatically from RegInfo.
17144Register PPCTargetLowering::getRegisterByName(const char *RegName, LLT VT,
17145 const MachineFunction &MF) const {
17146 bool isPPC64 = Subtarget.isPPC64();
17147
17148 bool is64Bit = isPPC64 && VT == LLT::scalar(64);
17149 if (!is64Bit && VT != LLT::scalar(32))
17150 report_fatal_error("Invalid register global variable type");
17151
17152 Register Reg = StringSwitch<Register>(RegName)
17153 .Case("r1", is64Bit ? PPC::X1 : PPC::R1)
17154 .Case("r2", isPPC64 ? Register() : PPC::R2)
17155 .Case("r13", (is64Bit ? PPC::X13 : PPC::R13))
17156 .Default(Register());
17157
17158 if (Reg)
17159 return Reg;
17160 report_fatal_error("Invalid register name global variable");
17161}
17162
17163bool PPCTargetLowering::isAccessedAsGotIndirect(SDValue GA) const {
17164 // 32-bit SVR4 ABI accesses everything as got-indirect.
17165 if (Subtarget.is32BitELFABI())
17166 return true;
17167
17168 // AIX accesses everything indirectly through the TOC, which is similar to
17169 // the GOT.
17170 if (Subtarget.isAIXABI())
17171 return true;
17172
17173 CodeModel::Model CModel = getTargetMachine().getCodeModel();
17174 // If it is small or large code model, module locals are accessed
17175 // indirectly by loading their address from .toc/.got.
17176 if (CModel == CodeModel::Small || CModel == CodeModel::Large)
17177 return true;
17178
17179 // JumpTable and BlockAddress are accessed as got-indirect.
17180 if (isa<JumpTableSDNode>(GA) || isa<BlockAddressSDNode>(GA))
17181 return true;
17182
17183 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(GA))
17184 return Subtarget.isGVIndirectSymbol(G->getGlobal());
17185
17186 return false;
17187}
17188
17189bool
17190PPCTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
17191 // The PowerPC target isn't yet aware of offsets.
17192 return false;
17193}
17194
17195bool PPCTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
17196 const CallInst &I,
17197 MachineFunction &MF,
17198 unsigned Intrinsic) const {
17199 switch (Intrinsic) {
17200 case Intrinsic::ppc_atomicrmw_xchg_i128:
17201 case Intrinsic::ppc_atomicrmw_add_i128:
17202 case Intrinsic::ppc_atomicrmw_sub_i128:
17203 case Intrinsic::ppc_atomicrmw_nand_i128:
17204 case Intrinsic::ppc_atomicrmw_and_i128:
17205 case Intrinsic::ppc_atomicrmw_or_i128:
17206 case Intrinsic::ppc_atomicrmw_xor_i128:
17207 case Intrinsic::ppc_cmpxchg_i128:
17208 Info.opc = ISD::INTRINSIC_W_CHAIN;
17209 Info.memVT = MVT::i128;
17210 Info.ptrVal = I.getArgOperand(0);
17211 Info.offset = 0;
17212 Info.align = Align(16);
17213 Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore |
17214 MachineMemOperand::MOVolatile;
17215 return true;
17216 case Intrinsic::ppc_atomic_load_i128:
17217 Info.opc = ISD::INTRINSIC_W_CHAIN;
17218 Info.memVT = MVT::i128;
17219 Info.ptrVal = I.getArgOperand(0);
17220 Info.offset = 0;
17221 Info.align = Align(16);
17222 Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOVolatile;
17223 return true;
17224 case Intrinsic::ppc_atomic_store_i128:
17225 Info.opc = ISD::INTRINSIC_VOID;
17226 Info.memVT = MVT::i128;
17227 Info.ptrVal = I.getArgOperand(2);
17228 Info.offset = 0;
17229 Info.align = Align(16);
17230 Info.flags = MachineMemOperand::MOStore | MachineMemOperand::MOVolatile;
17231 return true;
17232 case Intrinsic::ppc_altivec_lvx:
17233 case Intrinsic::ppc_altivec_lvxl:
17234 case Intrinsic::ppc_altivec_lvebx:
17235 case Intrinsic::ppc_altivec_lvehx:
17236 case Intrinsic::ppc_altivec_lvewx:
17237 case Intrinsic::ppc_vsx_lxvd2x:
17238 case Intrinsic::ppc_vsx_lxvw4x:
17239 case Intrinsic::ppc_vsx_lxvd2x_be:
17240 case Intrinsic::ppc_vsx_lxvw4x_be:
17241 case Intrinsic::ppc_vsx_lxvl:
17242 case Intrinsic::ppc_vsx_lxvll: {
17243 EVT VT;
17244 switch (Intrinsic) {
17245 case Intrinsic::ppc_altivec_lvebx:
17246 VT = MVT::i8;
17247 break;
17248 case Intrinsic::ppc_altivec_lvehx:
17249 VT = MVT::i16;
17250 break;
17251 case Intrinsic::ppc_altivec_lvewx:
17252 VT = MVT::i32;
17253 break;
17254 case Intrinsic::ppc_vsx_lxvd2x:
17255 case Intrinsic::ppc_vsx_lxvd2x_be:
17256 VT = MVT::v2f64;
17257 break;
17258 default:
17259 VT = MVT::v4i32;
17260 break;
17261 }
17262
17263 Info.opc = ISD::INTRINSIC_W_CHAIN;
17264 Info.memVT = VT;
17265 Info.ptrVal = I.getArgOperand(0);
17266 Info.offset = -VT.getStoreSize()+1;
17267 Info.size = 2*VT.getStoreSize()-1;
17268 Info.align = Align(1);
17269 Info.flags = MachineMemOperand::MOLoad;
17270 return true;
17271 }
17272 case Intrinsic::ppc_altivec_stvx:
17273 case Intrinsic::ppc_altivec_stvxl:
17274 case Intrinsic::ppc_altivec_stvebx:
17275 case Intrinsic::ppc_altivec_stvehx:
17276 case Intrinsic::ppc_altivec_stvewx:
17277 case Intrinsic::ppc_vsx_stxvd2x:
17278 case Intrinsic::ppc_vsx_stxvw4x:
17279 case Intrinsic::ppc_vsx_stxvd2x_be:
17280 case Intrinsic::ppc_vsx_stxvw4x_be:
17281 case Intrinsic::ppc_vsx_stxvl:
17282 case Intrinsic::ppc_vsx_stxvll: {
17283 EVT VT;
17284 switch (Intrinsic) {
17285 case Intrinsic::ppc_altivec_stvebx:
17286 VT = MVT::i8;
17287 break;
17288 case Intrinsic::ppc_altivec_stvehx:
17289 VT = MVT::i16;
17290 break;
17291 case Intrinsic::ppc_altivec_stvewx:
17292 VT = MVT::i32;
17293 break;
17294 case Intrinsic::ppc_vsx_stxvd2x:
17295 case Intrinsic::ppc_vsx_stxvd2x_be:
17296 VT = MVT::v2f64;
17297 break;
17298 default:
17299 VT = MVT::v4i32;
17300 break;
17301 }
17302
17303 Info.opc = ISD::INTRINSIC_VOID;
17304 Info.memVT = VT;
17305 Info.ptrVal = I.getArgOperand(1);
17306 Info.offset = -VT.getStoreSize()+1;
17307 Info.size = 2*VT.getStoreSize()-1;
17308 Info.align = Align(1);
17309 Info.flags = MachineMemOperand::MOStore;
17310 return true;
17311 }
17312 case Intrinsic::ppc_stdcx:
17313 case Intrinsic::ppc_stwcx:
17314 case Intrinsic::ppc_sthcx:
17315 case Intrinsic::ppc_stbcx: {
17316 EVT VT;
17317 auto Alignment = Align(8);
17318 switch (Intrinsic) {
17319 case Intrinsic::ppc_stdcx:
17320 VT = MVT::i64;
17321 break;
17322 case Intrinsic::ppc_stwcx:
17323 VT = MVT::i32;
17324 Alignment = Align(4);
17325 break;
17326 case Intrinsic::ppc_sthcx:
17327 VT = MVT::i16;
17328 Alignment = Align(2);
17329 break;
17330 case Intrinsic::ppc_stbcx:
17331 VT = MVT::i8;
17332 Alignment = Align(1);
17333 break;
17334 }
17335 Info.opc = ISD::INTRINSIC_W_CHAIN;
17336 Info.memVT = VT;
17337 Info.ptrVal = I.getArgOperand(0);
17338 Info.offset = 0;
17339 Info.align = Alignment;
17340 Info.flags = MachineMemOperand::MOStore | MachineMemOperand::MOVolatile;
17341 return true;
17342 }
17343 default:
17344 break;
17345 }
17346
17347 return false;
17348}
17349
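// Note on the conservative Info.offset/Info.size used for the AltiVec/VSX
// loads and stores above (illustrative): lvx-style instructions ignore the
// low four bits of the effective address, so for a 16-byte access the bytes
// actually touched lie somewhere in [ptr - 15, ptr + 15]. That is exactly the
// window described by offset = -(StoreSize - 1) and size = 2 * StoreSize - 1.
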
17350/// It returns EVT::Other if the type should be determined using generic
17351/// target-independent logic.
17352EVT PPCTargetLowering::getOptimalMemOpType(
17353 const MemOp &Op, const AttributeList &FuncAttributes) const {
17354 if (getTargetMachine().getOptLevel() != CodeGenOptLevel::None) {
17355 // We should use Altivec/VSX loads and stores when available. For unaligned
17356 // addresses, unaligned VSX loads are only fast starting with the P8.
17357 if (Subtarget.hasAltivec() && Op.size() >= 16) {
17358 if (Op.isMemset() && Subtarget.hasVSX()) {
17359 uint64_t TailSize = Op.size() % 16;
17360 // For memset lowering, EXTRACT_VECTOR_ELT tries to return a constant
17361 // element if the vector element type matches the tail store. For a tail size
17362 // of 3 or 4, the tail store is i32, so v4i32 cannot be used; a legal type is needed.
17363 if (TailSize > 2 && TailSize <= 4) {
17364 return MVT::v8i16;
17365 }
17366 return MVT::v4i32;
17367 }
17368 if (Op.isAligned(Align(16)) || Subtarget.hasP8Vector())
17369 return MVT::v4i32;
17370 }
17371 }
17372
17373 if (Subtarget.isPPC64()) {
17374 return MVT::i64;
17375 }
17376
17377 return MVT::i32;
17378}
17379
17380/// Returns true if it is beneficial to convert a load of a constant
17381/// to just the constant itself.
17382bool PPCTargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
17383 Type *Ty) const {
17384 assert(Ty->isIntegerTy());
17385
17386 unsigned BitSize = Ty->getPrimitiveSizeInBits();
17387 return !(BitSize == 0 || BitSize > 64);
17388}
17389
17390bool PPCTargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const {
17391 if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
17392 return false;
17393 unsigned NumBits1 = Ty1->getPrimitiveSizeInBits();
17394 unsigned NumBits2 = Ty2->getPrimitiveSizeInBits();
17395 return NumBits1 == 64 && NumBits2 == 32;
17396}
17397
17398bool PPCTargetLowering::isTruncateFree(EVT VT1, EVT VT2) const {
17399 if (!VT1.isInteger() || !VT2.isInteger())
17400 return false;
17401 unsigned NumBits1 = VT1.getSizeInBits();
17402 unsigned NumBits2 = VT2.getSizeInBits();
17403 return NumBits1 == 64 && NumBits2 == 32;
17404}
17405
17406bool PPCTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
17407 // Generally speaking, zexts are not free, but they are free when they can be
17408 // folded with other operations.
17409 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Val)) {
17410 EVT MemVT = LD->getMemoryVT();
17411 if ((MemVT == MVT::i1 || MemVT == MVT::i8 || MemVT == MVT::i16 ||
17412 (Subtarget.isPPC64() && MemVT == MVT::i32)) &&
17413 (LD->getExtensionType() == ISD::NON_EXTLOAD ||
17414 LD->getExtensionType() == ISD::ZEXTLOAD))
17415 return true;
17416 }
17417
17418 // FIXME: Add other cases...
17419 // - 32-bit shifts with a zext to i64
17420 // - zext after ctlz, bswap, etc.
17421 // - zext after and by a constant mask
17422
17423 return TargetLowering::isZExtFree(Val, VT2);
17424}
17425
17426bool PPCTargetLowering::isFPExtFree(EVT DestVT, EVT SrcVT) const {
17427 assert(DestVT.isFloatingPoint() && SrcVT.isFloatingPoint() &&
17428 "invalid fpext types");
17429 // Extending to float128 is not free.
17430 if (DestVT == MVT::f128)
17431 return false;
17432 return true;
17433}
17434
17435bool PPCTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
17436 return isInt<16>(Imm) || isUInt<16>(Imm);
17437}
17438
17439bool PPCTargetLowering::isLegalAddImmediate(int64_t Imm) const {
17440 return isInt<16>(Imm) || isUInt<16>(Imm);
17441}
17442
17443bool PPCTargetLowering::allowsMisalignedMemoryAccesses(EVT VT, unsigned, Align,
17444 MachineMemOperand::Flags,
17445 unsigned *Fast) const {
17446 if (DisablePPCUnaligned)
17447 return false;
17448
17449 // PowerPC supports unaligned memory access for simple non-vector types.
17450 // Although accessing unaligned addresses is not as efficient as accessing
17451 // aligned addresses, it is generally more efficient than manual expansion,
17452 // and generally only traps for software emulation when crossing page
17453 // boundaries.
17454
17455 if (!VT.isSimple())
17456 return false;
17457
17458 if (VT.isFloatingPoint() && !VT.isVector() &&
17459 !Subtarget.allowsUnalignedFPAccess())
17460 return false;
17461
17462 if (VT.getSimpleVT().isVector()) {
17463 if (Subtarget.hasVSX()) {
17464 if (VT != MVT::v2f64 && VT != MVT::v2i64 &&
17465 VT != MVT::v4f32 && VT != MVT::v4i32)
17466 return false;
17467 } else {
17468 return false;
17469 }
17470 }
17471
17472 if (VT == MVT::ppcf128)
17473 return false;
17474
17475 if (Fast)
17476 *Fast = 1;
17477
17478 return true;
17479}
17480
17481bool PPCTargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT,
17482 SDValue C) const {
17483 // Check integral scalar types.
17484 if (!VT.isScalarInteger())
17485 return false;
17486 if (auto *ConstNode = dyn_cast<ConstantSDNode>(C.getNode())) {
17487 if (!ConstNode->getAPIntValue().isSignedIntN(64))
17488 return false;
17489 // This transformation will generate >= 2 operations. But the following
17490 // cases will generate <= 2 instructions during ISEL, so exclude them.
17491 // 1. If the constant multiplier fits in 16 bits, it can be handled by one
17492 // HW instruction, i.e. MULLI.
17493 // 2. If the multiplier fits in 16 bits after shifting out trailing zeros,
17494 // only one extra shift is needed beyond case 1, i.e. MULLI and RLDICR.
17495 int64_t Imm = ConstNode->getSExtValue();
17496 unsigned Shift = llvm::countr_zero<uint64_t>(Imm);
17497 Imm >>= Shift;
17498 if (isInt<16>(Imm))
17499 return false;
17500 uint64_t UImm = static_cast<uint64_t>(Imm);
17501 if (isPowerOf2_64(UImm + 1) || isPowerOf2_64(UImm - 1) ||
17502 isPowerOf2_64(1 - UImm) || isPowerOf2_64(-1 - UImm))
17503 return true;
17504 }
17505 return false;
17506}
17507
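// Worked examples for the check above (illustrative, profitability aside):
//   C = 10           -> shift out the low zero bit, 5 fits in 16 bits -> false (MULLI)
//   C = 65535        -> 0xFFFF, and 65535 + 1 is a power of two       -> true
//   C = 65537        -> 0x10001, and 65537 - 1 is a power of two      -> true
//   C = 12345678901  -> no UImm +/- 1 variant is a power of two       -> false
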
17508bool PPCTargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
17509 EVT VT) const {
17510 return isFMAFasterThanFMulAndFAdd(
17511 MF.getFunction(), VT.getTypeForEVT(MF.getFunction().getContext()));
17512}
17513
17514bool PPCTargetLowering::isFMAFasterThanFMulAndFAdd(const Function &F,
17515 Type *Ty) const {
17516 if (Subtarget.hasSPE() || Subtarget.useSoftFloat())
17517 return false;
17518 switch (Ty->getScalarType()->getTypeID()) {
17519 case Type::FloatTyID:
17520 case Type::DoubleTyID:
17521 return true;
17522 case Type::FP128TyID:
17523 return Subtarget.hasP9Vector();
17524 default:
17525 return false;
17526 }
17527}
17528
17529// FIXME: add more patterns which are not profitable to hoist.
17531 if (!I->hasOneUse())
17532 return true;
17533
17534 Instruction *User = I->user_back();
17535 assert(User && "A single use instruction with no uses.");
17536
17537 switch (I->getOpcode()) {
17538 case Instruction::FMul: {
17539 // Don't break FMA, PowerPC prefers FMA.
17540 if (User->getOpcode() != Instruction::FSub &&
17541 User->getOpcode() != Instruction::FAdd)
17542 return true;
17543
17544 const TargetOptions &Options = getTargetMachine().Options;
17545 const Function *F = I->getFunction();
17546 const DataLayout &DL = F->getDataLayout();
17547 Type *Ty = User->getOperand(0)->getType();
17548
17549 return !(
17550 isFMAFasterThanFMulAndFAdd(*F, Ty) &&
17551 isOperationLegalOrCustom(ISD::FMA, getValueType(DL, Ty)) &&
17552 (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath));
17553 }
17554 case Instruction::Load: {
17555 // Don't break the "store (load float*)" pattern; this pattern will be
17556 // combined to "store (load int32)" by a later InstCombine pass. See function
17557 // combineLoadToOperationType. On PowerPC, loading a floating-point value
17558 // takes more cycles than loading a 32-bit integer.
17559 LoadInst *LI = cast<LoadInst>(I);
17560 // For the loads that combineLoadToOperationType does nothing, like
17561 // ordered load, it should be profitable to hoist them.
17562 // For swifterror load, it can only be used for pointer to pointer type, so
17563 // later type check should get rid of this case.
17564 if (!LI->isUnordered())
17565 return true;
17566
17567 if (User->getOpcode() != Instruction::Store)
17568 return true;
17569
17570 if (I->getType()->getTypeID() != Type::FloatTyID)
17571 return true;
17572
17573 return false;
17574 }
17575 default:
17576 return true;
17577 }
17578 return true;
17579}
17580
17581const MCPhysReg *
17582PPCTargetLowering::getScratchRegisters(CallBase *) const {
17583 // LR is a callee-save register, but we must treat it as clobbered by any call
17584 // site. Hence we include LR in the scratch registers, which are in turn added
17585 // as implicit-defs for stackmaps and patchpoints. The same reasoning applies
17586 // to CTR, which is used by any indirect call.
17587 static const MCPhysReg ScratchRegs[] = {
17588 PPC::X12, PPC::LR8, PPC::CTR8, 0
17589 };
17590
17591 return ScratchRegs;
17592}
17593
17594Register PPCTargetLowering::getExceptionPointerRegister(
17595 const Constant *PersonalityFn) const {
17596 return Subtarget.isPPC64() ? PPC::X3 : PPC::R3;
17597}
17598
17599Register PPCTargetLowering::getExceptionSelectorRegister(
17600 const Constant *PersonalityFn) const {
17601 return Subtarget.isPPC64() ? PPC::X4 : PPC::R4;
17602}
17603
17604bool
17605PPCTargetLowering::shouldExpandBuildVectorWithShuffles(
17606 EVT VT , unsigned DefinedValues) const {
17607 if (VT == MVT::v2i64)
17608 return Subtarget.hasDirectMove(); // Don't need stack ops with direct moves
17609
17610 if (Subtarget.hasVSX())
17611 return true;
17612
17612
17613 return TargetLowering::shouldExpandBuildVectorWithShuffles(VT, DefinedValues);
17614}
17615
17616Sched::Preference PPCTargetLowering::getSchedulingPreference(SDNode *N) const {
17617 if (DisableILPPref || Subtarget.enableMachineScheduler())
17618 return TargetLowering::getSchedulingPreference(N);
17619
17620 return Sched::ILP;
17621}
17622
17623// Create a fast isel object.
17624FastISel *
17626 const TargetLibraryInfo *LibInfo) const {
17627 return PPC::createFastISel(FuncInfo, LibInfo);
17628}
17629
17630// 'Inverted' means the FMA opcode after negating one multiplicand.
17631// For example, (fma -a b c) = (fnmsub a b c)
17632static unsigned invertFMAOpcode(unsigned Opc) {
17633 switch (Opc) {
17634 default:
17635 llvm_unreachable("Invalid FMA opcode for PowerPC!");
17636 case ISD::FMA:
17637 return PPCISD::FNMSUB;
17638 case PPCISD::FNMSUB:
17639 return ISD::FMA;
17640 }
17641}
17642
17643SDValue PPCTargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG,
17644 bool LegalOps, bool OptForSize,
17645 NegatibleCost &Cost,
17646 unsigned Depth) const {
17647 if (Depth > SelectionDAG::MaxRecursionDepth)
17648 return SDValue();
17649
17650 unsigned Opc = Op.getOpcode();
17651 EVT VT = Op.getValueType();
17652 SDNodeFlags Flags = Op.getNode()->getFlags();
17653
17654 switch (Opc) {
17655 case PPCISD::FNMSUB:
17656 if (!Op.hasOneUse() || !isTypeLegal(VT))
17657 break;
17658
17659 const TargetOptions &Options = getTargetMachine().Options;
17660 SDValue N0 = Op.getOperand(0);
17661 SDValue N1 = Op.getOperand(1);
17662 SDValue N2 = Op.getOperand(2);
17663 SDLoc Loc(Op);
17664
17665 NegatibleCost N2Cost = NegatibleCost::Expensive;
17666 SDValue NegN2 =
17667 getNegatedExpression(N2, DAG, LegalOps, OptForSize, N2Cost, Depth + 1);
17668
17669 if (!NegN2)
17670 return SDValue();
17671
17672 // (fneg (fnmsub a b c)) => (fnmsub (fneg a) b (fneg c))
17673 // (fneg (fnmsub a b c)) => (fnmsub a (fneg b) (fneg c))
17674 // These transformations may change sign of zeroes. For example,
17675 // -(-ab-(-c))=-0 while -(-(ab-c))=+0 when a=b=c=1.
17676 if (Flags.hasNoSignedZeros() || Options.NoSignedZerosFPMath) {
17677 // Try and choose the cheaper one to negate.
17678 NegatibleCost N0Cost = NegatibleCost::Expensive;
17679 SDValue NegN0 = getNegatedExpression(N0, DAG, LegalOps, OptForSize,
17680 N0Cost, Depth + 1);
17681
17682 NegatibleCost N1Cost = NegatibleCost::Expensive;
17683 SDValue NegN1 = getNegatedExpression(N1, DAG, LegalOps, OptForSize,
17684 N1Cost, Depth + 1);
17685
17686 if (NegN0 && N0Cost <= N1Cost) {
17687 Cost = std::min(N0Cost, N2Cost);
17688 return DAG.getNode(Opc, Loc, VT, NegN0, N1, NegN2, Flags);
17689 } else if (NegN1) {
17690 Cost = std::min(N1Cost, N2Cost);
17691 return DAG.getNode(Opc, Loc, VT, N0, NegN1, NegN2, Flags);
17692 }
17693 }
17694
17695 // (fneg (fnmsub a b c)) => (fma a b (fneg c))
17696 if (isOperationLegal(ISD::FMA, VT)) {
17697 Cost = N2Cost;
17698 return DAG.getNode(ISD::FMA, Loc, VT, N0, N1, NegN2, Flags);
17699 }
17700
17701 break;
17702 }
17703
17704 return TargetLowering::getNegatedExpression(Op, DAG, LegalOps, OptForSize,
17705 Cost, Depth);
17706}
17707
17708// Override to enable LOAD_STACK_GUARD lowering on Linux.
17709bool PPCTargetLowering::useLoadStackGuardNode() const {
17710 if (!Subtarget.isTargetLinux())
17711 return TargetLowering::useLoadStackGuardNode();
17712 return true;
17713}
17714
17715// Override to disable global variable loading on Linux and insert AIX canary
17716// word declaration.
17717void PPCTargetLowering::insertSSPDeclarations(Module &M) const {
17718 if (Subtarget.isAIXABI()) {
17719 M.getOrInsertGlobal(AIXSSPCanaryWordName,
17720 PointerType::getUnqual(M.getContext()));
17721 return;
17722 }
17723 if (!Subtarget.isTargetLinux())
17724 return TargetLowering::insertSSPDeclarations(M);
17725}
17726
17727Value *PPCTargetLowering::getSDagStackGuard(const Module &M) const {
17728 if (Subtarget.isAIXABI())
17729 return M.getGlobalVariable(AIXSSPCanaryWordName);
17730 return TargetLowering::getSDagStackGuard(M);
17731}
17732
17733bool PPCTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
17734 bool ForCodeSize) const {
17735 if (!VT.isSimple() || !Subtarget.hasVSX())
17736 return false;
17737
17738 switch(VT.getSimpleVT().SimpleTy) {
17739 default:
17740 // For FP types that are currently not supported by PPC backend, return
17741 // false. Examples: f16, f80.
17742 return false;
17743 case MVT::f32:
17744 case MVT::f64: {
17745 if (Subtarget.hasPrefixInstrs() && Subtarget.hasP10Vector()) {
17746 // We can materialize all immediates via XXSPLTI32DX and XXSPLTIDP.
17747 return true;
17748 }
17749 bool IsExact;
17750 APSInt IntResult(16, false);
17751 // The rounding mode doesn't really matter because we only care about floats
17752 // that can be converted to integers exactly.
17753 Imm.convertToInteger(IntResult, APFloat::rmTowardZero, &IsExact);
17754 // For exact values in the range [-16, 15] we can materialize the float.
17755 if (IsExact && IntResult <= 15 && IntResult >= -16)
17756 return true;
17757 return Imm.isZero();
17758 }
17759 case MVT::ppcf128:
17760 return Imm.isPosZero();
17761 }
17762}
17763
17764// For vector shift operation op, fold
17765// (op x, (and y, ((1 << numbits(x)) - 1))) -> (target op x, y)
17766static SDValue stripModuloOnShift(const TargetLowering &TLI, SDNode *N,
17767 SelectionDAG &DAG) {
17768 SDValue N0 = N->getOperand(0);
17769 SDValue N1 = N->getOperand(1);
17770 EVT VT = N0.getValueType();
17771 unsigned OpSizeInBits = VT.getScalarSizeInBits();
17772 unsigned Opcode = N->getOpcode();
17773 unsigned TargetOpcode;
17774
17775 switch (Opcode) {
17776 default:
17777 llvm_unreachable("Unexpected shift operation");
17778 case ISD::SHL:
17779 TargetOpcode = PPCISD::SHL;
17780 break;
17781 case ISD::SRL:
17782 TargetOpcode = PPCISD::SRL;
17783 break;
17784 case ISD::SRA:
17785 TargetOpcode = PPCISD::SRA;
17786 break;
17787 }
17788
17789 if (VT.isVector() && TLI.isOperationLegal(Opcode, VT) &&
17790 N1->getOpcode() == ISD::AND)
17791 if (ConstantSDNode *Mask = isConstOrConstSplat(N1->getOperand(1)))
17792 if (Mask->getZExtValue() == OpSizeInBits - 1)
17793 return DAG.getNode(TargetOpcode, SDLoc(N), VT, N0, N1->getOperand(0));
17794
17795 return SDValue();
17796}
17797
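// Example of the fold above (illustrative): with v4i32 operands,
//   (srl %x, (and %y, <31, 31, 31, 31>))
// becomes (PPCISD::SRL %x, %y), because the hardware vector shift already
// interprets the shift amount modulo the 32-bit element width, which makes
// the AND redundant.
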
17798SDValue PPCTargetLowering::combineSHL(SDNode *N, DAGCombinerInfo &DCI) const {
17799 if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
17800 return Value;
17801
17802 SDValue N0 = N->getOperand(0);
17803 ConstantSDNode *CN1 = dyn_cast<ConstantSDNode>(N->getOperand(1));
17804 if (!Subtarget.isISA3_0() || !Subtarget.isPPC64() ||
17805 N0.getOpcode() != ISD::SIGN_EXTEND ||
17806 N0.getOperand(0).getValueType() != MVT::i32 || CN1 == nullptr ||
17807 N->getValueType(0) != MVT::i64)
17808 return SDValue();
17809
17810 // We can't save an operation here if the value is already extended, and
17811 // the existing shift is easier to combine.
17812 SDValue ExtsSrc = N0.getOperand(0);
17813 if (ExtsSrc.getOpcode() == ISD::TRUNCATE &&
17814 ExtsSrc.getOperand(0).getOpcode() == ISD::AssertSext)
17815 return SDValue();
17816
17817 SDLoc DL(N0);
17818 SDValue ShiftBy = SDValue(CN1, 0);
17819 // We want the shift amount to be i32 on the extswli, but the shift could
17820 // have an i64.
17821 if (ShiftBy.getValueType() == MVT::i64)
17822 ShiftBy = DCI.DAG.getConstant(CN1->getZExtValue(), DL, MVT::i32);
17823
17824 return DCI.DAG.getNode(PPCISD::EXTSWSLI, DL, MVT::i64, N0->getOperand(0),
17825 ShiftBy);
17826}
17827
17828SDValue PPCTargetLowering::combineSRA(SDNode *N, DAGCombinerInfo &DCI) const {
17829 if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
17830 return Value;
17831
17832 return SDValue();
17833}
17834
17835SDValue PPCTargetLowering::combineSRL(SDNode *N, DAGCombinerInfo &DCI) const {
17836 if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
17837 return Value;
17838
17839 return SDValue();
17840}
17841
17842// Transform (add X, (zext(setne Z, C))) -> (addze X, (addic (addi Z, -C), -1))
17843// Transform (add X, (zext(sete Z, C))) -> (addze X, (subfic (addi Z, -C), 0))
17844// When C is zero, the equation (addi Z, -C) can be simplified to Z
17845// Requirement: -C in [-32768, 32767], X and Z are MVT::i64 types
17846static SDValue combineADDToADDZE(SDNode *N, SelectionDAG &DAG,
17847 const PPCSubtarget &Subtarget) {
17848 if (!Subtarget.isPPC64())
17849 return SDValue();
17850
17851 SDValue LHS = N->getOperand(0);
17852 SDValue RHS = N->getOperand(1);
17853
17854 auto isZextOfCompareWithConstant = [](SDValue Op) {
17855 if (Op.getOpcode() != ISD::ZERO_EXTEND || !Op.hasOneUse() ||
17856 Op.getValueType() != MVT::i64)
17857 return false;
17858
17859 SDValue Cmp = Op.getOperand(0);
17860 if (Cmp.getOpcode() != ISD::SETCC || !Cmp.hasOneUse() ||
17861 Cmp.getOperand(0).getValueType() != MVT::i64)
17862 return false;
17863
17864 if (auto *Constant = dyn_cast<ConstantSDNode>(Cmp.getOperand(1))) {
17865 int64_t NegConstant = 0 - Constant->getSExtValue();
17866 // Due to the limitations of the addi instruction,
17867 // -C is required to be [-32768, 32767].
17868 return isInt<16>(NegConstant);
17869 }
17870
17871 return false;
17872 };
17873
17874 bool LHSHasPattern = isZextOfCompareWithConstant(LHS);
17875 bool RHSHasPattern = isZextOfCompareWithConstant(RHS);
17876
17877 // If there is a pattern, canonicalize a zext operand to the RHS.
17878 if (LHSHasPattern && !RHSHasPattern)
17879 std::swap(LHS, RHS);
17880 else if (!LHSHasPattern && !RHSHasPattern)
17881 return SDValue();
17882
17883 SDLoc DL(N);
17884 SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Glue);
17885 SDValue Cmp = RHS.getOperand(0);
17886 SDValue Z = Cmp.getOperand(0);
17887 auto *Constant = cast<ConstantSDNode>(Cmp.getOperand(1));
17888 int64_t NegConstant = 0 - Constant->getSExtValue();
17889
17890 switch(cast<CondCodeSDNode>(Cmp.getOperand(2))->get()) {
17891 default: break;
17892 case ISD::SETNE: {
17893 // when C == 0
17894 // --> addze X, (addic Z, -1).carry
17895 // /
17896 // add X, (zext(setne Z, C))--
17897 // \ when -32768 <= -C <= 32767 && C != 0
17898 // --> addze X, (addic (addi Z, -C), -1).carry
17899 SDValue Add = DAG.getNode(ISD::ADD, DL, MVT::i64, Z,
17900 DAG.getConstant(NegConstant, DL, MVT::i64));
17901 SDValue AddOrZ = NegConstant != 0 ? Add : Z;
17902 SDValue Addc = DAG.getNode(ISD::ADDC, DL, DAG.getVTList(MVT::i64, MVT::Glue),
17903 AddOrZ, DAG.getConstant(-1ULL, DL, MVT::i64));
17904 return DAG.getNode(ISD::ADDE, DL, VTs, LHS, DAG.getConstant(0, DL, MVT::i64),
17905 SDValue(Addc.getNode(), 1));
17906 }
17907 case ISD::SETEQ: {
17908 // when C == 0
17909 // --> addze X, (subfic Z, 0).carry
17910 // /
17911 // add X, (zext(sete Z, C))--
17912 // \ when -32768 <= -C <= 32767 && C != 0
17913 // --> addze X, (subfic (addi Z, -C), 0).carry
17914 SDValue Add = DAG.getNode(ISD::ADD, DL, MVT::i64, Z,
17915 DAG.getConstant(NegConstant, DL, MVT::i64));
17916 SDValue AddOrZ = NegConstant != 0 ? Add : Z;
17917 SDValue Subc = DAG.getNode(ISD::SUBC, DL, DAG.getVTList(MVT::i64, MVT::Glue),
17918 DAG.getConstant(0, DL, MVT::i64), AddOrZ);
17919 return DAG.getNode(ISD::ADDE, DL, VTs, LHS, DAG.getConstant(0, DL, MVT::i64),
17920 SDValue(Subc.getNode(), 1));
17921 }
17922 }
17923
17924 return SDValue();
17925}
17926
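// Worked instance of the SETNE case above (illustrative), for
//   (add %X, (zext (setne %Z, 7))) with i64 operands:
// it becomes addze %X, carry-of(addic (addi %Z, -7), -1). When %Z == 7 the
// addi yields 0, the addic produces no carry, and %X is returned unchanged;
// when %Z != 7 the addic on a nonzero value produces a carry of 1, so the
// addze adds exactly the 1 that the zext(setne) would have contributed.
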
17927// Transform
17928// (add C1, (MAT_PCREL_ADDR GlobalAddr+C2)) to
17929// (MAT_PCREL_ADDR GlobalAddr+(C1+C2))
17930// In this case both C1 and C2 must be known constants.
17931// C1+C2 must fit into a 34 bit signed integer.
17932static SDValue combineADDToMAT_PCREL_ADDR(SDNode *N, SelectionDAG &DAG,
17933 const PPCSubtarget &Subtarget) {
17934 if (!Subtarget.isUsingPCRelativeCalls())
17935 return SDValue();
17936
17937 // Check both Operand 0 and Operand 1 of the ADD node for the PCRel node.
17938 // If we find that node try to cast the Global Address and the Constant.
17939 SDValue LHS = N->getOperand(0);
17940 SDValue RHS = N->getOperand(1);
17941
17942 if (LHS.getOpcode() != PPCISD::MAT_PCREL_ADDR)
17943 std::swap(LHS, RHS);
17944
17945 if (LHS.getOpcode() != PPCISD::MAT_PCREL_ADDR)
17946 return SDValue();
17947
17948 // Operand zero of PPCISD::MAT_PCREL_ADDR is the GA node.
17949 GlobalAddressSDNode *GSDN = dyn_cast<GlobalAddressSDNode>(LHS.getOperand(0));
17950 ConstantSDNode* ConstNode = dyn_cast<ConstantSDNode>(RHS);
17951
17952 // Check that both casts succeeded.
17953 if (!GSDN || !ConstNode)
17954 return SDValue();
17955
17956 int64_t NewOffset = GSDN->getOffset() + ConstNode->getSExtValue();
17957 SDLoc DL(GSDN);
17958
17959 // The signed int offset needs to fit in 34 bits.
17960 if (!isInt<34>(NewOffset))
17961 return SDValue();
17962
17963 // The new global address is a copy of the old global address except
17964 // that it has the updated Offset.
17965 SDValue GA =
17966 DAG.getTargetGlobalAddress(GSDN->getGlobal(), DL, GSDN->getValueType(0),
17967 NewOffset, GSDN->getTargetFlags());
17968 SDValue MatPCRel =
17969 DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, GSDN->getValueType(0), GA);
17970 return MatPCRel;
17971}
17972
17973SDValue PPCTargetLowering::combineADD(SDNode *N, DAGCombinerInfo &DCI) const {
17974 if (auto Value = combineADDToADDZE(N, DCI.DAG, Subtarget))
17975 return Value;
17976
17977 if (auto Value = combineADDToMAT_PCREL_ADDR(N, DCI.DAG, Subtarget))
17978 return Value;
17979
17980 return SDValue();
17981}
17982
17983// Detect TRUNCATE operations on bitcasts of float128 values.
17984 // What we are looking for here is the situation where we extract a subset
17985// of bits from a 128 bit float.
17986// This can be of two forms:
17987// 1) BITCAST of f128 feeding TRUNCATE
17988// 2) BITCAST of f128 feeding SRL (a shift) feeding TRUNCATE
17989// The reason this is required is because we do not have a legal i128 type
17990// and so we want to prevent having to store the f128 and then reload part
17991// of it.
17992SDValue PPCTargetLowering::combineTRUNCATE(SDNode *N,
17993 DAGCombinerInfo &DCI) const {
17994 // If we are using CRBits then try that first.
17995 if (Subtarget.useCRBits()) {
17996 // Check if CRBits did anything and return that if it did.
17997 if (SDValue CRTruncValue = DAGCombineTruncBoolExt(N, DCI))
17998 return CRTruncValue;
17999 }
18000
18001 SDLoc dl(N);
18002 SDValue Op0 = N->getOperand(0);
18003
18004 // Looking for a truncate of i128 to i64.
18005 if (Op0.getValueType() != MVT::i128 || N->getValueType(0) != MVT::i64)
18006 return SDValue();
18007
18008 int EltToExtract = DCI.DAG.getDataLayout().isBigEndian() ? 1 : 0;
18009
18010 // SRL feeding TRUNCATE.
18011 if (Op0.getOpcode() == ISD::SRL) {
18012 ConstantSDNode *ConstNode = dyn_cast<ConstantSDNode>(Op0.getOperand(1));
18013 // The right shift has to be by 64 bits.
18014 if (!ConstNode || ConstNode->getZExtValue() != 64)
18015 return SDValue();
18016
18017 // Switch the element number to extract.
18018 EltToExtract = EltToExtract ? 0 : 1;
18019 // Update Op0 past the SRL.
18020 Op0 = Op0.getOperand(0);
18021 }
18022
18023 // BITCAST feeding a TRUNCATE possibly via SRL.
18024 if (Op0.getOpcode() == ISD::BITCAST &&
18025 Op0.getValueType() == MVT::i128 &&
18026 Op0.getOperand(0).getValueType() == MVT::f128) {
18027 SDValue Bitcast = DCI.DAG.getBitcast(MVT::v2i64, Op0.getOperand(0));
18028 return DCI.DAG.getNode(
18029 ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, Bitcast,
18030 DCI.DAG.getTargetConstant(EltToExtract, dl, MVT::i32));
18031 }
18032 return SDValue();
18033}
18034
18035SDValue PPCTargetLowering::combineMUL(SDNode *N, DAGCombinerInfo &DCI) const {
18036 SelectionDAG &DAG = DCI.DAG;
18037
18038 ConstantSDNode *ConstOpOrElement = isConstOrConstSplat(N->getOperand(1));
18039 if (!ConstOpOrElement)
18040 return SDValue();
18041
18042 // An imul is usually smaller than the alternative sequence for legal type.
18043 if (DAG.getMachineFunction().getFunction().hasMinSize() &&
18044 isOperationLegal(ISD::MUL, N->getValueType(0)))
18045 return SDValue();
18046
18047 auto IsProfitable = [this](bool IsNeg, bool IsAddOne, EVT VT) -> bool {
18048 switch (this->Subtarget.getCPUDirective()) {
18049 default:
18050 // TODO: enhance the condition for subtarget before pwr8
18051 return false;
18052 case PPC::DIR_PWR8:
18053 // type mul add shl
18054 // scalar 4 1 1
18055 // vector 7 2 2
18056 return true;
18057 case PPC::DIR_PWR9:
18058 case PPC::DIR_PWR10:
18059 case PPC::DIR_PWR_FUTURE:
18060 // type mul add shl
18061 // scalar 5 2 2
18062 // vector 7 2 2
18063
18064 // The cycle counts of the related operations are shown in the table above.
18065 // Because mul is 5 (scalar) / 7 (vector) cycles and add/sub/shl are all 2
18066 // for both scalar and vector types, the 2-instruction patterns (add/sub +
18067 // shl, 4 cycles) are always profitable; but the 3-instruction pattern
18068 // (mul x, -(2^N + 1)) => -(add (shl x, N), x) costs 6 (sub + add + shl).
18069 // So we should only do the latter for vector types.
18070 return IsAddOne && IsNeg ? VT.isVector() : true;
18071 }
18072 };
18073
18074 EVT VT = N->getValueType(0);
18075 SDLoc DL(N);
18076
18077 const APInt &MulAmt = ConstOpOrElement->getAPIntValue();
18078 bool IsNeg = MulAmt.isNegative();
18079 APInt MulAmtAbs = MulAmt.abs();
18080
18081 if ((MulAmtAbs - 1).isPowerOf2()) {
18082 // (mul x, 2^N + 1) => (add (shl x, N), x)
18083 // (mul x, -(2^N + 1)) => -(add (shl x, N), x)
18084
18085 if (!IsProfitable(IsNeg, true, VT))
18086 return SDValue();
18087
18088 SDValue Op0 = N->getOperand(0);
18089 SDValue Op1 =
18090 DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
18091 DAG.getConstant((MulAmtAbs - 1).logBase2(), DL, VT));
18092 SDValue Res = DAG.getNode(ISD::ADD, DL, VT, Op0, Op1);
18093
18094 if (!IsNeg)
18095 return Res;
18096
18097 return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Res);
18098 } else if ((MulAmtAbs + 1).isPowerOf2()) {
18099 // (mul x, 2^N - 1) => (sub (shl x, N), x)
18100 // (mul x, -(2^N - 1)) => (sub x, (shl x, N))
18101
18102 if (!IsProfitable(IsNeg, false, VT))
18103 return SDValue();
18104
18105 SDValue Op0 = N->getOperand(0);
18106 SDValue Op1 =
18107 DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
18108 DAG.getConstant((MulAmtAbs + 1).logBase2(), DL, VT));
18109
18110 if (!IsNeg)
18111 return DAG.getNode(ISD::SUB, DL, VT, Op1, Op0);
18112 else
18113 return DAG.getNode(ISD::SUB, DL, VT, Op0, Op1);
18114
18115 } else {
18116 return SDValue();
18117 }
18118}
18119
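// Worked examples of the decompositions above (illustrative; whether they are
// emitted also depends on IsProfitable for the CPU directive):
//   (mul x, 5)  -> (add (shl x, 2), x)            since 5 - 1 = 4 is 2^2
//   (mul x, 7)  -> (sub (shl x, 3), x)            since 7 + 1 = 8 is 2^3
//   (mul x, -7) -> (sub x, (shl x, 3))            negated form of the above
//   (mul x, -5) -> (sub 0, (add (shl x, 2), x))   negated add form
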
18120// Combine fma-like op (like fnmsub) with fnegs to appropriate op. Do this
18121// in combiner since we need to check SD flags and other subtarget features.
18122SDValue PPCTargetLowering::combineFMALike(SDNode *N,
18123 DAGCombinerInfo &DCI) const {
18124 SDValue N0 = N->getOperand(0);
18125 SDValue N1 = N->getOperand(1);
18126 SDValue N2 = N->getOperand(2);
18127 SDNodeFlags Flags = N->getFlags();
18128 EVT VT = N->getValueType(0);
18129 SelectionDAG &DAG = DCI.DAG;
18130 const TargetOptions &Options = getTargetMachine().Options;
18131 unsigned Opc = N->getOpcode();
18132 bool CodeSize = DAG.getMachineFunction().getFunction().hasOptSize();
18133 bool LegalOps = !DCI.isBeforeLegalizeOps();
18134 SDLoc Loc(N);
18135
18136 if (!isOperationLegal(ISD::FMA, VT))
18137 return SDValue();
18138
18139 // Allowing transformation to FNMSUB may change sign of zeroes when ab-c=0
18140 // since (fnmsub a b c)=-0 while c-ab=+0.
18141 if (!Flags.hasNoSignedZeros() && !Options.NoSignedZerosFPMath)
18142 return SDValue();
18143
18144 // (fma (fneg a) b c) => (fnmsub a b c)
18145 // (fnmsub (fneg a) b c) => (fma a b c)
18146 if (SDValue NegN0 = getCheaperNegatedExpression(N0, DAG, LegalOps, CodeSize))
18147 return DAG.getNode(invertFMAOpcode(Opc), Loc, VT, NegN0, N1, N2, Flags);
18148
18149 // (fma a (fneg b) c) => (fnmsub a b c)
18150 // (fnmsub a (fneg b) c) => (fma a b c)
18151 if (SDValue NegN1 = getCheaperNegatedExpression(N1, DAG, LegalOps, CodeSize))
18152 return DAG.getNode(invertFMAOpcode(Opc), Loc, VT, N0, NegN1, N2, Flags);
18153
18154 return SDValue();
18155}
18156
18157bool PPCTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
18158 // Only duplicate to increase tail-calls for the 64bit SysV ABIs.
18159 if (!Subtarget.is64BitELFABI())
18160 return false;
18161
18162 // If not a tail call then no need to proceed.
18163 if (!CI->isTailCall())
18164 return false;
18165
18166 // If sibling calls have been disabled and tail-calls aren't guaranteed
18167 // there is no reason to duplicate.
18168 auto &TM = getTargetMachine();
18169 if (!TM.Options.GuaranteedTailCallOpt && DisableSCO)
18170 return false;
18171
18172 // Can't tail call a function called indirectly, or if it has variadic args.
18173 const Function *Callee = CI->getCalledFunction();
18174 if (!Callee || Callee->isVarArg())
18175 return false;
18176
18177 // Make sure the callee and caller calling conventions are eligible for tco.
18178 const Function *Caller = CI->getParent()->getParent();
18179 if (!areCallingConvEligibleForTCO_64SVR4(Caller->getCallingConv(),
18180 CI->getCallingConv()))
18181 return false;
18182
18183 // If the function is local then we have a good chance at tail-calling it
18184 return getTargetMachine().shouldAssumeDSOLocal(Callee);
18185}
18186
18187bool PPCTargetLowering::
18188isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const {
18189 const Value *Mask = AndI.getOperand(1);
18190 // If the mask is suitable for andi. or andis. we should sink the and.
18191 if (const ConstantInt *CI = dyn_cast<ConstantInt>(Mask)) {
18192 // Can't handle constants wider than 64-bits.
18193 if (CI->getBitWidth() > 64)
18194 return false;
18195 int64_t ConstVal = CI->getZExtValue();
18196 return isUInt<16>(ConstVal) ||
18197 (isUInt<16>(ConstVal >> 16) && !(ConstVal & 0xFFFF));
18198 }
18199
18200 // For non-constant masks, we can always use the record-form and.
18201 return true;
18202}
18203
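// Examples for the constant-mask test above (illustrative):
//   and x, 0xFFFF       -> beneficial (fits andi.)
//   and x, 0xFFFF0000   -> beneficial (fits andis.)
//   and x, 0x1FFFF      -> not beneficial (needs more than one immediate AND)
//   and x, %y           -> beneficial (record-form and. handles any register mask)
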
18204/// getAddrModeForFlags - Based on the set of address flags, select the most
18205/// optimal instruction format to match by.
18206PPC::AddrMode PPCTargetLowering::getAddrModeForFlags(unsigned Flags) const {
18207 // This is not a node we should be handling here.
18208 if (Flags == PPC::MOF_None)
18209 return PPC::AM_None;
18210 // Unaligned D-Forms are tried first, followed by the aligned D-Forms.
18211 for (auto FlagSet : AddrModesMap.at(PPC::AM_DForm))
18212 if ((Flags & FlagSet) == FlagSet)
18213 return PPC::AM_DForm;
18214 for (auto FlagSet : AddrModesMap.at(PPC::AM_DSForm))
18215 if ((Flags & FlagSet) == FlagSet)
18216 return PPC::AM_DSForm;
18217 for (auto FlagSet : AddrModesMap.at(PPC::AM_DQForm))
18218 if ((Flags & FlagSet) == FlagSet)
18219 return PPC::AM_DQForm;
18220 for (auto FlagSet : AddrModesMap.at(PPC::AM_PrefixDForm))
18221 if ((Flags & FlagSet) == FlagSet)
18222 return PPC::AM_PrefixDForm;
18223 // If no other forms are selected, return an X-Form as it is the most
18224 // general addressing mode.
18225 return PPC::AM_XForm;
18226}
18227
18228/// Set alignment flags based on whether or not the Frame Index is aligned.
18229/// Utilized when computing flags for address computation when selecting
18230/// load and store instructions.
18231static void setAlignFlagsForFI(SDValue N, unsigned &FlagSet,
18232 SelectionDAG &DAG) {
18233 bool IsAdd = ((N.getOpcode() == ISD::ADD) || (N.getOpcode() == ISD::OR));
18234 FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(IsAdd ? N.getOperand(0) : N);
18235 if (!FI)
18236 return;
18237 MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
18238 unsigned FrameIndexAlign = MFI.getObjectAlign(FI->getIndex()).value();
18239 // If this is (add $FI, $S16Imm), the alignment flags are already set
18240 // based on the immediate. We just need to clear the alignment flags
18241 // if the FI alignment is weaker.
18242 if ((FrameIndexAlign % 4) != 0)
18243 FlagSet &= ~PPC::MOF_RPlusSImm16Mult4;
18244 if ((FrameIndexAlign % 16) != 0)
18245 FlagSet &= ~PPC::MOF_RPlusSImm16Mult16;
18246 // If the address is a plain FrameIndex, set alignment flags based on
18247 // FI alignment.
18248 if (!IsAdd) {
18249 if ((FrameIndexAlign % 4) == 0)
18250 FlagSet |= PPC::MOF_RPlusSImm16Mult4;
18251 if ((FrameIndexAlign % 16) == 0)
18252 FlagSet |= PPC::MOF_RPlusSImm16Mult16;
18253 }
18254}
18255
18256/// Given a node, compute flags that are used for address computation when
18257/// selecting load and store instructions. The flags computed are stored in
18258/// FlagSet. This function takes into account whether the node is a constant,
18259/// an ADD or OR, or neither, and computes the address flags accordingly.
18260static void computeFlagsForAddressComputation(SDValue N, unsigned &FlagSet,
18261 SelectionDAG &DAG) {
18262 // Set the alignment flags for the node depending on if the node is
18263 // 4-byte or 16-byte aligned.
18264 auto SetAlignFlagsForImm = [&](uint64_t Imm) {
18265 if ((Imm & 0x3) == 0)
18266 FlagSet |= PPC::MOF_RPlusSImm16Mult4;
18267 if ((Imm & 0xf) == 0)
18268 FlagSet |= PPC::MOF_RPlusSImm16Mult16;
18269 };
18270
18271 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) {
18272 // All 32-bit constants can be computed as LIS + Disp.
18273 const APInt &ConstImm = CN->getAPIntValue();
18274 if (ConstImm.isSignedIntN(32)) { // Flag to handle 32-bit constants.
18275 FlagSet |= PPC::MOF_AddrIsSImm32;
18276 SetAlignFlagsForImm(ConstImm.getZExtValue());
18277 setAlignFlagsForFI(N, FlagSet, DAG);
18278 }
18279 if (ConstImm.isSignedIntN(34)) // Flag to handle 34-bit constants.
18280 FlagSet |= PPC::MOF_RPlusSImm34;
18281 else // Let constant materialization handle large constants.
18282 FlagSet |= PPC::MOF_NotAddNorCst;
18283 } else if (N.getOpcode() == ISD::ADD || provablyDisjointOr(DAG, N)) {
18284 // This address can be represented as an addition of:
18285 // - Register + Imm16 (possibly a multiple of 4/16)
18286 // - Register + Imm34
18287 // - Register + PPCISD::Lo
18288 // - Register + Register
18289 // In any case, we won't have to match this as Base + Zero.
18290 SDValue RHS = N.getOperand(1);
18291 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(RHS)) {
18292 const APInt &ConstImm = CN->getAPIntValue();
18293 if (ConstImm.isSignedIntN(16)) {
18294 FlagSet |= PPC::MOF_RPlusSImm16; // Signed 16-bit immediates.
18295 SetAlignFlagsForImm(ConstImm.getZExtValue());
18296 setAlignFlagsForFI(N, FlagSet, DAG);
18297 }
18298 if (ConstImm.isSignedIntN(34))
18299 FlagSet |= PPC::MOF_RPlusSImm34; // Signed 34-bit immediates.
18300 else
18301 FlagSet |= PPC::MOF_RPlusR; // Register.
18302 } else if (RHS.getOpcode() == PPCISD::Lo && !RHS.getConstantOperandVal(1))
18303 FlagSet |= PPC::MOF_RPlusLo; // PPCISD::Lo.
18304 else
18305 FlagSet |= PPC::MOF_RPlusR;
18306 } else { // The address computation is not a constant or an addition.
18307 setAlignFlagsForFI(N, FlagSet, DAG);
18308 FlagSet |= PPC::MOF_NotAddNorCst;
18309 }
18310}
18311
18312static bool isPCRelNode(SDValue N) {
18313 return (N.getOpcode() == PPCISD::MAT_PCREL_ADDR ||
18314 isValidPCRelNode<ConstantPoolSDNode>(N) ||
18315 isValidPCRelNode<GlobalAddressSDNode>(N) ||
18316 isValidPCRelNode<JumpTableSDNode>(N) ||
18317 isValidPCRelNode<BlockAddressSDNode>(N));
18318}
18319
18320/// computeMOFlags - Given a node N and its Parent (a MemSDNode), compute
18321/// the address flags of the load/store instruction that is to be matched.
18322unsigned PPCTargetLowering::computeMOFlags(const SDNode *Parent, SDValue N,
18323 SelectionDAG &DAG) const {
18324 unsigned FlagSet = PPC::MOF_None;
18325
18326 // Compute subtarget flags.
18327 if (!Subtarget.hasP9Vector())
18328 FlagSet |= PPC::MOF_SubtargetBeforeP9;
18329 else
18330 FlagSet |= PPC::MOF_SubtargetP9;
18331
18332 if (Subtarget.hasPrefixInstrs())
18333 FlagSet |= PPC::MOF_SubtargetP10;
18334
18335 if (Subtarget.hasSPE())
18336 FlagSet |= PPC::MOF_SubtargetSPE;
18337
18338 // Check if we have a PCRel node and return early.
18339 if ((FlagSet & PPC::MOF_SubtargetP10) && isPCRelNode(N))
18340 return FlagSet;
18341
18342 // If the node is the paired load/store intrinsics, compute flags for
18343 // address computation and return early.
18344 unsigned ParentOp = Parent->getOpcode();
18345 if (Subtarget.isISA3_1() && ((ParentOp == ISD::INTRINSIC_W_CHAIN) ||
18346 (ParentOp == ISD::INTRINSIC_VOID))) {
18347 unsigned ID = Parent->getConstantOperandVal(1);
18348 if ((ID == Intrinsic::ppc_vsx_lxvp) || (ID == Intrinsic::ppc_vsx_stxvp)) {
18349 SDValue IntrinOp = (ID == Intrinsic::ppc_vsx_lxvp)
18350 ? Parent->getOperand(2)
18351 : Parent->getOperand(3);
18352 computeFlagsForAddressComputation(IntrinOp, FlagSet, DAG);
18353 FlagSet |= PPC::MOF_Vector;
18354 return FlagSet;
18355 }
18356 }
18357
18358 // Mark this as something we don't want to handle here if it is atomic
18359 // or pre-increment instruction.
18360 if (const LSBaseSDNode *LSB = dyn_cast<LSBaseSDNode>(Parent))
18361 if (LSB->isIndexed())
18362 return PPC::MOF_None;
18363
18364 // Compute in-memory type flags. This is based on if there are scalars,
18365 // floats or vectors.
18366 const MemSDNode *MN = dyn_cast<MemSDNode>(Parent);
18367 assert(MN && "Parent should be a MemSDNode!");
18368 EVT MemVT = MN->getMemoryVT();
18369 unsigned Size = MemVT.getSizeInBits();
18370 if (MemVT.isScalarInteger()) {
18371 assert(Size <= 128 &&
18372 "Not expecting scalar integers larger than 16 bytes!");
18373 if (Size < 32)
18374 FlagSet |= PPC::MOF_SubWordInt;
18375 else if (Size == 32)
18376 FlagSet |= PPC::MOF_WordInt;
18377 else
18378 FlagSet |= PPC::MOF_DoubleWordInt;
18379 } else if (MemVT.isVector() && !MemVT.isFloatingPoint()) { // Integer vectors.
18380 if (Size == 128)
18381 FlagSet |= PPC::MOF_Vector;
18382 else if (Size == 256) {
18383 assert(Subtarget.pairedVectorMemops() &&
18384 "256-bit vectors are only available when paired vector memops is "
18385 "enabled!");
18386 FlagSet |= PPC::MOF_Vector;
18387 } else
18388 llvm_unreachable("Not expecting illegal vectors!");
18389 } else { // Floating point type: can be scalar, f128 or vector types.
18390 if (Size == 32 || Size == 64)
18391 FlagSet |= PPC::MOF_ScalarFloat;
18392 else if (MemVT == MVT::f128 || MemVT.isVector())
18393 FlagSet |= PPC::MOF_Vector;
18394 else
18395 llvm_unreachable("Not expecting illegal scalar floats!");
18396 }
18397
18398 // Compute flags for address computation.
18399 computeFlagsForAddressComputation(N, FlagSet, DAG);
18400
18401 // Compute type extension flags.
18402 if (const LoadSDNode *LN = dyn_cast<LoadSDNode>(Parent)) {
18403 switch (LN->getExtensionType()) {
18404 case ISD::SEXTLOAD:
18405 FlagSet |= PPC::MOF_SExt;
18406 break;
18407 case ISD::EXTLOAD:
18408 case ISD::ZEXTLOAD:
18409 FlagSet |= PPC::MOF_ZExt;
18410 break;
18411 case ISD::NON_EXTLOAD:
18412 FlagSet |= PPC::MOF_NoExt;
18413 break;
18414 }
18415 } else
18416 FlagSet |= PPC::MOF_NoExt;
18417
18418 // For integers, no extension is the same as zero extension.
18419 // We set the extension mode to zero extension so we don't have
18420 // to add separate entries in AddrModesMap for loads and stores.
18421 if (MemVT.isScalarInteger() && (FlagSet & PPC::MOF_NoExt)) {
18422 FlagSet |= PPC::MOF_ZExt;
18423 FlagSet &= ~PPC::MOF_NoExt;
18424 }
18425
18426 // If we don't have prefixed instructions, 34-bit constants should be
18427 // treated as PPC::MOF_NotAddNorCst so they can match D-Forms.
18428 bool IsNonP1034BitConst =
18429 ((PPC::MOF_RPlusSImm34 | PPC::MOF_AddrIsSImm32 | PPC::MOF_SubtargetP10) &
18430 FlagSet) == PPC::MOF_RPlusSImm34;
18431 if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::OR &&
18432 IsNonP1034BitConst)
18433 FlagSet |= PPC::MOF_NotAddNorCst;
18434
18435 return FlagSet;
18436}
18437
18438/// SelectForceXFormMode - Given the specified address, force it to be
18439/// represented as an indexed [r+r] operation (an XForm instruction).
18440PPC::AddrMode PPCTargetLowering::SelectForceXFormMode(SDValue N, SDValue &Disp,
18441 SDValue &Base,
18442 SelectionDAG &DAG) const {
18443
18444 PPC::AddrMode Mode = PPC::AM_XForm;
18445 int16_t ForceXFormImm = 0;
18446 if (provablyDisjointOr(DAG, N) &&
18447 !isIntS16Immediate(N.getOperand(1), ForceXFormImm)) {
18448 Disp = N.getOperand(0);
18449 Base = N.getOperand(1);
18450 return Mode;
18451 }
18452
18453 // If the address is the result of an add, we will utilize the fact that the
18454 // address calculation includes an implicit add. However, we can reduce
18455 // register pressure if we do not materialize a constant just for use as the
18456 // index register. We only fold away the add when it is not an add of a
18457 // value and a 16-bit signed constant where both operands have a single use.
18458 if (N.getOpcode() == ISD::ADD &&
18459 (!isIntS16Immediate(N.getOperand(1), ForceXFormImm) ||
18460 !N.getOperand(1).hasOneUse() || !N.getOperand(0).hasOneUse())) {
18461 Disp = N.getOperand(0);
18462 Base = N.getOperand(1);
18463 return Mode;
18464 }
18465
18466 // Otherwise, use R0 as the base register.
18467 Disp = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
18468 N.getValueType());
18469 Base = N;
18470
18471 return Mode;
18472}
18473
18474bool PPCTargetLowering::splitValueIntoRegisterParts(
18475 SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
18476 unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const {
18477 EVT ValVT = Val.getValueType();
18478 // If we are splitting a scalar integer into f64 parts (i.e. so they
18479 // can be placed into VFRC registers), we need to zero extend and
18480 // bitcast the values. This will ensure the value is placed into a
18481 // VSR using direct moves or stack operations as needed.
18482 if (PartVT == MVT::f64 &&
18483 (ValVT == MVT::i32 || ValVT == MVT::i16 || ValVT == MVT::i8)) {
18484 Val = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, Val);
18485 Val = DAG.getNode(ISD::BITCAST, DL, MVT::f64, Val);
18486 Parts[0] = Val;
18487 return true;
18488 }
18489 return false;
18490}
18491
18492SDValue PPCTargetLowering::lowerToLibCall(const char *LibCallName, SDValue Op,
18493 SelectionDAG &DAG) const {
18494 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
18495 TargetLowering::CallLoweringInfo CLI(DAG);
18496 EVT RetVT = Op.getValueType();
18497 Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
18498 SDValue Callee =
18499 DAG.getExternalSymbol(LibCallName, TLI.getPointerTy(DAG.getDataLayout()));
18500 bool SignExtend = TLI.shouldSignExtendTypeInLibCall(RetVT, false);
18501 TargetLowering::ArgListTy Args;
18502 TargetLowering::ArgListEntry Entry;
18503 for (const SDValue &N : Op->op_values()) {
18504 EVT ArgVT = N.getValueType();
18505 Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
18506 Entry.Node = N;
18507 Entry.Ty = ArgTy;
18508 Entry.IsSExt = TLI.shouldSignExtendTypeInLibCall(ArgVT, SignExtend);
18509 Entry.IsZExt = !Entry.IsSExt;
18510 Args.push_back(Entry);
18511 }
18512
18513 SDValue InChain = DAG.getEntryNode();
18514 SDValue TCChain = InChain;
18515 const Function &F = DAG.getMachineFunction().getFunction();
18516 bool isTailCall =
18517 TLI.isInTailCallPosition(DAG, Op.getNode(), TCChain) &&
18518 (RetTy == F.getReturnType() || F.getReturnType()->isVoidTy());
18519 if (isTailCall)
18520 InChain = TCChain;
18521 CLI.setDebugLoc(SDLoc(Op))
18522 .setChain(InChain)
18523 .setLibCallee(CallingConv::C, RetTy, Callee, std::move(Args))
18524 .setTailCall(isTailCall)
18525 .setSExtResult(SignExtend)
18526 .setZExtResult(!SignExtend)
18527 .setIsPostTypeLegalization(true);
18528 return TLI.LowerCallTo(CLI).first;
18529}
18530
18531SDValue PPCTargetLowering::lowerLibCallBasedOnType(
18532 const char *LibCallFloatName, const char *LibCallDoubleName, SDValue Op,
18533 SelectionDAG &DAG) const {
18534 if (Op.getValueType() == MVT::f32)
18535 return lowerToLibCall(LibCallFloatName, Op, DAG);
18536
18537 if (Op.getValueType() == MVT::f64)
18538 return lowerToLibCall(LibCallDoubleName, Op, DAG);
18539
18540 return SDValue();
18541}
18542
18543bool PPCTargetLowering::isLowringToMASSFiniteSafe(SDValue Op) const {
18544 SDNodeFlags Flags = Op.getNode()->getFlags();
18545 return isLowringToMASSSafe(Op) && Flags.hasNoSignedZeros() &&
18546 Flags.hasNoNaNs() && Flags.hasNoInfs();
18547}
18548
18549bool PPCTargetLowering::isLowringToMASSSafe(SDValue Op) const {
18550 return Op.getNode()->getFlags().hasApproximateFuncs();
18551}
18552
18553bool PPCTargetLowering::isScalarMASSConversionEnabled() const {
18554 return getTargetMachine().Options.PPCGenScalarMASSEntries;
18555}
18556
18557SDValue PPCTargetLowering::lowerLibCallBase(const char *LibCallDoubleName,
18558 const char *LibCallFloatName,
18559 const char *LibCallDoubleNameFinite,
18560 const char *LibCallFloatNameFinite,
18561 SDValue Op,
18562 SelectionDAG &DAG) const {
18563 if (!isScalarMASSConversionEnabled() || !isLowringToMASSSafe(Op))
18564 return SDValue();
18565
18566 if (!isLowringToMASSFiniteSafe(Op))
18567 return lowerLibCallBasedOnType(LibCallFloatName, LibCallDoubleName, Op,
18568 DAG);
18569
18570 return lowerLibCallBasedOnType(LibCallFloatNameFinite,
18571 LibCallDoubleNameFinite, Op, DAG);
18572}
18573
18574SDValue PPCTargetLowering::lowerPow(SDValue Op, SelectionDAG &DAG) const {
18575 return lowerLibCallBase("__xl_pow", "__xl_powf", "__xl_pow_finite",
18576 "__xl_powf_finite", Op, DAG);
18577}
18578
18579SDValue PPCTargetLowering::lowerSin(SDValue Op, SelectionDAG &DAG) const {
18580 return lowerLibCallBase("__xl_sin", "__xl_sinf", "__xl_sin_finite",
18581 "__xl_sinf_finite", Op, DAG);
18582}
18583
18584SDValue PPCTargetLowering::lowerCos(SDValue Op, SelectionDAG &DAG) const {
18585 return lowerLibCallBase("__xl_cos", "__xl_cosf", "__xl_cos_finite",
18586 "__xl_cosf_finite", Op, DAG);
18587}
18588
18589SDValue PPCTargetLowering::lowerLog(SDValue Op, SelectionDAG &DAG) const {
18590 return lowerLibCallBase("__xl_log", "__xl_logf", "__xl_log_finite",
18591 "__xl_logf_finite", Op, DAG);
18592}
18593
18594SDValue PPCTargetLowering::lowerLog10(SDValue Op, SelectionDAG &DAG) const {
18595 return lowerLibCallBase("__xl_log10", "__xl_log10f", "__xl_log10_finite",
18596 "__xl_log10f_finite", Op, DAG);
18597}
18598
18599SDValue PPCTargetLowering::lowerExp(SDValue Op, SelectionDAG &DAG) const {
18600 return lowerLibCallBase("__xl_exp", "__xl_expf", "__xl_exp_finite",
18601 "__xl_expf_finite", Op, DAG);
18602}
18603
18604// If we happen to match to an aligned D-Form, check if the Frame Index is
18605// adequately aligned. If it is not, reset the mode to match to X-Form.
18606static void setXFormForUnalignedFI(SDValue N, unsigned Flags,
18607 PPC::AddrMode &Mode) {
18608 if (!isa<FrameIndexSDNode>(N))
18609 return;
18610 if ((Mode == PPC::AM_DSForm && !(Flags & PPC::MOF_RPlusSImm16Mult4)) ||
18611 (Mode == PPC::AM_DQForm && !(Flags & PPC::MOF_RPlusSImm16Mult16)))
18612 Mode = PPC::AM_XForm;
18613}
18614
18615/// SelectOptimalAddrMode - Based on a node N and its Parent (a MemSDNode),
18616/// compute the address flags of the node, get the optimal address mode based
18617/// on the flags, and set the Base and Disp based on the address mode.
18619 SDValue N, SDValue &Disp,
18620 SDValue &Base,
18621 SelectionDAG &DAG,
18622 MaybeAlign Align) const {
18623 SDLoc DL(Parent);
18624
18625 // Compute the address flags.
18626 unsigned Flags = computeMOFlags(Parent, N, DAG);
18627
18628 // Get the optimal address mode based on the Flags.
18629 PPC::AddrMode Mode = getAddrModeForFlags(Flags);
18630
18631 // If the address mode is DS-Form or DQ-Form, check if the FI is aligned.
18632 // Select an X-Form load if it is not.
18633 setXFormForUnalignedFI(N, Flags, Mode);
18634
18635 // Set the mode to PC-Relative addressing mode if we have a valid PC-Rel node.
18636 if ((Mode == PPC::AM_XForm) && isPCRelNode(N)) {
18637 assert(Subtarget.isUsingPCRelativeCalls() &&
18638 "Must be using PC-Relative calls when a valid PC-Relative node is "
18639 "present!");
18640 Mode = PPC::AM_PCRel;
18641 }
18642
18643 // Set Base and Disp accordingly depending on the address mode.
18644 switch (Mode) {
18645 case PPC::AM_DForm:
18646 case PPC::AM_DSForm:
18647 case PPC::AM_DQForm: {
18648 // This is a register plus a 16-bit immediate. The base will be the
18649 // register and the displacement will be the immediate unless it
18650 // isn't sufficiently aligned.
18651 if (Flags & PPC::MOF_RPlusSImm16) {
18652 SDValue Op0 = N.getOperand(0);
18653 SDValue Op1 = N.getOperand(1);
18654 int16_t Imm = Op1->getAsZExtVal();
18655 if (!Align || isAligned(*Align, Imm)) {
18656 Disp = DAG.getTargetConstant(Imm, DL, N.getValueType());
18657 Base = Op0;
18658 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Op0)) {
18659 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
18660 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
18661 }
18662 break;
18663 }
18664 }
18665 // This is a register plus the @lo relocation. The base is the register
18666 // and the displacement is the global address.
18667 else if (Flags & PPC::MOF_RPlusLo) {
18668 Disp = N.getOperand(1).getOperand(0); // The global address.
18669 assert(Disp.getOpcode() == ISD::TargetGlobalAddress ||
18670 Disp.getOpcode() == ISD::TargetGlobalTLSAddress ||
18671 Disp.getOpcode() == ISD::TargetConstantPool ||
18672 Disp.getOpcode() == ISD::TargetJumpTable);
18673 Base = N.getOperand(0);
18674 break;
18675 }
18676 // This is a constant address of at most 32 bits. The base will be
18677 // zero or load-immediate-shifted and the displacement will be
18678 // the low 16 bits of the address.
18679 else if (Flags & PPC::MOF_AddrIsSImm32) {
18680 auto *CN = cast<ConstantSDNode>(N);
18681 EVT CNType = CN->getValueType(0);
18682 uint64_t CNImm = CN->getZExtValue();
18683 // If this address fits entirely in a 16-bit sext immediate field, codegen
18684 // this as "d, 0".
18685 int16_t Imm;
18686 if (isIntS16Immediate(CN, Imm) && (!Align || isAligned(*Align, Imm))) {
18687 Disp = DAG.getTargetConstant(Imm, DL, CNType);
18688 Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
18689 CNType);
18690 break;
18691 }
18692 // Handle 32-bit sext immediate with LIS + Addr mode.
18693 if ((CNType == MVT::i32 || isInt<32>(CNImm)) &&
18694 (!Align || isAligned(*Align, CNImm))) {
18695 int32_t Addr = (int32_t)CNImm;
18696 // Otherwise, break this down into LIS + Disp.
18697 Disp = DAG.getTargetConstant((int16_t)Addr, DL, MVT::i32);
18698 Base =
18699 DAG.getTargetConstant((Addr - (int16_t)Addr) >> 16, DL, MVT::i32);
18700 uint32_t LIS = CNType == MVT::i32 ? PPC::LIS : PPC::LIS8;
18701 Base = SDValue(DAG.getMachineNode(LIS, DL, CNType, Base), 0);
18702 break;
18703 }
18704 }
18705 // Otherwise, the PPC::MOF_NotAdd flag is set. Load/Store is non-foldable.
18706 Disp = DAG.getTargetConstant(0, DL, getPointerTy(DAG.getDataLayout()));
18707 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N)) {
18708 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
18709 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
18710 } else
18711 Base = N;
18712 break;
18713 }
18714 case PPC::AM_PrefixDForm: {
18715 int64_t Imm34 = 0;
18716 unsigned Opcode = N.getOpcode();
18717 if (((Opcode == ISD::ADD) || (Opcode == ISD::OR)) &&
18718 (isIntS34Immediate(N.getOperand(1), Imm34))) {
18719 // N is an Add/OR node, and its operand is a 34-bit signed immediate.
18720 Disp = DAG.getTargetConstant(Imm34, DL, N.getValueType());
18721 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0)))
18722 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
18723 else
18724 Base = N.getOperand(0);
18725 } else if (isIntS34Immediate(N, Imm34)) {
18726 // The address is a 34-bit signed immediate.
18727 Disp = DAG.getTargetConstant(Imm34, DL, N.getValueType());
18728 Base = DAG.getRegister(PPC::ZERO8, N.getValueType());
18729 }
18730 break;
18731 }
18732 case PPC::AM_PCRel: {
18733 // When selecting PC-Relative instructions, "Base" is not utilized as
18734 // we select the address as [PC+imm].
18735 Disp = N;
18736 break;
18737 }
18738 case PPC::AM_None:
18739 break;
18740 default: { // By default, X-Form is always available to be selected.
18741 // When a frame index is not aligned, we also match by XForm.
18742 FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N);
18743 Base = FI ? N : N.getOperand(1);
18744 Disp = FI ? DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
18745 N.getValueType())
18746 : N.getOperand(0);
18747 break;
18748 }
18749 }
18750 return Mode;
18751}
18752
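// Select the argument/return-value assignment function for 64-bit ELF:
// return values of 'cold' calls use RetCC_PPC_Cold, everything else is
// assigned with CC_PPC64_ELF.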
18753CCAssignFn *PPCTargetLowering::ccAssignFnForCall(CallingConv::ID CC,
18754 bool Return,
18755 bool IsVarArg) const {
18756 switch (CC) {
18757 case CallingConv::Cold:
18758 return (Return ? RetCC_PPC_Cold : CC_PPC64_ELF);
18759 default:
18760 return CC_PPC64_ELF;
18761 }
18762}
18763
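// Quadword (128-bit) atomics are only inlined on 64-bit subtargets that
// report native quadword atomic support.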
18764bool PPCTargetLowering::shouldInlineQuadwordAtomics() const {
18765 return Subtarget.isPPC64() && Subtarget.hasQuadwordAtomics();
18766}
18767
18768TargetLowering::AtomicExpansionKind
18769PPCTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
18770 unsigned Size = AI->getType()->getPrimitiveSizeInBits();
18771 if (shouldInlineQuadwordAtomics() && Size == 128)
18772 return AtomicExpansionKind::MaskedIntrinsic;
18773
18774 switch (AI->getOperation()) {
18775 case AtomicRMWInst::UIncWrap:
18776 case AtomicRMWInst::UDecWrap:
18777 return AtomicExpansionKind::CmpXChg;
18778 default:
18779 return TargetLowering::shouldExpandAtomicRMWInIR(AI);
18780 }
18781
18782 llvm_unreachable("unreachable atomicrmw operation");
18783}
18784
18785TargetLowering::AtomicExpansionKind
18786PPCTargetLowering::shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const {
18787 unsigned Size = AI->getNewValOperand()->getType()->getPrimitiveSizeInBits();
18788 if (shouldInlineQuadwordAtomics() && Size == 128)
18789 return AtomicExpansionKind::MaskedIntrinsic;
18790 return TargetLowering::shouldExpandAtomicCmpXchgInIR(AI);
18791}
18792
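// Map an atomicrmw binary operation to the corresponding PPC 128-bit
// lock-free intrinsic, e.g. AtomicRMWInst::Add -> ppc_atomicrmw_add_i128.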
18793static Intrinsic::ID
18794getIntrinsicForAtomicRMWBinOp128(AtomicRMWInst::BinOp BinOp) {
18795 switch (BinOp) {
18796 default:
18797 llvm_unreachable("Unexpected AtomicRMW BinOp");
18798 case AtomicRMWInst::Xchg:
18799 return Intrinsic::ppc_atomicrmw_xchg_i128;
18800 case AtomicRMWInst::Add:
18801 return Intrinsic::ppc_atomicrmw_add_i128;
18802 case AtomicRMWInst::Sub:
18803 return Intrinsic::ppc_atomicrmw_sub_i128;
18804 case AtomicRMWInst::And:
18805 return Intrinsic::ppc_atomicrmw_and_i128;
18806 case AtomicRMWInst::Or:
18807 return Intrinsic::ppc_atomicrmw_or_i128;
18808 case AtomicRMWInst::Xor:
18809 return Intrinsic::ppc_atomicrmw_xor_i128;
18810 case AtomicRMWInst::Nand:
18811 return Intrinsic::ppc_atomicrmw_nand_i128;
18812 }
18813}
18814
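// Expand a 128-bit atomicrmw: split the incoming value into low/high i64
// halves, call the matching ppc_atomicrmw_*_i128 intrinsic on the aligned
// address, and stitch the two returned halves back into an i128 result.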
18815Value *PPCTargetLowering::emitMaskedAtomicRMWIntrinsic(
18816 IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
18817 Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
18818 assert(shouldInlineQuadwordAtomics() && "Only support quadword now");
18819 Module *M = Builder.GetInsertBlock()->getParent()->getParent();
18820 Type *ValTy = Incr->getType();
18821 assert(ValTy->getPrimitiveSizeInBits() == 128);
18822 Function *RMW = Intrinsic::getDeclaration(
18823 M, getIntrinsicForAtomicRMWBinOp128(AI->getOperation()));
18824 Type *Int64Ty = Type::getInt64Ty(M->getContext());
18825 Value *IncrLo = Builder.CreateTrunc(Incr, Int64Ty, "incr_lo");
18826 Value *IncrHi =
18827 Builder.CreateTrunc(Builder.CreateLShr(Incr, 64), Int64Ty, "incr_hi");
18828 Value *LoHi = Builder.CreateCall(RMW, {AlignedAddr, IncrLo, IncrHi});
18829 Value *Lo = Builder.CreateExtractValue(LoHi, 0, "lo");
18830 Value *Hi = Builder.CreateExtractValue(LoHi, 1, "hi");
18831 Lo = Builder.CreateZExt(Lo, ValTy, "lo64");
18832 Hi = Builder.CreateZExt(Hi, ValTy, "hi64");
18833 return Builder.CreateOr(
18834 Lo, Builder.CreateShl(Hi, ConstantInt::get(ValTy, 64)), "val64");
18835}
18836
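// Expand a 128-bit cmpxchg the same way: split the expected and new values
// into i64 halves, call ppc_cmpxchg_i128 between the leading and trailing
// fences, and rebuild the loaded i128 value from the returned halves.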
18837Value *PPCTargetLowering::emitMaskedAtomicCmpXchgIntrinsic(
18838 IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
18839 Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
18840 assert(shouldInlineQuadwordAtomics() && "Only support quadword now");
18841 Module *M = Builder.GetInsertBlock()->getParent()->getParent();
18842 Type *ValTy = CmpVal->getType();
18843 assert(ValTy->getPrimitiveSizeInBits() == 128);
18844 Function *IntCmpXchg =
18845 Intrinsic::getDeclaration(M, Intrinsic::ppc_cmpxchg_i128);
18846 Type *Int64Ty = Type::getInt64Ty(M->getContext());
18847 Value *CmpLo = Builder.CreateTrunc(CmpVal, Int64Ty, "cmp_lo");
18848 Value *CmpHi =
18849 Builder.CreateTrunc(Builder.CreateLShr(CmpVal, 64), Int64Ty, "cmp_hi");
18850 Value *NewLo = Builder.CreateTrunc(NewVal, Int64Ty, "new_lo");
18851 Value *NewHi =
18852 Builder.CreateTrunc(Builder.CreateLShr(NewVal, 64), Int64Ty, "new_hi");
18853 emitLeadingFence(Builder, CI, Ord);
18854 Value *LoHi =
18855 Builder.CreateCall(IntCmpXchg, {AlignedAddr, CmpLo, CmpHi, NewLo, NewHi});
18856 emitTrailingFence(Builder, CI, Ord);
18857 Value *Lo = Builder.CreateExtractValue(LoHi, 0, "lo");
18858 Value *Hi = Builder.CreateExtractValue(LoHi, 1, "hi");
18859 Lo = Builder.CreateZExt(Lo, ValTy, "lo64");
18860 Hi = Builder.CreateZExt(Hi, ValTy, "hi64");
18861 return Builder.CreateOr(
18862 Lo, Builder.CreateShl(Hi, ConstantInt::get(ValTy, 64)), "val64");
18863}
unsigned const MachineRegisterInfo * MRI
static unsigned getCallOpcode(const MachineFunction &CallerF, bool IsIndirect, bool IsTailCall, std::optional< CallLowering::PtrAuthInfo > &PAI, MachineRegisterInfo &MRI)
#define Success
static SDValue GeneratePerfectShuffle(unsigned ID, SDValue V1, SDValue V2, unsigned PFEntry, SDValue LHS, SDValue RHS, SelectionDAG &DAG, const SDLoc &dl)
GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit the specified operations t...
static bool isSignExtended(SDValue N, SelectionDAG &DAG)
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
unsigned Intr
static msgpack::DocNode getNode(msgpack::DocNode DN, msgpack::Type Type, MCValue Val)
static std::pair< Register, unsigned > getBaseWithConstantOffset(MachineRegisterInfo &MRI, Register Reg)
This file declares a class to represent arbitrary precision floating point values and provide a varie...
This file implements a class to represent arbitrary precision integral constant values and operations...
This file implements the APSInt class, which is a simple class that represents an arbitrary sized int...
static bool isLoad(int Opcode)
@ OP_COPY
static bool isFloatingPointZero(SDValue Op)
isFloatingPointZero - Return true if this is +0.0.
Function Alias Analysis Results
Atomic ordering constants.
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
This file contains the declarations for the subclasses of Constant, which represent the different fla...
return RetTy
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
Given that RA is a live propagate it s liveness to any other values it uses(according to Uses). void DeadArgumentEliminationPass
#define LLVM_DEBUG(X)
Definition: Debug.h:101
static RegisterPass< DebugifyModulePass > DM("debugify", "Attach debug info to everything")
This file defines the DenseMap class.
uint64_t Addr
uint64_t Size
bool End
Definition: ELF_riscv.cpp:480
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
const HexagonInstrInfo * TII
static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain, ISD::ArgFlagsTy Flags, SelectionDAG &DAG, const SDLoc &dl)
CreateCopyOfByValArgument - Make a copy of an aggregate at address specified by "Src" to address "Dst...
IRTranslator LLVM IR MI
#define RegName(no)
static LVOptions Options
Definition: LVOptions.cpp:25
lazy value info
static bool isConstantOrUndef(const SDValue Op)
static bool isSplat(Value *V)
Return true if V is a splat of a value (which is used when multiplying a matrix with a scalar).
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
#define G(x, y, z)
Definition: MD5.cpp:56
unsigned const TargetRegisterInfo * TRI
Module.h This file contains the declarations for the Module class.
uint64_t IntrinsicInst * II
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
static CodeModel::Model getCodeModel(const PPCSubtarget &S, const TargetMachine &TM, const MachineOperand &MO)
cl::opt< bool > ANDIGlueBug("expose-ppc-andi-glue-bug", cl::desc("expose the ANDI glue bug on PPC"), cl::Hidden)
static SDValue getCanonicalConstSplat(uint64_t Val, unsigned SplatSize, EVT VT, SelectionDAG &DAG, const SDLoc &dl)
getCanonicalConstSplat - Build a canonical splat immediate of Val with an element size of SplatSize.
static bool IsSelectCC(MachineInstr &MI)
static const TargetRegisterClass * getRegClassForSVT(MVT::SimpleValueType SVT, bool IsPPC64, bool HasP8Vector, bool HasVSX)
static bool isGPRShadowAligned(MCPhysReg Reg, Align RequiredAlign)
static bool needStackSlotPassParameters(const PPCSubtarget &Subtarget, const SmallVectorImpl< ISD::OutputArg > &Outs)
static bool isAlternatingShuffMask(const ArrayRef< int > &Mask, int NumElts)
static SDValue addShuffleForVecExtend(SDNode *N, SelectionDAG &DAG, SDValue Input, uint64_t Elems, uint64_t CorrectElems)
static cl::opt< bool > DisablePPCUnaligned("disable-ppc-unaligned", cl::desc("disable unaligned load/store generation on PPC"), cl::Hidden)
static SDValue combineADDToADDZE(SDNode *N, SelectionDAG &DAG, const PPCSubtarget &Subtarget)
static bool findConsecutiveLoad(LoadSDNode *LD, SelectionDAG &DAG)
static SDValue generateEquivalentSub(SDNode *N, int Size, bool Complement, bool Swap, SDLoc &DL, SelectionDAG &DAG)
This function is called when we have proved that a SETCC node can be replaced by subtraction (and oth...
static unsigned mapArgRegToOffsetAIX(unsigned Reg, const PPCFrameLowering *FL)
static SDValue combineADDToMAT_PCREL_ADDR(SDNode *N, SelectionDAG &DAG, const PPCSubtarget &Subtarget)
static void setAlignFlagsForFI(SDValue N, unsigned &FlagSet, SelectionDAG &DAG)
Set alignment flags based on whether or not the Frame Index is aligned.
static bool isTOCSaveRestoreRequired(const PPCSubtarget &Subtarget)
static void updateForAIXShLibTLSModelOpt(TLSModel::Model &Model, SelectionDAG &DAG, const TargetMachine &TM)
updateForAIXShLibTLSModelOpt - Helper to initialize TLS model opt settings, and then apply the update...
static bool provablyDisjointOr(SelectionDAG &DAG, const SDValue &N)
Used when computing address flags for selecting loads and stores.
static void CalculateTailCallArgDest(SelectionDAG &DAG, MachineFunction &MF, bool isPPC64, SDValue Arg, int SPDiff, unsigned ArgOffset, SmallVectorImpl< TailCallArgumentInfo > &TailCallArguments)
CalculateTailCallArgDest - Remember Argument for later processing.
static bool callsShareTOCBase(const Function *Caller, const GlobalValue *CalleeGV, const TargetMachine &TM)
constexpr uint64_t AIXSmallTlsPolicySizeLimit
static bool isPCRelNode(SDValue N)
static void LowerMemOpCallTo(SelectionDAG &DAG, MachineFunction &MF, SDValue Chain, SDValue Arg, SDValue PtrOff, int SPDiff, unsigned ArgOffset, bool isPPC64, bool isTailCall, bool isVector, SmallVectorImpl< SDValue > &MemOpChains, SmallVectorImpl< TailCallArgumentInfo > &TailCallArguments, const SDLoc &dl)
LowerMemOpCallTo - Store the argument to the stack or remember it in case of tail calls.
static cl::opt< unsigned > PPCGatherAllAliasesMaxDepth("ppc-gather-alias-max-depth", cl::init(18), cl::Hidden, cl::desc("max depth when checking alias info in GatherAllAliases()"))
static bool areCallingConvEligibleForTCO_64SVR4(CallingConv::ID CallerCC, CallingConv::ID CalleeCC)
static const MCPhysReg FPR[]
FPR - The set of FP registers that should be allocated for arguments on Darwin and AIX.
static SDNode * isBLACompatibleAddress(SDValue Op, SelectionDAG &DAG)
isCallCompatibleAddress - Return the immediate to use if the specified 32-bit value is representable ...
static Align CalculateStackSlotAlignment(EVT ArgVT, EVT OrigVT, ISD::ArgFlagsTy Flags, unsigned PtrByteSize)
CalculateStackSlotAlignment - Calculates the alignment of this argument on the stack.
static bool IsSelect(MachineInstr &MI)
static bool haveEfficientBuildVectorPattern(BuildVectorSDNode *V, bool HasDirectMove, bool HasP8Vector)
Do we have an efficient pattern in a .td file for this node?
static SDValue getSToVPermuted(SDValue OrigSToV, SelectionDAG &DAG, const PPCSubtarget &Subtarget)
static bool CC_AIX(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &S)
static void setUsesTOCBasePtr(MachineFunction &MF)
static SDValue transformCallee(const SDValue &Callee, SelectionDAG &DAG, const SDLoc &dl, const PPCSubtarget &Subtarget)
static unsigned EnsureStackAlignment(const PPCFrameLowering *Lowering, unsigned NumBytes)
EnsureStackAlignment - Round stack frame size up from NumBytes to ensure minimum alignment required f...
static SDValue stripModuloOnShift(const TargetLowering &TLI, SDNode *N, SelectionDAG &DAG)
static bool isStoreConditional(SDValue Intrin, unsigned &StoreWidth)
static bool hasSameArgumentList(const Function *CallerFn, const CallBase &CB)
static bool isFPExtLoad(SDValue Op)
static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op, SelectionDAG &DAG, const SDLoc &dl, EVT DestVT=MVT::Other)
BuildIntrinsicOp - Return a unary operator intrinsic node with the specified intrinsic ID.
static bool isConsecutiveLSLoc(SDValue Loc, EVT VT, LSBaseSDNode *Base, unsigned Bytes, int Dist, SelectionDAG &DAG)
static void StoreTailCallArgumentsToStackSlot(SelectionDAG &DAG, SDValue Chain, const SmallVectorImpl< TailCallArgumentInfo > &TailCallArgs, SmallVectorImpl< SDValue > &MemOpChains, const SDLoc &dl)
StoreTailCallArgumentsToStackSlot - Stores arguments to their stack slot.
static const char AIXSSPCanaryWordName[]
static cl::opt< bool > UseAbsoluteJumpTables("ppc-use-absolute-jumptables", cl::desc("use absolute jump tables on ppc"), cl::Hidden)
static void setXFormForUnalignedFI(SDValue N, unsigned Flags, PPC::AddrMode &Mode)
static void getMaxByValAlign(Type *Ty, Align &MaxAlign, Align MaxMaxAlign)
getMaxByValAlign - Helper for getByValTypeAlignment to determine the desired ByVal argument alignment...
static bool isConsecutiveLS(SDNode *N, LSBaseSDNode *Base, unsigned Bytes, int Dist, SelectionDAG &DAG)
static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize, unsigned LHSStart, unsigned RHSStart)
isVMerge - Common function, used to match vmrg* shuffles.
static void getLabelAccessInfo(bool IsPIC, const PPCSubtarget &Subtarget, unsigned &HiOpFlags, unsigned &LoOpFlags, const GlobalValue *GV=nullptr)
Return true if we should reference labels using a PICBase, set the HiOpFlags and LoOpFlags to the tar...
cl::opt< bool > DisableAutoPairedVecSt("disable-auto-paired-vec-st", cl::desc("disable automatically generated 32byte paired vector stores"), cl::init(true), cl::Hidden)
static void buildCallOperands(SmallVectorImpl< SDValue > &Ops, PPCTargetLowering::CallFlags CFlags, const SDLoc &dl, SelectionDAG &DAG, SmallVector< std::pair< unsigned, SDValue >, 8 > &RegsToPass, SDValue Glue, SDValue Chain, SDValue &Callee, int SPDiff, const PPCSubtarget &Subtarget)
static cl::opt< bool > DisableInnermostLoopAlign32("disable-ppc-innermost-loop-align32", cl::desc("don't always align innermost loop to 32 bytes on ppc"), cl::Hidden)
static bool usePartialVectorLoads(SDNode *N, const PPCSubtarget &ST)
Returns true if we should use a direct load into vector instruction (such as lxsd or lfd),...
static SDValue getDataClassTest(SDValue Op, FPClassTest Mask, const SDLoc &Dl, SelectionDAG &DAG, const PPCSubtarget &Subtarget)
static cl::opt< bool > DisableSCO("disable-ppc-sco", cl::desc("disable sibling call optimization on ppc"), cl::Hidden)
static void fixupFuncForFI(SelectionDAG &DAG, int FrameIdx, EVT VT)
static cl::opt< bool > DisablePPCPreinc("disable-ppc-preinc", cl::desc("disable preincrement load/store generation on PPC"), cl::Hidden)
static Intrinsic::ID getIntrinsicForAtomicRMWBinOp128(AtomicRMWInst::BinOp BinOp)
static SDValue convertFPToInt(SDValue Op, SelectionDAG &DAG, const PPCSubtarget &Subtarget)
static unsigned CalculateStackSlotSize(EVT ArgVT, ISD::ArgFlagsTy Flags, unsigned PtrByteSize)
CalculateStackSlotSize - Calculates the size reserved for this argument on the stack.
static int CalculateTailCallSPDiff(SelectionDAG &DAG, bool isTailCall, unsigned ParamSize)
CalculateTailCallSPDiff - Get the amount the stack pointer has to be adjusted to accommodate the argu...
static Instruction * callIntrinsic(IRBuilderBase &Builder, Intrinsic::ID Id)
static void fixupShuffleMaskForPermutedSToV(SmallVectorImpl< int > &ShuffV, int LHSMaxIdx, int RHSMinIdx, int RHSMaxIdx, int HalfVec, unsigned ValidLaneWidth, const PPCSubtarget &Subtarget)
static void prepareIndirectCall(SelectionDAG &DAG, SDValue &Callee, SDValue &Glue, SDValue &Chain, const SDLoc &dl)
static SDValue LowerLabelRef(SDValue HiPart, SDValue LoPart, bool isPIC, SelectionDAG &DAG)
static SDValue isScalarToVec(SDValue Op)
static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl)
static cl::opt< bool > DisablePerfectShuffle("ppc-disable-perfect-shuffle", cl::desc("disable vector permute decomposition"), cl::init(true), cl::Hidden)
static bool getVectorCompareInfo(SDValue Intrin, int &CompareOpc, bool &isDot, const PPCSubtarget &Subtarget)
getVectorCompareInfo - Given an intrinsic, return false if it is not a vector comparison.
static unsigned invertFMAOpcode(unsigned Opc)
static const SDValue * getNormalLoadInput(const SDValue &Op, bool &IsPermuted)
static cl::opt< unsigned > PPCMinimumJumpTableEntries("ppc-min-jump-table-entries", cl::init(64), cl::Hidden, cl::desc("Set minimum number of entries to use a jump table on PPC"))
static bool isValidSplatLoad(const PPCSubtarget &Subtarget, const SDValue &Op, unsigned &Opcode)
static SDValue convertIntToFP(SDValue Op, SDValue Src, SelectionDAG &DAG, const PPCSubtarget &Subtarget, SDValue Chain=SDValue())
static int getEstimateRefinementSteps(EVT VT, const PPCSubtarget &Subtarget)
static void PrepareTailCall(SelectionDAG &DAG, SDValue &InGlue, SDValue &Chain, const SDLoc &dl, int SPDiff, unsigned NumBytes, SDValue LROp, SDValue FPOp, SmallVectorImpl< TailCallArgumentInfo > &TailCallArguments)
static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG, SDValue Chain, SDValue OldRetAddr, SDValue OldFP, int SPDiff, const SDLoc &dl)
EmitTailCallStoreFPAndRetAddr - Move the frame pointer and return address to the appropriate stack sl...
static SDValue BuildVSLDOI(SDValue LHS, SDValue RHS, unsigned Amt, EVT VT, SelectionDAG &DAG, const SDLoc &dl)
BuildVSLDOI - Return a VECTOR_SHUFFLE that is a vsldoi of the specified amount.
static SDValue combineBVZEXTLOAD(SDNode *N, SelectionDAG &DAG)
static SDValue truncateScalarIntegerArg(ISD::ArgFlagsTy Flags, EVT ValVT, SelectionDAG &DAG, SDValue ArgValue, MVT LocVT, const SDLoc &dl)
static void computeFlagsForAddressComputation(SDValue N, unsigned &FlagSet, SelectionDAG &DAG)
Given a node, compute flags that are used for address computation when selecting load and store instr...
cl::opt< bool > ANDIGlueBug
static SDValue getOutputChainFromCallSeq(SDValue CallSeqStart)
static bool CalculateStackSlotUsed(EVT ArgVT, EVT OrigVT, ISD::ArgFlagsTy Flags, unsigned PtrByteSize, unsigned LinkageSize, unsigned ParamAreaSize, unsigned &ArgOffset, unsigned &AvailableFPRs, unsigned &AvailableVRs)
CalculateStackSlotUsed - Return whether this argument will use its stack slot (instead of being passe...
static cl::opt< unsigned > PPCAIXTLSModelOptUseIEForLDLimit("ppc-aix-shared-lib-tls-model-opt-limit", cl::init(1), cl::Hidden, cl::desc("Set inclusive limit count of TLS local-dynamic access(es) in a " "function to use initial-exec"))
static unsigned getPPCStrictOpcode(unsigned Opc)
static void prepareDescriptorIndirectCall(SelectionDAG &DAG, SDValue &Callee, SDValue &Glue, SDValue &Chain, SDValue CallSeqStart, const CallBase *CB, const SDLoc &dl, bool hasNest, const PPCSubtarget &Subtarget)
static bool isXXBRShuffleMaskHelper(ShuffleVectorSDNode *N, int Width)
static bool isFunctionGlobalAddress(const GlobalValue *CalleeGV)
static bool isSplatBV(SDValue Op)
static SDValue combineBVOfVecSExt(SDNode *N, SelectionDAG &DAG)
static cl::opt< bool > DisableILPPref("disable-ppc-ilp-pref", cl::desc("disable setting the node scheduling preference to ILP on PPC"), cl::Hidden)
static bool isNByteElemShuffleMask(ShuffleVectorSDNode *, unsigned, int)
Check that the mask is shuffling N byte elements.
static SDValue combineBVOfConsecutiveLoads(SDNode *N, SelectionDAG &DAG)
Reduce the number of loads when building a vector.
static bool isValidPCRelNode(SDValue N)
const char LLVMTargetMachineRef TM
const SmallVectorImpl< MachineOperand > & Cond
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
SI optimize exec mask operations pre RA
static const MCExpr * MaskShift(const MCExpr *Val, uint32_t Mask, uint32_t Shift, MCContext &Ctx)
This file contains some templates that are useful if you are working with the STL at all.
This file defines the SmallPtrSet class.
This file defines the SmallSet class.
This file defines the SmallVector class.
static bool Enabled
Definition: Statistic.cpp:46
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:167
This file implements the StringSwitch template, which mimics a switch() statement whose cases are str...
This file describes how to lower LLVM code to machine code.
This defines the Use class.
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
Definition: Value.cpp:469
static bool is64Bit(const char *name)
Value * RHS
Value * LHS
bool isFixed(unsigned ValNo) const
Definition: PPCCCState.h:68
opStatus convert(const fltSemantics &ToSemantics, roundingMode RM, bool *losesInfo)
Definition: APFloat.cpp:5282
bool isDenormal() const
Definition: APFloat.h:1349
APInt bitcastToAPInt() const
Definition: APFloat.h:1254
Class for arbitrary precision integers.
Definition: APInt.h:77
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition: APInt.h:213
void clearBit(unsigned BitPosition)
Set a given bit to 0.
Definition: APInt.h:1386
bool isNegatedPowerOf2() const
Check if this APInt's negated value is a power of two greater than zero.
Definition: APInt.h:428
APInt zext(unsigned width) const
Zero extend to a new width.
Definition: APInt.cpp:981
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1499
void setBit(unsigned BitPosition)
Set the given bit to 1 whose position is given as "bitPosition".
Definition: APInt.h:1309
APInt abs() const
Get the absolute value.
Definition: APInt.h:1752
bool isNegative() const
Determine sign of this APInt.
Definition: APInt.h:308
bool isSignedIntN(unsigned N) const
Check if this APInt has an N-bits signed integer value.
Definition: APInt.h:414
bool getBoolValue() const
Convert APInt to a boolean value.
Definition: APInt.h:450
double bitsToDouble() const
Converts APInt bits to a double.
Definition: APInt.h:1679
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
Definition: APInt.h:419
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
Definition: APInt.h:285
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
Definition: APInt.h:275
An arbitrary precision integer that knows its signedness.
Definition: APSInt.h:23
This class represents an incoming formal argument to a Function.
Definition: Argument.h:31
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:165
An instruction that atomically checks whether a specified value is in a memory location,...
Definition: Instructions.h:494
an instruction that atomically reads a memory location, combines it with another value,...
Definition: Instructions.h:695
BinOp
This enumeration lists the possible modifications atomicrmw can make.
Definition: Instructions.h:707
@ Add
*p = old + v
Definition: Instructions.h:711
@ Or
*p = old | v
Definition: Instructions.h:719
@ Sub
*p = old - v
Definition: Instructions.h:713
@ And
*p = old & v
Definition: Instructions.h:715
@ Xor
*p = old ^ v
Definition: Instructions.h:721
@ UIncWrap
Increment one up to a maximum value.
Definition: Instructions.h:747
@ UDecWrap
Decrement one until a minimum value or zero.
Definition: Instructions.h:751
@ Nand
*p = ~(old & v)
Definition: Instructions.h:717
BinOp getOperation() const
Definition: Instructions.h:786
This is an SDNode representing atomic operations.
StringRef getValueAsString() const
Return the attribute's value as a string.
Definition: Attributes.cpp:391
LLVM Basic Block Representation.
Definition: BasicBlock.h:61
InstListType::const_iterator const_iterator
Definition: BasicBlock.h:168
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:209
const BlockAddress * getBlockAddress() const
The address of a basic block.
Definition: Constants.h:890
static BranchProbability getOne()
static BranchProbability getZero()
A "pseudo-class" with methods for operating on BUILD_VECTORs.
bool isConstantSplat(APInt &SplatValue, APInt &SplatUndef, unsigned &SplatBitSize, bool &HasAnyUndefs, unsigned MinSplatBits=0, bool isBigEndian=false) const
Check if this is a constant splat, and if so, find the smallest element size that splats the vector.
CCState - This class holds information needed while lowering arguments and return values.
MachineFunction & getMachineFunction() const
unsigned getFirstUnallocated(ArrayRef< MCPhysReg > Regs) const
getFirstUnallocated - Return the index of the first unallocated register in the set,...
MCRegister AllocateReg(MCPhysReg Reg)
AllocateReg - Attempt to allocate one register.
int64_t AllocateStack(unsigned Size, Align Alignment)
AllocateStack - Allocate a chunk of stack space with the specified size and alignment.
uint64_t getStackSize() const
Returns the size of the currently allocated portion of the stack.
bool isVarArg() const
void addLoc(const CCValAssign &V)
CCValAssign - Represent assignment of one arg/retval to a location.
bool isRegLoc() const
Register getLocReg() const
LocInfo getLocInfo() const
static CCValAssign getMem(unsigned ValNo, MVT ValVT, int64_t Offset, MVT LocVT, LocInfo HTP, bool IsCustom=false)
static CCValAssign getReg(unsigned ValNo, MVT ValVT, unsigned RegNo, MVT LocVT, LocInfo HTP, bool IsCustom=false)
bool needsCustom() const
bool isMemLoc() const
static CCValAssign getCustomReg(unsigned ValNo, MVT ValVT, unsigned RegNo, MVT LocVT, LocInfo HTP)
int64_t getLocMemOffset() const
unsigned getValNo() const
static CCValAssign getCustomMem(unsigned ValNo, MVT ValVT, int64_t Offset, MVT LocVT, LocInfo HTP)
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Definition: InstrTypes.h:1236
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
Definition: InstrTypes.h:1465
bool isStrictFP() const
Determine if the call requires strict floating point semantics.
Definition: InstrTypes.h:1971
CallingConv::ID getCallingConv() const
Definition: InstrTypes.h:1523
User::op_iterator arg_begin()
Return the iterator pointing to the beginning of the argument list.
Definition: InstrTypes.h:1385
bool isMustTailCall() const
Tests if this call site must be tail call optimized.
Value * getCalledOperand() const
Definition: InstrTypes.h:1458
User::op_iterator arg_end()
Return the iterator pointing to the end of the argument list.
Definition: InstrTypes.h:1391
unsigned arg_size() const
Definition: InstrTypes.h:1408
Function * getCaller()
Helper to get the caller (the parent function).
This class represents a function call, abstracting a target machine's calling convention.
bool isTailCall() const
ConstantFP - Floating Point Values [float, double].
Definition: Constants.h:269
This is the shared class of boolean and integer constants.
Definition: Constants.h:81
uint64_t getZExtValue() const
const APInt & getAPIntValue() const
int64_t getSExtValue() const
This is an important base class in LLVM.
Definition: Constant.h:41
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:110
bool isLittleEndian() const
Layout endianness...
Definition: DataLayout.h:238
unsigned getLargestLegalIntTypeSizeInBits() const
Returns the size of largest legal integer type size, or 0 if none are set.
Definition: DataLayout.cpp:900
IntegerType * getIntPtrType(LLVMContext &C, unsigned AddressSpace=0) const
Returns an integer type with size at least as big as that of a pointer in the given address space.
Definition: DataLayout.cpp:878
Align getABITypeAlign(Type *Ty) const
Returns the minimum ABI-required alignment for the specified type.
Definition: DataLayout.cpp:865
TypeSize getTypeAllocSize(Type *Ty) const
Returns the offset in bytes between successive objects of the specified type, including alignment pad...
Definition: DataLayout.h:504
A debug info location.
Definition: DebugLoc.h:33
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:155
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:220
This is a fast-path instruction selection class that generates poor code and doesn't support illegal ...
Definition: FastISel.h:66
FunctionLoweringInfo - This contains information that is global to a function that is used when lower...
bool hasOptSize() const
Optimize this function for size (-Os) or minimum size (-Oz).
Definition: Function.h:698
const DataLayout & getDataLayout() const
Get the data layout of the module this function belongs to.
Definition: Function.cpp:362
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
Definition: Function.cpp:716
uint64_t getFnAttributeAsParsedInteger(StringRef Kind, uint64_t Default=0) const
For a string attribute Kind, parse attribute as an integer.
Definition: Function.cpp:728
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition: Function.h:695
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition: Function.h:274
AttributeList getAttributes() const
Return the attribute list for this Function.
Definition: Function.h:350
BasicBlockListType::const_iterator const_iterator
Definition: Function.h:70
arg_iterator arg_begin()
Definition: Function.h:831
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition: Function.cpp:358
size_t arg_size() const
Definition: Function.h:864
Type * getReturnType() const
Returns the type of the ret val.
Definition: Function.h:212
bool isVarArg() const
isVarArg - Return true if this function takes a variable number of arguments.
Definition: Function.h:225
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition: Function.cpp:690
const GlobalValue * getGlobal() const
const GlobalObject * getAliaseeObject() const
Definition: Globals.cpp:582
bool isThreadLocal() const
If the value is "Thread Local", its value isn't shared by the threads.
Definition: GlobalValue.h:263
void setThreadLocalMode(ThreadLocalMode Val)
Definition: GlobalValue.h:267
bool hasHiddenVisibility() const
Definition: GlobalValue.h:250
StringRef getSection() const
Definition: Globals.cpp:183
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:656
bool isStrongDefinitionForLinker() const
Returns true if this global's definition will be the one chosen by the linker.
Definition: GlobalValue.h:631
const DataLayout & getDataLayout() const
Get the data layout of the module this global belongs to.
Definition: Globals.cpp:124
bool hasComdat() const
Definition: GlobalValue.h:241
Type * getValueType() const
Definition: GlobalValue.h:296
bool hasProtectedVisibility() const
Definition: GlobalValue.h:251
Common base class shared among various IRBuilders.
Definition: IRBuilder.h:92
Value * CreateExtractValue(Value *Agg, ArrayRef< unsigned > Idxs, const Twine &Name="")
Definition: IRBuilder.h:2514
Value * CreateLShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
Definition: IRBuilder.h:1435
BasicBlock * GetInsertBlock() const
Definition: IRBuilder.h:172
Value * CreateShl(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1414
Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="", bool IsNonNeg=false)
Definition: IRBuilder.h:2019
Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="", bool IsNUW=false, bool IsNSW=false)
Definition: IRBuilder.h:2005
Value * CreateOr(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1495
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value * > Args=std::nullopt, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:2410
bool hasAtomicLoad() const LLVM_READONLY
Return true if this atomic instruction loads from memory.
static constexpr LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
Definition: LowLevelType.h:42
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
Base class for LoadSDNode and StoreSDNode.
An instruction for reading from memory.
Definition: Instructions.h:173
bool isUnordered() const
Definition: Instructions.h:246
This class is used to represent ISD::LOAD nodes.
const SDValue & getBasePtr() const
ISD::LoadExtType getExtensionType() const
Return whether this is a plain node, or one of the varieties of value-extending loads.
bool hasValue() const
TypeSize getValue() const
Context object for machine code objects.
Definition: MCContext.h:83
Base class for the full range of assembler expressions which are needed for parsing.
Definition: MCExpr.h:35
Wrapper class representing physical registers. Should be passed by value.
Definition: MCRegister.h:33
MCSymbolXCOFF * getQualNameSymbol() const
static const MCSymbolRefExpr * create(const MCSymbol *Symbol, MCContext &Ctx)
Definition: MCExpr.h:397
Metadata node.
Definition: Metadata.h:1067
Machine Value Type.
SimpleValueType SimpleTy
@ INVALID_SIMPLE_VALUE_TYPE
unsigned getVectorNumElements() const
bool isVector() const
Return true if this is a vector value type.
bool isInteger() const
Return true if this is an integer or a vector integer type.
static auto integer_valuetypes()
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
static auto fixedlen_vector_valuetypes()
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
bool isScalarInteger() const
Return true if this is an integer, not including vectors.
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
static MVT getIntegerVT(unsigned BitWidth)
static auto fp_valuetypes()
void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
void setCallFrameSize(unsigned N)
Set the call frame size on entry to this basic block.
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool IsImmutable, bool isAliased=false)
Create a new object at a fixed location on the stack.
int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
void setFrameAddressIsTaken(bool T)
void setHasTailCall(bool V=true)
void setReturnAddressIsTaken(bool s)
Align getObjectAlign(int ObjectIdx) const
Return the alignment of the specified stack object.
int64_t getObjectSize(int ObjectIdx) const
Return the size of the specified object.
bool hasVAStart() const
Returns true if the function calls the llvm.va_start intrinsic.
int64_t getObjectOffset(int ObjectIdx) const
Return the assigned stack offset of the specified object from the incoming stack pointer.
MCSymbol * getPICBaseSymbol() const
getPICBaseSymbol - Return a function-local symbol to represent the PIC base.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
MachineModuleInfo & getMMI() const
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Register addLiveIn(MCRegister PReg, const TargetRegisterClass *RC)
addLiveIn - Add the specified physical register as a live-in value and create a corresponding virtual...
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
void insert(iterator MBBI, MachineBasicBlock *MBB)
const MachineInstrBuilder & setMIFlag(MachineInstr::MIFlag Flag) const
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addRegMask(const uint32_t *Mask) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & cloneMemRefs(const MachineInstr &OtherMI) const
const MachineInstrBuilder & addUse(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register use operand.
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
const MachineInstrBuilder & addDef(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register definition operand.
Representation of each machine instruction.
Definition: MachineInstr.h:69
@ EK_LabelDifference32
EK_LabelDifference32 - Each entry is the address of the block minus the address of the jump table.
A description of a memory reference used in the backend.
LocationSize getSize() const
Return the size in bytes of the memory reference.
Flags
Flags values. These may be or'd together.
@ MOVolatile
The memory access is volatile.
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
@ MOLoad
The memory access reads data.
@ MOInvariant
The memory access always returns the same value (or traps).
@ MOStore
The memory access writes data.
Flags getFlags() const
Return the raw flags of the source value,.
const MCContext & getContext() const
MachineOperand class - Representation of each machine instruction operand.
static MachineOperand CreateImm(int64_t Val)
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
const TargetRegisterClass * getRegClass(Register Reg) const
Return the register class of the specified virtual register.
MachineInstr * getVRegDef(Register Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is ...
Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
Register getLiveInVirtReg(MCRegister PReg) const
getLiveInVirtReg - If PReg is a live-in physical register, return the corresponding live-in virtual r...
An SDNode that represents everything that will be needed to construct a MachineInstr.
This SDNode is used for target intrinsics that touch memory and need an associated MachineMemOperand.
This is an abstract virtual class for memory operations.
Align getAlign() const
AAMDNodes getAAInfo() const
Returns the AA info that describes the dereference.
MachineMemOperand * getMemOperand() const
Return a MachineMemOperand object describing the memory reference performed by operation.
const SDValue & getBasePtr() const
const MachinePointerInfo & getPointerInfo() const
const SDValue & getChain() const
EVT getMemoryVT() const
Return the type of the in-memory value.
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
uint64_t getReturnSaveOffset() const
getReturnSaveOffset - Return the previous frame offset to save the return address.
uint64_t getFramePointerSaveOffset() const
getFramePointerSaveOffset - Return the previous frame offset to save the frame pointer.
unsigned getLinkageSize() const
getLinkageSize - Return the size of the PowerPC ABI linkage area.
uint64_t getTOCSaveOffset() const
getTOCSaveOffset - Return the previous frame offset to save the TOC register – 64-bit SVR4 ABI only.
PPCFunctionInfo - This class is derived from MachineFunction private PowerPC target-specific informat...
void setVarArgsNumFPR(unsigned Num)
unsigned getVarArgsNumFPR() const
void setVarArgsNumGPR(unsigned Num)
void appendParameterType(ParamType Type)
bool isAIXFuncTLSModelOptInitDone() const
void setMinReservedArea(unsigned size)
unsigned getVarArgsNumGPR() const
unsigned getMinReservedArea() const
void setVarArgsStackOffset(int Offset)
void setVarArgsFrameIndex(int Index)
void addLiveInAttr(Register VReg, ISD::ArgFlagsTy Flags)
This function associates attributes for each live-in virtual register.
void setFramePointerSaveIndex(int Idx)
static bool hasPCRelFlag(unsigned TF)
Definition: PPCInstrInfo.h:300
bool is32BitELFABI() const
Definition: PPCSubtarget.h:219
unsigned descriptorTOCAnchorOffset() const
Definition: PPCSubtarget.h:259
bool isAIXABI() const
Definition: PPCSubtarget.h:214
bool useSoftFloat() const
Definition: PPCSubtarget.h:174
const PPCFrameLowering * getFrameLowering() const override
Definition: PPCSubtarget.h:142
bool needsSwapsForVSXMemOps() const
Definition: PPCSubtarget.h:202
bool isPPC64() const
isPPC64 - Return true if we are generating code for 64-bit pointer mode.
bool isUsingPCRelativeCalls() const
bool usesFunctionDescriptors() const
True if the ABI is descriptor based.
Definition: PPCSubtarget.h:253
MCRegister getEnvironmentPointerRegister() const
Definition: PPCSubtarget.h:271
const PPCInstrInfo * getInstrInfo() const override
Definition: PPCSubtarget.h:145
bool isSVR4ABI() const
Definition: PPCSubtarget.h:215
unsigned getCPUDirective() const
getCPUDirective - Returns the -m directive specified for the cpu.
Definition: PPCSubtarget.h:134
POPCNTDKind hasPOPCNTD() const
Definition: PPCSubtarget.h:206
bool isLittleEndian() const
Definition: PPCSubtarget.h:181
bool isTargetLinux() const
Definition: PPCSubtarget.h:212
MCRegister getTOCPointerRegister() const
Definition: PPCSubtarget.h:277
MCRegister getStackPointerRegister() const
Definition: PPCSubtarget.h:289
bool is64BitELFABI() const
Definition: PPCSubtarget.h:218
bool isELFv2ABI() const
const PPCTargetMachine & getTargetMachine() const
Definition: PPCSubtarget.h:155
bool isPredictableSelectIsExpensive() const
Definition: PPCSubtarget.h:295
bool enableMachineScheduler() const override
Scheduling customization.
const PPCRegisterInfo * getRegisterInfo() const override
Definition: PPCSubtarget.h:152
bool isGVIndirectSymbol(const GlobalValue *GV) const
True if the GV will be accessed via an indirect symbol.
unsigned descriptorEnvironmentPointerOffset() const
Definition: PPCSubtarget.h:265
MachineBasicBlock * emitEHSjLjLongJmp(MachineInstr &MI, MachineBasicBlock *MBB) const
CCAssignFn * ccAssignFnForCall(CallingConv::ID CC, bool Return, bool IsVarArg) const
bool isTruncateFree(Type *Ty1, Type *Ty2) const override
isTruncateFree - Return true if it's free to truncate a value of type Ty1 to type Ty2.
Value * emitMaskedAtomicRMWIntrinsic(IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr, Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const override
Perform a masked atomicrmw using a target-specific intrinsic.
MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *MBB) const override
This method should be implemented by targets that mark instructions with the 'usesCustomInserter' fla...
bool isFPExtFree(EVT DestVT, EVT SrcVT) const override
Return true if an fpext operation is free (for instance, because single-precision floating-point numb...
PPC::AddrMode SelectForceXFormMode(SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG) const
SelectForceXFormMode - Given the specified address, force it to be represented as an indexed [r+r] op...
Instruction * emitTrailingFence(IRBuilderBase &Builder, Instruction *Inst, AtomicOrdering Ord) const override
bool hasInlineStackProbe(const MachineFunction &MF) const override
MachineBasicBlock * emitEHSjLjSetJmp(MachineInstr &MI, MachineBasicBlock *MBB) const
const char * getTargetNodeName(unsigned Opcode) const override
getTargetNodeName() - This method returns the name of a target specific DAG node.
bool supportsTailCallFor(const CallBase *CB) const
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override
Return true if folding a constant offset with the given GlobalAddress is legal.
MachineBasicBlock * emitProbedAlloca(MachineInstr &MI, MachineBasicBlock *MBB) const
bool isZExtFree(SDValue Val, EVT VT2) const override
Return true if zero-extending the specific node Val to type VT2 is free (either because it's implicit...
MachineBasicBlock * EmitPartwordAtomicBinary(MachineInstr &MI, MachineBasicBlock *MBB, bool is8bit, unsigned Opcode, unsigned CmpOpcode=0, unsigned CmpPred=0) const
SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, NegatibleCost &Cost, unsigned Depth=0) const override
Return the newly negated expression if the cost is not expensive and set the cost in Cost to indicate...
bool SelectAddressRegImm(SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG, MaybeAlign EncodingAlignment) const
SelectAddressRegImm - Returns true if the address N can be represented by a base register plus a sign...
bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I, MachineFunction &MF, unsigned Intrinsic) const override
Given an intrinsic, checks if on the target the intrinsic will need to map to a MemIntrinsicNode (tou...
SDValue expandVSXLoadForLE(SDNode *N, DAGCombinerInfo &DCI) const
bool splitValueIntoRegisterParts(SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts, unsigned NumParts, MVT PartVT, std::optional< CallingConv::ID > CC) const override
Target-specific splitting of values into parts that fit a register storing a legal type.
void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const override
LowerAsmOperandForConstraint - Lower the specified operand into the Ops vector.
void ReplaceNodeResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG) const override
ReplaceNodeResults - Replace the results of node with an illegal result type with new values built ou...
TargetLowering::AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override
Returns how the IR-level AtomicExpand pass should expand the given AtomicRMW, if at all.
bool SelectAddressRegReg(SDValue N, SDValue &Base, SDValue &Index, SelectionDAG &DAG, MaybeAlign EncodingAlignment=std::nullopt) const
SelectAddressRegReg - Given the specified addressed, check to see if it can be more efficiently repre...
MachineBasicBlock * EmitAtomicBinary(MachineInstr &MI, MachineBasicBlock *MBB, unsigned AtomicSize, unsigned BinOpcode, unsigned CmpOpcode=0, unsigned CmpPred=0) const
SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created) const override
Targets may override this function to provide custom SDIV lowering for power-of-2 denominators.
void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth=0) const override
Determine which of the bits specified in Mask are known to be either zero or one and return them in t...
bool SelectAddressRegRegOnly(SDValue N, SDValue &Base, SDValue &Index, SelectionDAG &DAG) const
SelectAddressRegRegOnly - Given the specified addressed, force it to be represented as an indexed [r+...
bool useSoftFloat() const override
SDValue getPICJumpTableRelocBase(SDValue Table, SelectionDAG &DAG) const override
Returns relocation base for the given PIC jumptable.
void insertSSPDeclarations(Module &M) const override
Inserts necessary declarations for SSP (stack protection) purpose.
Value * emitMaskedAtomicCmpXchgIntrinsic(IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr, Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const override
Perform a masked cmpxchg using a target-specific intrinsic.
ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &info, const char *constraint) const override
Examine constraint string and operand type and determine a weight value.
uint64_t getByValTypeAlignment(Type *Ty, const DataLayout &DL) const override
getByValTypeAlignment - Return the desired alignment for ByVal aggregate function arguments in the ca...
bool enableAggressiveFMAFusion(EVT VT) const override
Return true if target always benefits from combining into FMA for a given value type.
Register getRegisterByName(const char *RegName, LLT VT, const MachineFunction &MF) const override
Return the register ID of the name passed in.
bool decomposeMulByConstant(LLVMContext &Context, EVT VT, SDValue C) const override
Return true if it is profitable to transform an integer multiplication-by-constant into simpler opera...
unsigned getJumpTableEncoding() const override
Return the entry encoding for a jump table in the current function.
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I=nullptr) const override
isLegalAddressingMode - Return true if the addressing mode represented by AM is legal for this target...
bool preferIncOfAddToSubOfNot(EVT VT) const override
These two forms are equivalent: sub y, (xor x, -1) add (add x, 1), y The variant with two add's is IR...
bool shouldConvertConstantLoadToIntImm(const APInt &Imm, Type *Ty) const override
Returns true if it is beneficial to convert a load of a constant to just the constant itself.
const MCPhysReg * getScratchRegisters(CallingConv::ID CC) const override
Returns a 0 terminated array of registers that can be safely used as scratch registers.
bool getPreIndexedAddressParts(SDNode *N, SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG) const override
getPreIndexedAddressParts - returns true by value, base pointer and offset pointer and addressing mod...
bool isProfitableToHoist(Instruction *I) const override
isProfitableToHoist - Check if it is profitable to hoist instruction I to its dominator block.
bool isFPImmLegal(const APFloat &Imm, EVT VT, bool ForCodeSize) const override
Returns true if the target can instruction select the specified FP immediate natively.
ConstraintType getConstraintType(StringRef Constraint) const override
getConstraintType - Given a constraint, return the type of constraint it is for this target.
const MCExpr * getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI, MCContext &Ctx) const override
This returns the relocation base for the given PIC jumptable, the same as getPICJumpTableRelocBase,...
bool shallExtractConstSplatVectorElementToStore(Type *VectorTy, unsigned ElemSizeInBits, unsigned &Index) const override
Return true if the target shall perform extract vector element and store given that the vector is kno...
EVT getOptimalMemOpType(const MemOp &Op, const AttributeList &FuncAttributes) const override
It returns EVT::Other if the type should be determined using generic target-independent logic.
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override
This method will be invoked for all target nodes and for any target-independent nodes that the target...
SDValue expandVSXStoreForLE(SDNode *N, DAGCombinerInfo &DCI) const
void CollectTargetIntrinsicOperands(const CallInst &I, SmallVectorImpl< SDValue > &Ops, SelectionDAG &DAG) const override
bool useLoadStackGuardNode() const override
Override to support customized stack guard loading.
unsigned getStackProbeSize(const MachineFunction &MF) const
PPCTargetLowering(const PPCTargetMachine &TM, const PPCSubtarget &STI)
TargetLowering::AtomicExpansionKind shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const override
Returns how the given atomic cmpxchg should be expanded by the IR-level AtomicExpand pass.
bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, EVT VT) const override
isFMAFasterThanFMulAndFAdd - Return true if an FMA operation is faster than a pair of fmul and fadd i...
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const override
Is unaligned memory access allowed for the given type, and is it fast relative to software emulation.
bool shouldExpandBuildVectorWithShuffles(EVT VT, unsigned DefinedValues) const override
bool SelectAddressRegImm34(SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG) const
Similar to the 16-bit case but for instructions that take a 34-bit displacement field (prefixed loads...
std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const override
Given a physical register constraint (e.g.
Register getExceptionSelectorRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception typeid on entry to a la...
bool isJumpTableRelative() const override
Register getExceptionPointerRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception address on entry to an ...
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
LowerOperation - Provide custom lowering hooks for some operations.
PPC::AddrMode SelectOptimalAddrMode(const SDNode *Parent, SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG, MaybeAlign Align) const
SelectOptimalAddrMode - Based on a node N and its Parent (a MemSDNode), compute the address flags of...
Value * getSDagStackGuard(const Module &M) const override
Return the variable that's previously inserted by insertSSPDeclarations, if any, otherwise return nul...
bool SelectAddressPCRel(SDValue N, SDValue &Base) const
SelectAddressPCRel - Represent the specified address as PC-relative, i.e. in the form [pc+imm].
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const override
getSetCCResultType - Return the ISD::SETCC ValueType
bool SelectAddressEVXRegReg(SDValue N, SDValue &Base, SDValue &Index, SelectionDAG &DAG) const
SelectAddressEVXRegReg - Given the specified address, check to see if it can be more efficiently re...
bool isLegalICmpImmediate(int64_t Imm) const override
isLegalICmpImmediate - Return true if the specified immediate is a legal icmp immediate,...
bool isAccessedAsGotIndirect(SDValue N) const
Align getPrefLoopAlignment(MachineLoop *ML) const override
Return the preferred loop alignment.
FastISel * createFastISel(FunctionLoweringInfo &FuncInfo, const TargetLibraryInfo *LibInfo) const override
createFastISel - This method returns a target-specific FastISel object, or null if the target does no...
bool shouldInlineQuadwordAtomics() const
Instruction * emitLeadingFence(IRBuilderBase &Builder, Instruction *Inst, AtomicOrdering Ord) const override
Inserts in the IR a target-specific intrinsic specifying a fence.
bool isLegalAddImmediate(int64_t Imm) const override
isLegalAddImmediate - Return true if the specified immediate is a legal add immediate,...
Common code between 32-bit and 64-bit PowerPC targets.
static PointerType * getUnqual(Type *ElementType)
This constructs a pointer to an object of the specified type in the default address space (address sp...
Definition: DerivedTypes.h:662
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:91
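As a quick orientation for readers of this listing, a minimal self-contained sketch of the Register query above (the helper name and the classification comments are illustrative, not exhaustive):

#include "llvm/CodeGen/Register.h"

// Distinguish virtual registers (created during instruction selection)
// from physical ones; the default-constructed Register is the "no register" sentinel.
static bool needsVRegHandling(llvm::Register R) {
  if (!R.isValid())
    return false;        // no register at all
  return R.isVirtual();  // true for virtual-register numbers
}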
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
This class provides iterator support for SDUse operands that use a specific SDNode.
Represents one node in the SelectionDAG.
ArrayRef< SDUse > ops() const
void dump() const
Dump this node, for debugging.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
bool hasOneUse() const
Return true if there is exactly one use of this node.
iterator_range< use_iterator > uses()
SDNodeFlags getFlags() const
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
unsigned getNumValues() const
Return the number of values defined/returned by this operator.
const SDValue & getOperand(unsigned Num) const
uint64_t getConstantOperandVal(unsigned Num) const
Helper method returns the integer value of a ConstantSDNode operand.
use_iterator use_begin() const
Provide iteration support to walk over all uses of an SDNode.
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
bool hasNUsesOfValue(unsigned NUses, unsigned Value) const
Return true if there are exactly NUSES uses of the indicated value.
op_iterator op_end() const
op_iterator op_begin() const
static use_iterator use_end()
Represents a use of a SDNode.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isUndef() const
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
SDValue getValue(unsigned R) const
void dump() const
EVT getValueType() const
Return the ValueType of the referenced return value.
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
const SDValue & getOperand(unsigned i) const
uint64_t getConstantOperandVal(unsigned i) const
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
unsigned getOpcode() const
unsigned getNumOperands() const
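To show how the SDValue/SDNode accessors above are typically combined, here is a small self-contained helper in the style of a DAG-combine pattern match (the helper name and the specific pattern are illustrative only):

#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"

// Match (add X, C) and return the constant operand's value through Imm.
static bool matchAddOfConstant(llvm::SDValue Op, uint64_t &Imm) {
  if (Op.getOpcode() != llvm::ISD::ADD)
    return false;
  if (auto *C = llvm::dyn_cast<llvm::ConstantSDNode>(Op.getOperand(1))) {
    Imm = C->getZExtValue();
    return true;
  }
  return false;
}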
static SectionKind getMetadata()
Definition: SectionKind.h:188
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
Definition: SelectionDAG.h:227
SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:736
SDValue getStackArgumentTokenFactor(SDValue Chain)
Compute a TokenFactor to force all the incoming stack arguments to be loaded from the stack.
const TargetSubtargetInfo & getSubtarget() const
Definition: SelectionDAG.h:488
SDValue getMergeValues(ArrayRef< SDValue > Ops, const SDLoc &dl)
Create a MERGE_VALUES node from the given operands.
SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
SDValue makeEquivalentMemoryOrdering(SDValue OldChain, SDValue NewMemOpChain)
If an existing load has uses of its chain, create a token factor node with that chain and the new mem...
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Chain=SDValue(), bool IsSignaling=false)
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...
SDValue getConstantFP(double Val, const SDLoc &DL, EVT VT, bool isTarget=false)
Create a ConstantFPSDNode wrapping a constant value.
SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
Align getEVTAlign(EVT MemoryVT) const
Compute the default alignment value for the given type.
void addNoMergeSiteInfo(const SDNode *Node, bool NoMerge)
Set NoMergeSiteInfo to be associated with Node if NoMerge is true.
SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
const TargetLowering & getTargetLoweringInfo() const
Definition: SelectionDAG.h:492
static constexpr unsigned MaxRecursionDepth
Definition: SelectionDAG.h:451
SDValue getTargetJumpTable(int JTI, EVT VT, unsigned TargetFlags=0)
Definition: SelectionDAG.h:746
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getCALLSEQ_END(SDValue Chain, SDValue Op1, SDValue Op2, SDValue InGlue, const SDLoc &DL)
Return a new CALLSEQ_END node, which always must have a glue result (to ensure it's not CSE'd).
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
Definition: SelectionDAG.h:842
SDValue getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src, SDValue Size, Align Alignment, bool isVol, bool AlwaysInline, bool isTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo, const AAMDNodes &AAInfo=AAMDNodes(), AAResults *AA=nullptr)
bool isSplatValue(SDValue V, const APInt &DemandedElts, APInt &UndefElts, unsigned Depth=0) const
Test whether V has a splatted value for all the demanded elements.
SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
SDValue getSelect(const SDLoc &DL, EVT VT, SDValue Cond, SDValue LHS, SDValue RHS, SDNodeFlags Flags=SDNodeFlags())
Helper function to make it easier to build Select's if you just have operands and don't want to check...
const DataLayout & getDataLayout() const
Definition: SelectionDAG.h:486
SDValue getTargetFrameIndex(int FI, EVT VT)
Definition: SelectionDAG.h:741
SDValue getTokenFactor(const SDLoc &DL, SmallVectorImpl< SDValue > &Vals)
Creates a new TokenFactor containing Vals.
bool areNonVolatileConsecutiveLoads(LoadSDNode *LD, LoadSDNode *Base, unsigned Bytes, int Dist) const
Return true if loads are next to each other and can be merged.
SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
SDValue getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, EVT SVT, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getMDNode(const MDNode *MD)
Return an MDNodeSDNode which holds an MDNode.
void ReplaceAllUsesWith(SDValue From, SDValue To)
Modify anything using 'From' to use 'To' instead.
SDValue getCommutedVectorShuffle(const ShuffleVectorSDNode &SV)
Returns an ISD::VECTOR_SHUFFLE node semantically equivalent to the shuffle node in input but with swa...
SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
SDValue getCALLSEQ_START(SDValue Chain, uint64_t InSize, uint64_t OutSize, const SDLoc &DL)
Return a new CALLSEQ_START node, that starts new call frame, in which InSize bytes are set up inside ...
SDValue getRegister(unsigned Reg, EVT VT)
SDValue getSExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either sign-extending or trunca...
SDValue getBoolExtOrTrunc(SDValue Op, const SDLoc &SL, EVT VT, EVT OpVT)
Convert Op, which must be of integer type, to the integer type VT, by using an extension appropriate ...
SDValue getExternalSymbol(const char *Sym, EVT VT)
const TargetMachine & getTarget() const
Definition: SelectionDAG.h:487
SDValue getAnyExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either any-extending or truncat...
SDValue getCopyToReg(SDValue Chain, const SDLoc &dl, unsigned Reg, SDValue N)
Definition: SelectionDAG.h:787
SDValue getSelectCC(const SDLoc &DL, SDValue LHS, SDValue RHS, SDValue True, SDValue False, ISD::CondCode Cond)
Helper function to make it easier to build SelectCC's if you just have an ISD::CondCode instead of an...
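A hedged illustration of the getSelectCC helper described above; the wrapper function is hypothetical and simply forwards values supplied by the surrounding lowering code:

#include "llvm/CodeGen/SelectionDAG.h"

// Build smax(LHS, RHS) as a single SELECT_CC node: (LHS > RHS) ? LHS : RHS.
static llvm::SDValue buildSMax(llvm::SelectionDAG &DAG, const llvm::SDLoc &DL,
                               llvm::SDValue LHS, llvm::SDValue RHS) {
  return DAG.getSelectCC(DL, LHS, RHS, LHS, RHS, llvm::ISD::SETGT);
}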
SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
SDValue getValueType(EVT)
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
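Putting getConstant and one of the getNode overloads from this listing together, a minimal sketch of building a new value (the helper is hypothetical; X is assumed to be an integer-typed value, and VT is taken from it):

#include "llvm/CodeGen/SelectionDAG.h"

// Materialize (X + 1) with the same value type as X.
static llvm::SDValue emitAddOne(llvm::SelectionDAG &DAG, const llvm::SDLoc &DL,
                                llvm::SDValue X) {
  llvm::EVT VT = X.getValueType();
  return DAG.getNode(llvm::ISD::ADD, DL, VT, X, DAG.getConstant(1, DL, VT));
}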
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
Definition: SelectionDAG.h:690
unsigned ComputeNumSignBits(SDValue Op, unsigned Depth=0) const
Return the number of times the sign bit of the register is replicated into the other bits.
SDValue getBoolConstant(bool V, const SDLoc &DL, EVT VT, EVT OpVT)
Create a true or false constant of type VT using the target's BooleanContent for type OpVT.
SDValue getTargetBlockAddress(const BlockAddress *BA, EVT VT, int64_t Offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:782
bool isBaseWithConstantOffset(SDValue Op) const
Return true if the specified operand is an ISD::ADD with a ConstantSDNode on the right-hand side,...
SDValue getVectorIdxConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone.
MachineFunction & getMachineFunction() const
Definition: SelectionDAG.h:481
SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, unsigned Reg, EVT VT)
Definition: SelectionDAG.h:813
SDValue getSplatBuildVector(EVT VT, const SDLoc &DL, SDValue Op)
Return a splat ISD::BUILD_VECTOR node, consisting of Op splatted to all elements.
Definition: SelectionDAG.h:859
SDValue getFrameIndex(int FI, EVT VT, bool isTarget=false)
KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
SDValue getRegisterMask(const uint32_t *RegMask)
SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
SDValue getCondCode(ISD::CondCode Cond)
bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
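A small sketch combining computeKnownBits and MaskedValueIsZero from this listing; the 64/32-bit split and the helper name are assumptions for illustration:

#include "llvm/ADT/APInt.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/Support/KnownBits.h"

// Return true if a 64-bit value provably fits in its low 32 bits.
static bool fitsInLow32(llvm::SelectionDAG &DAG, llvm::SDValue Op) {
  llvm::KnownBits Known = DAG.computeKnownBits(Op);
  if (Known.getBitWidth() != 64)
    return false;
  llvm::APInt High32 = llvm::APInt::getHighBitsSet(64, 32);
  return DAG.MaskedValueIsZero(Op, High32);
}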
SDValue getObjectPtrOffset(const SDLoc &SL, SDValue Ptr, TypeSize Offset)
Create an add instruction with appropriate flags when used for addressing some offset of an object.
LLVMContext * getContext() const
Definition: SelectionDAG.h:499
SDValue getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef< SDValue > Ops, EVT MemVT, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags Flags=MachineMemOperand::MOLoad|MachineMemOperand::MOStore, LocationSize Size=0, const AAMDNodes &AAInfo=AAMDNodes())
Creates a MemIntrinsicNode that may produce a result and takes a list of operands.
SDValue getTargetExternalSymbol(const char *Sym, EVT VT, unsigned TargetFlags=0)
SDValue getMCSymbol(MCSymbol *Sym, EVT VT)
SDValue CreateStackTemporary(TypeSize Bytes, Align Alignment)
Create a stack temporary based on the size in bytes and the alignment.
SDNode * UpdateNodeOperands(SDNode *N, SDValue Op)
Mutate the specified node in-place to have the specified operands.
SDValue getTargetConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:753
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
Definition: SelectionDAG.h:568
std::pair< SDValue, SDValue > SplitScalar(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the scalar node with EXTRACT_ELEMENT using the provided VTs and return the low/high part.
SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
This SDNode is used to implement the code generator support for the llvm IR shufflevector instruction...
int getMaskElt(unsigned Idx) const
ArrayRef< int > getMask() const
size_type size() const
Definition: SmallPtrSet.h:94
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:412
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:344
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:479
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:135
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
Definition: SmallSet.h:166
void clear()
Definition: SmallSet.h:218
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:179
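For readers skimming this listing, a self-contained sketch of the SmallSet operations referenced above:

#include "llvm/ADT/SmallSet.h"

void smallSetExample() {
  llvm::SmallSet<int, 8> Seen;        // small-size optimized for <= 8 elements
  bool New = Seen.insert(42).second;  // true: 42 was not yet present
  New = Seen.insert(42).second;       // false: duplicate insert is a no-op
  if (Seen.count(42))                 // 1 if present, 0 otherwise
    Seen.clear();                     // drop everything
  (void)New;
}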
bool empty() const
Definition: SmallVector.h:94
size_t size() const
Definition: SmallVector.h:91
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:586
void push_back(const T &Elt)
Definition: SmallVector.h:426
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
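Likewise, a minimal sketch of SmallVector usage covering the members listed above:

#include "llvm/ADT/SmallVector.h"

void smallVectorExample() {
  llvm::SmallVector<int, 4> Elems;  // inline storage for up to 4 elements
  for (int I = 0; I < 3; ++I)
    Elems.push_back(I);             // no heap allocation for the first 4 pushes
  bool Empty = Elems.empty();       // false
  size_t N = Elems.size();          // 3
  (void)Empty;
  (void)N;
}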
StackOffset holds a fixed and a scalable offset in bytes.
Definition: TypeSize.h:33
This class is used to represent ISD::STORE nodes.
const SDValue & getBasePtr() const
const SDValue & getValue() const
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
constexpr size_t size() const
size - Get the string size.
Definition: StringRef.h:137
constexpr const char * data() const
data - Get a pointer to the start of the string (which may not be null terminated).
Definition: StringRef.h:131
A switch()-like statement whose cases are string literals.
Definition: StringSwitch.h:44
StringSwitch & Case(StringLiteral S, T Value)
Definition: StringSwitch.h:69
R Default(T Value)
Definition: StringSwitch.h:182
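A self-contained sketch of the StringSwitch pattern described above (the names and ids are purely illustrative):

#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"

// Map a textual name to a numeric id, with a fallback for unknown names.
static int lookupRegId(llvm::StringRef Name) {
  return llvm::StringSwitch<int>(Name)
      .Case("r1", 1)
      .Case("r2", 2)
      .Default(-1);
}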
Class to represent struct types.
Definition: DerivedTypes.h:216
Information about stack frame layout on the target.
unsigned getStackAlignment() const
getStackAlignment - This method returns the number of bytes to which the stack pointer must be aligne...
TargetInstrInfo - Interface to description of machine instruction set.
Provides information about what library functions are available for the current target.
void setBooleanVectorContents(BooleanContent Ty)
Specify how the target extends the result of a vector boolean value from a vector of i1 to a wider ty...
void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action)
Indicate that the specified operation does not work with the specified type and indicate what to do a...
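A hedged sketch of how setOperationAction is typically driven. These statements are only meaningful inside the constructor of a TargetLowering subclass, where the protected setter and the LegalizeAction enumerators are in scope; the particular operations and actions chosen here are illustrative, not PPC's actual configuration:

// Assumed context: body of a hypothetical MyTargetLowering constructor.
setOperationAction(ISD::SELECT_CC, MVT::i32, Custom); // lower this one ourselves
setOperationAction(ISD::BR_JT, MVT::Other, Expand);   // let the legalizer expand it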
virtual bool shouldSignExtendTypeInLibCall(EVT Type, bool IsSigned) const
Returns true if arguments should be sign-extended in lib calls.
bool PredictableSelectIsExpensive
Tells the code generator that select is more expensive than a branch if the branch is usually predict...
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
virtual bool shouldExpandBuildVectorWithShuffles(EVT, unsigned DefinedValues) const
unsigned MaxStoresPerMemcpyOptSize
Likewise for functions with the OptSize attribute.
MachineBasicBlock * emitPatchPoint(MachineInstr &MI, MachineBasicBlock *MBB) const
Replace/modify any TargetFrameIndex operands with a target-dependent sequence of memory operands that...
virtual const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const
Return the register class that should be used for the specified value type.
void setMinStackArgumentAlignment(Align Alignment)
Set the minimum stack alignment of an argument.
virtual MVT getVectorIdxTy(const DataLayout &DL) const
Returns the type to be used for the index operand of: ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT...
const TargetMachine & getTargetMachine() const
unsigned MaxLoadsPerMemcmp
Specify maximum number of load instructions per memcmp call.
virtual bool isZExtFree(Type *FromTy, Type *ToTy) const
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
virtual Value * getSDagStackGuard(const Module &M) const
Return the variable that's previously inserted by insertSSPDeclarations, if any, otherwise return nul...
void setIndexedLoadAction(ArrayRef< unsigned > IdxModes, MVT VT, LegalizeAction Action)
Indicate that the specified indexed load does or does not work with the specified type and indicate w...
void setPrefLoopAlignment(Align Alignment)
Set the target's preferred loop alignment.
void setMaxAtomicSizeInBitsSupported(unsigned SizeInBits)
Set the maximum atomic operation size supported by the backend.
Sched::Preference getSchedulingPreference() const
Return target scheduling preference.
void setMinFunctionAlignment(Align Alignment)
Set the target's minimum function alignment.
bool isOperationCustom(unsigned Op, EVT VT) const
Return true if the operation uses custom lowering, regardless of whether the type is legal or not.
unsigned MaxStoresPerMemsetOptSize
Likewise for functions with the OptSize attribute.
bool hasBigEndianPartOrdering(EVT VT, const DataLayout &DL) const
When splitting a value of the specified type into parts, does the Lo or Hi part come first?...
void setBooleanContents(BooleanContent Ty)
Specify how the target extends the result of integer and floating point boolean values from i1 to a w...
unsigned MaxStoresPerMemmove
Specify maximum number of store instructions per memmove call.
virtual Align getPrefLoopAlignment(MachineLoop *ML=nullptr) const
Return the preferred loop alignment.
void computeRegisterProperties(const TargetRegisterInfo *TRI)
Once all of the register classes are added, this allows us to compute derived properties we expose.
EVT getShiftAmountTy(EVT LHSTy, const DataLayout &DL, bool LegalTypes=true) const
Returns the type for the shift amount of a shift opcode.
unsigned MaxStoresPerMemmoveOptSize
Likewise for functions with the OptSize attribute.
void addRegisterClass(MVT VT, const TargetRegisterClass *RC)
Add the specified register class as an available regclass for the specified value type.
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
void setIndexedStoreAction(ArrayRef< unsigned > IdxModes, MVT VT, LegalizeAction Action)
Indicate that the specified indexed store does or does not work with the specified type and indicate ...
virtual bool isJumpTableRelative() const
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
void setLibcallName(RTLIB::Libcall Call, const char *Name)
Rename the default libcall routine name for the specified libcall.
void setPrefFunctionAlignment(Align Alignment)
Set the target's preferred function alignment.
bool isOperationLegal(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target.
virtual AtomicExpansionKind shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const
Returns how the given atomic cmpxchg should be expanded by the IR-level AtomicExpand pass.
unsigned MaxStoresPerMemset
Specify maximum number of store instructions per memset call.
void setMinimumJumpTableEntries(unsigned Val)
Indicate the minimum number of blocks to generate jump tables.
void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified truncating store does not work with the specified type and indicate what ...
bool isOperationLegalOrCustom(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
unsigned MaxLoadsPerMemcmpOptSize
Likewise for functions with the OptSize attribute.
void setStackPointerRegisterToSaveRestore(Register R)
If set to a physical register, this specifies the register that llvm.savestack/llvm....
void AddPromotedToType(unsigned Opc, MVT OrigVT, MVT DestVT)
If Opc/OrigVT is specified as being promoted, the promotion code defaults to trying a larger integer/...
AtomicExpansionKind
Enum that specifies what an atomic load/AtomicRMWInst is expanded to, if at all.
void setCondCodeAction(ArrayRef< ISD::CondCode > CCs, MVT VT, LegalizeAction Action)
Indicate that the specified condition code is or isn't supported on the target and indicate what to d...
void setTargetDAGCombine(ArrayRef< ISD::NodeType > NTs)
Targets should invoke this method for each target independent node that they want to provide a custom...
virtual AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const
Returns how the IR-level AtomicExpand pass should expand the given AtomicRMW, if at all.
void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified load with extension does not work with the specified type and indicate wh...
unsigned GatherAllAliasesMaxDepth
Depth that GatherAllAliases should continue looking for chain dependencies when trying to find a more...
NegatibleCost
Enum that specifies when a float negation is beneficial.
std::vector< ArgListEntry > ArgListTy
void setHasMultipleConditionRegisters(bool hasManyRegs=true)
Tells the code generator that the target has multiple (allocatable) condition registers that can be u...
unsigned MaxStoresPerMemcpy
Specify maximum number of store instructions per memcpy call.
void setSchedulingPreference(Sched::Preference Pref)
Specify the target scheduling preference.
virtual void insertSSPDeclarations(Module &M) const
Inserts necessary declarations for SSP (stack protection) purpose.
void setJumpIsExpensive(bool isExpensive=true)
Tells the code generator not to expand logic operations on comparison predicates into separate sequen...
virtual MCSymbol * getFunctionEntryPointSymbol(const GlobalValue *Func, const TargetMachine &TM) const
If supported, return the function entry point symbol.
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
virtual const MCExpr * getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI, MCContext &Ctx) const
This returns the relocation base for the given PIC jumptable, the same as getPICJumpTableRelocBase,...
SDValue lowerCmpEqZeroToCtlzSrl(SDValue Op, SelectionDAG &DAG) const
virtual bool useLoadStackGuardNode() const
If this function returns true, SelectionDAGBuilder emits a LOAD_STACK_GUARD node when it is lowering ...
void softenSetCCOperands(SelectionDAG &DAG, EVT VT, SDValue &NewLHS, SDValue &NewRHS, ISD::CondCode &CCCode, const SDLoc &DL, const SDValue OldLHS, const SDValue OldRHS) const
Soften the operands of a comparison.
std::pair< SDValue, SDValue > makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT, ArrayRef< SDValue > Ops, MakeLibCallOptions CallOptions, const SDLoc &dl, SDValue Chain=SDValue()) const
Returns a pair of (return value, chain).
SDValue getCheaperNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, unsigned Depth=0) const
This is the helper function to return the newly negated expression only when the cost is cheaper.
virtual ConstraintType getConstraintType(StringRef Constraint) const
Given a constraint, return the type of constraint it is for this target.
virtual SDValue LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA, SelectionDAG &DAG) const
Lower TLS global address SDNode for target independent emulated TLS model.
std::pair< SDValue, SDValue > LowerCallTo(CallLoweringInfo &CLI) const
This function lowers an abstract call to a function into an actual call.
bool isPositionIndependent() const
virtual SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, NegatibleCost &Cost, unsigned Depth=0) const
Return the newly negated expression if the cost is not expensive and set the cost in Cost to indicate...
virtual ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &info, const char *constraint) const
Examine constraint string and operand type and determine a weight value.
virtual SDValue getSqrtInputTest(SDValue Operand, SelectionDAG &DAG, const DenormalMode &Mode) const
Return a target-dependent comparison result if the input operand is suitable for use with a square ro...
virtual SDValue getPICJumpTableRelocBase(SDValue Table, SelectionDAG &DAG) const
Returns relocation base for the given PIC jumptable.
virtual std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const
Given a physical register constraint (e.g.
bool verifyReturnAddressArgumentIsConstant(SDValue Op, SelectionDAG &DAG) const
bool isInTailCallPosition(SelectionDAG &DAG, SDNode *Node, SDValue &Chain) const
Check whether a given call node is in tail position within its function.
virtual SDValue getSqrtResultForDenormInput(SDValue Operand, SelectionDAG &DAG) const
Return a target-dependent result if the input operand is not suitable for use with a square root esti...
virtual void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const
Lower the specified operand into the Ops vector.
virtual bool isGAPlusOffset(SDNode *N, const GlobalValue *&GA, int64_t &Offset) const
Returns true (and the GlobalValue and the offset) if the node is a GlobalAddress + offset.
virtual unsigned getJumpTableEncoding() const
Return the entry encoding for a jump table in the current function.
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:77
TLSModel::Model getTLSModel(const GlobalValue *GV) const
Returns the TLS model which should be used for the given global variable.
bool useEmulatedTLS() const
Returns true if this target uses emulated TLS.
Reloc::Model getRelocationModel() const
Returns the code generation relocation model.
bool shouldAssumeDSOLocal(const GlobalValue *GV) const
TargetOptions Options
CodeModel::Model getCodeModel() const
Returns the code model.
unsigned UnsafeFPMath
UnsafeFPMath - This flag is enabled when the -enable-unsafe-fp-math flag is specified on the command ...
unsigned NoInfsFPMath
NoInfsFPMath - This flag is enabled when the -enable-no-infs-fp-math flag is specified on the command...
unsigned PPCGenScalarMASSEntries
Enables scalar MASS conversions.
unsigned NoNaNsFPMath
NoNaNsFPMath - This flag is enabled when the -enable-no-nans-fp-math flag is specified on the command...
unsigned GuaranteedTailCallOpt
GuaranteedTailCallOpt - This flag is enabled when -tailcallopt is specified on the command line.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition: TypeSize.h:342
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:265
bool isFloatTy() const
Return true if this is 'float', a 32-bit IEEE fp type.
Definition: Type.h:154
bool isEmptyTy() const
Return true if this type is empty, that is, it has no elements or all of its elements are empty.
@ FloatTyID
32-bit floating point type
Definition: Type.h:58
@ DoubleTyID
64-bit floating point type
Definition: Type.h:59
@ FP128TyID
128-bit floating point type (112-bit significand)
Definition: Type.h:61
static Type * getVoidTy(LLVMContext &C)
bool isSized(SmallPtrSetImpl< Type * > *Visited=nullptr) const
Return true if it makes sense to take the size of this type.
Definition: Type.h:302
bool isDoubleTy() const
Return true if this is 'double', a 64-bit IEEE fp type.
Definition: Type.h:157
bool isFunctionTy() const
True if this is an instance of FunctionType.
Definition: Type.h:246
static IntegerType * getInt64Ty(LLVMContext &C)
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:228
TypeID getTypeID() const
Return the type id for the type.
Definition: Type.h:137
TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition: Type.h:348
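A self-contained sketch exercising the Type queries listed above (the vector type and element count are arbitrary):

#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Type.h"

void typeQueryExample() {
  llvm::LLVMContext Ctx;
  llvm::Type *I64 = llvm::Type::getInt64Ty(Ctx);
  llvm::Type *V4I64 = llvm::FixedVectorType::get(I64, 4);
  bool IsInt = I64->isIntegerTy();           // true
  bool IsVec = V4I64->isVectorTy();          // true
  llvm::Type *Elt = V4I64->getScalarType();  // i64 again
  (void)IsInt; (void)IsVec; (void)Elt;
}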
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
Value * getOperand(unsigned i) const
Definition: User.h:169
unsigned getNumOperands() const
Definition: User.h:191
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition: Value.h:434
const ParentTy * getParent() const
Definition: ilist_node.h:32
self_iterator getIterator()
Definition: ilist_node.h:132
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:121
@ Entry
Definition: COFF.h:811
@ Cold
Attempts to make code in the caller as efficient as possible under the assumption that the call is no...
Definition: CallingConv.h:47
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition: CallingConv.h:41
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
bool isNON_EXTLoad(const SDNode *N)
Returns true if the specified node is a non-extending load.
NodeType
ISD::NodeType enum - This enum defines the target-independent operators for a SelectionDAG.
Definition: ISDOpcodes.h:40
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition: ISDOpcodes.h:764
@ MERGE_VALUES
MERGE_VALUES - This node takes multiple discrete operands and returns them all as its individual resu...
Definition: ISDOpcodes.h:243
@ STACKRESTORE
STACKRESTORE has two operands, an input chain and a pointer to restore to it returns an output chain.
Definition: ISDOpcodes.h:1147
@ STACKSAVE
STACKSAVE - STACKSAVE has one operand, an input chain.
Definition: ISDOpcodes.h:1143
@ TargetConstantPool
Definition: ISDOpcodes.h:174
@ STRICT_FSETCC
STRICT_FSETCC/STRICT_FSETCCS - Constrained versions of SETCC, used for floating-point operands only.
Definition: ISDOpcodes.h:484
@ DELETED_NODE
DELETED_NODE - This is an illegal value that is used to catch errors.
Definition: ISDOpcodes.h:44
@ EH_SJLJ_LONGJMP
OUTCHAIN = EH_SJLJ_LONGJMP(INCHAIN, buffer) This corresponds to the eh.sjlj.longjmp intrinsic.
Definition: ISDOpcodes.h:153
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition: ISDOpcodes.h:257
@ BSWAP
Byte Swap and Counting operators.
Definition: ISDOpcodes.h:728
@ VAEND
VAEND, VASTART - VAEND and VASTART have three operands: an input chain, pointer, and a SRCVALUE.
Definition: ISDOpcodes.h:1176
@ ATOMIC_STORE
OUTCHAIN = ATOMIC_STORE(INCHAIN, ptr, val) This corresponds to "store atomic" instruction.
Definition: ISDOpcodes.h:1262
@ STRICT_FCEIL
Definition: ISDOpcodes.h:434
@ ADDC
Carry-setting nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:276
@ ADD
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:246
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
Definition: ISDOpcodes.h:1052
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition: ISDOpcodes.h:797
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition: ISDOpcodes.h:491
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition: ISDOpcodes.h:205
@ GlobalAddress
Definition: ISDOpcodes.h:78
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition: ISDOpcodes.h:804
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition: ISDOpcodes.h:551
@ FADD
Simple binary floating point operators.
Definition: ISDOpcodes.h:397
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
Definition: ISDOpcodes.h:702
@ SDIVREM
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
Definition: ISDOpcodes.h:262
@ STRICT_FSETCCS
Definition: ISDOpcodes.h:485
@ FP16_TO_FP
FP16_TO_FP, FP_TO_FP16 - These operators are used to perform promotions and truncation for half-preci...
Definition: ISDOpcodes.h:927
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition: ISDOpcodes.h:917
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition: ISDOpcodes.h:236
@ INIT_TRAMPOLINE
INIT_TRAMPOLINE - This corresponds to the init_trampoline intrinsic.
Definition: ISDOpcodes.h:1220
@ FLDEXP
FLDEXP - ldexp, inspired by libm (op0 * 2**op1).
Definition: ISDOpcodes.h:954
@ STRICT_FSQRT
Constrained versions of libm-equivalent floating point intrinsics.
Definition: ISDOpcodes.h:418
@ GlobalTLSAddress
Definition: ISDOpcodes.h:79
@ FrameIndex
Definition: ISDOpcodes.h:80
@ SIGN_EXTEND
Conversion operators.
Definition: ISDOpcodes.h:788
@ STRICT_UINT_TO_FP
Definition: ISDOpcodes.h:458
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition: ISDOpcodes.h:628
@ TargetExternalSymbol
Definition: ISDOpcodes.h:175
@ BR
Control flow instructions. These all have token chains.
Definition: ISDOpcodes.h:1068
@ TargetJumpTable
Definition: ISDOpcodes.h:173
@ PREFETCH
PREFETCH - This corresponds to a prefetch intrinsic.
Definition: ISDOpcodes.h:1242
@ FSINCOS
FSINCOS - Compute both fsin and fcos as a single operation.
Definition: ISDOpcodes.h:1009
@ FNEG
Perform various unary floating-point operations inspired by libm.
Definition: ISDOpcodes.h:944
@ BR_CC
BR_CC - Conditional branch.
Definition: ISDOpcodes.h:1098
@ BR_JT
BR_JT - Jumptable branch.
Definition: ISDOpcodes.h:1077
@ IS_FPCLASS
Performs a check of floating point class property, defined by IEEE-754.
Definition: ISDOpcodes.h:515
@ SSUBSAT
RESULT = [US]SUBSAT(LHS, RHS) - Perform saturation subtraction on 2 integers with the same bit width ...
Definition: ISDOpcodes.h:356
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition: ISDOpcodes.h:741
@ ATOMIC_LOAD
Val, OUTCHAIN = ATOMIC_LOAD(INCHAIN, ptr) This corresponds to "load atomic" instruction.
Definition: ISDOpcodes.h:1258
@ EXTRACT_ELEMENT
EXTRACT_ELEMENT - This is used to get the lower or upper (determined by a Constant,...
Definition: ISDOpcodes.h:229
@ VACOPY
VACOPY - VACOPY has 5 operands: an input chain, a destination pointer, a source pointer,...
Definition: ISDOpcodes.h:1172
@ TargetGlobalAddress
TargetGlobalAddress - Like GlobalAddress, but the DAG does no folding or anything else with this node...
Definition: ISDOpcodes.h:170
@ STRICT_FTRUNC
Definition: ISDOpcodes.h:438
@ GET_ROUNDING
Returns current rounding mode: -1 Undefined; 0 Round to 0; 1 Round to nearest, ties to even; 2 Round to ...
Definition: ISDOpcodes.h:894
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition: ISDOpcodes.h:659
@ SHL
Shift and rotation operations.
Definition: ISDOpcodes.h:719
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition: ISDOpcodes.h:608
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition: ISDOpcodes.h:581
@ FMINNUM_IEEE
FMINNUM_IEEE/FMAXNUM_IEEE - Perform floating-point minimumNumber or maximumNumber on two values,...
Definition: ISDOpcodes.h:999
@ STRICT_FMAXNUM
Definition: ISDOpcodes.h:432
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition: ISDOpcodes.h:543
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition: ISDOpcodes.h:794
@ STRICT_FMINNUM
Definition: ISDOpcodes.h:433
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition: ISDOpcodes.h:756
@ ATOMIC_CMP_SWAP
Val, OUTCHAIN = ATOMIC_CMP_SWAP(INCHAIN, ptr, cmp, swap) For double-word atomic operations: ValLo,...
Definition: ISDOpcodes.h:1269
@ FMINNUM
FMINNUM/FMAXNUM - Perform floating-point minimum or maximum on two values.
Definition: ISDOpcodes.h:986
@ DYNAMIC_STACKALLOC
DYNAMIC_STACKALLOC - Allocate some number of bytes on the stack aligned to a specified boundary.
Definition: ISDOpcodes.h:1062
@ ConstantPool
Definition: ISDOpcodes.h:82
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition: ISDOpcodes.h:812
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
Definition: ISDOpcodes.h:682
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition: ISDOpcodes.h:902
@ STRICT_FROUND
Definition: ISDOpcodes.h:436
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition: ISDOpcodes.h:750
@ STRICT_SINT_TO_FP
STRICT_[US]INT_TO_FP - Convert a signed or unsigned integer to a floating point value.
Definition: ISDOpcodes.h:457
@ STRICT_FFLOOR
Definition: ISDOpcodes.h:435
@ INLINEASM_BR
INLINEASM_BR - Branching version of inline asm. Used by asm-goto.
Definition: ISDOpcodes.h:1118
@ EH_DWARF_CFA
EH_DWARF_CFA - This node represents the pointer to the DWARF Canonical Frame Address (CFA),...
Definition: ISDOpcodes.h:135
@ FRAMEADDR
FRAMEADDR, RETURNADDR - These nodes represent llvm.frameaddress and llvm.returnaddress on the DAG.
Definition: ISDOpcodes.h:100
@ STRICT_FP_TO_UINT
Definition: ISDOpcodes.h:451
@ STRICT_FP_ROUND
X = STRICT_FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision ...
Definition: ISDOpcodes.h:473
@ STRICT_FP_TO_SINT
STRICT_FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:450
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:850
@ READCYCLECOUNTER
READCYCLECOUNTER - This corresponds to the readcyclecounter intrinsic.
Definition: ISDOpcodes.h:1203
@ STRICT_FP_EXTEND
X = STRICT_FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition: ISDOpcodes.h:478
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition: ISDOpcodes.h:694
@ TRAP
TRAP - Trapping instruction.
Definition: ISDOpcodes.h:1229
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition: ISDOpcodes.h:190
@ ADDE
Carry-using nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:286
@ STRICT_FADD
Constrained versions of the binary floating point operators.
Definition: ISDOpcodes.h:407
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition: ISDOpcodes.h:532
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition: ISDOpcodes.h:52
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition: ISDOpcodes.h:883
@ INLINEASM
INLINEASM - Represents an inline asm block.
Definition: ISDOpcodes.h:1115
@ STRICT_FNEARBYINT
Definition: ISDOpcodes.h:431
@ EH_SJLJ_SETJMP
RESULT, OUTCHAIN = EH_SJLJ_SETJMP(INCHAIN, buffer) This corresponds to the eh.sjlj....
Definition: ISDOpcodes.h:147
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition: ISDOpcodes.h:800
@ VAARG
VAARG - VAARG has four operands: an input chain, a pointer, a SRCVALUE, and the alignment.
Definition: ISDOpcodes.h:1167
@ BRCOND
BRCOND - Conditional branch.
Definition: ISDOpcodes.h:1091
@ BlockAddress
Definition: ISDOpcodes.h:84
@ SHL_PARTS
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations.
Definition: ISDOpcodes.h:777
@ AssertSext
AssertSext, AssertZext - These nodes record if a register contains a value that has already been zero...
Definition: ISDOpcodes.h:61
@ FCOPYSIGN
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
Definition: ISDOpcodes.h:501
@ SADDSAT
RESULT = [US]ADDSAT(LHS, RHS) - Perform saturation addition on 2 integers with the same bit width (W)...
Definition: ISDOpcodes.h:347
@ AssertZext
Definition: ISDOpcodes.h:62
@ CALLSEQ_START
CALLSEQ_START/CALLSEQ_END - These operators mark the beginning and end of a call sequence,...
Definition: ISDOpcodes.h:1161
@ STRICT_FRINT
Definition: ISDOpcodes.h:430
@ GET_DYNAMIC_AREA_OFFSET
GET_DYNAMIC_AREA_OFFSET - get offset from native SP to the address of the most recent dynamic alloca.
Definition: ISDOpcodes.h:1341
@ ADJUST_TRAMPOLINE
ADJUST_TRAMPOLINE - This corresponds to the adjust_trampoline intrinsic.
Definition: ISDOpcodes.h:1226
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition: ISDOpcodes.h:198
@ TargetGlobalTLSAddress
Definition: ISDOpcodes.h:171
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition: ISDOpcodes.h:523
bool isNormalStore(const SDNode *N)
Returns true if the specified node is a non-truncating and unindexed store.
bool isZEXTLoad(const SDNode *N)
Returns true if the specified node is a ZEXTLOAD.
bool isUNINDEXEDLoad(const SDNode *N)
Returns true if the specified node is an unindexed load.
bool isEXTLoad(const SDNode *N)
Returns true if the specified node is a EXTLOAD.
bool isBuildVectorAllZeros(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are 0 or undef.
bool isSignedIntSetCC(CondCode Code)
Return true if this is a setcc instruction that performs a signed comparison when used with integer o...
Definition: ISDOpcodes.h:1587
MemIndexedMode
MemIndexedMode enum - This enum defines the load / store indexed addressing modes.
Definition: ISDOpcodes.h:1503
bool isSEXTLoad(const SDNode *N)
Returns true if the specified node is a SEXTLOAD.
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
Definition: ISDOpcodes.h:1554
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
Definition: ISDOpcodes.h:1534
bool isUnsignedIntSetCC(CondCode Code)
Return true if this is a setcc instruction that performs an unsigned comparison when used with intege...
Definition: ISDOpcodes.h:1593
bool isNormalLoad(const SDNode *N)
Returns true if the specified node is a non-extending and unindexed load.
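A small sketch of how the load predicates above are typically used as a guard in a combine (the helper name is hypothetical):

#include "llvm/CodeGen/SelectionDAGNodes.h"

// A combine usually only rewrites plain loads: non-extending, unindexed,
// and neither volatile nor atomic.
static bool isRewritableLoad(const llvm::SDNode *N) {
  return llvm::ISD::isNormalLoad(N) &&
         llvm::cast<llvm::LoadSDNode>(N)->isSimple();
}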
Function * getDeclaration(Module *M, ID id, ArrayRef< Type * > Tys=std::nullopt)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
Definition: Function.cpp:1484
@ Bitcast
Perform the operation on a different, but equivalently sized type.
@ VecShuffle
Definition: NVPTX.h:91
@ MO_TLSLDM_FLAG
MO_TLSLDM_FLAG - on AIX the ML relocation type is only valid for a reference to a TOC symbol from the...
Definition: PPC.h:146
@ MO_PIC_LO_FLAG
MO_PIC_LO_FLAG = MO_PIC_FLAG | MO_LO.
Definition: PPC.h:194
@ MO_TPREL_PCREL_FLAG
MO_TPREL_PCREL_FLAG = MO_PCREL_FLAG | MO_TPREL_FLAG.
Definition: PPC.h:197
@ MO_GOT_TPREL_PCREL_FLAG
MO_GOT_TPREL_PCREL_FLAG - A combination of flags; if these bits are set they should produce the reloc...
Definition: PPC.h:172
@ MO_GOT_PCREL_FLAG
MO_GOT_PCREL_FLAG = MO_PCREL_FLAG | MO_GOT_FLAG.
Definition: PPC.h:203
@ MO_TLSGDM_FLAG
MO_TLSGDM_FLAG - If this bit is set the symbol reference is relative to the region handle of TLS Gene...
Definition: PPC.h:154
@ MO_PCREL_FLAG
MO_PCREL_FLAG - If this bit is set, the symbol reference is relative to the current instruction addre...
Definition: PPC.h:121
@ MO_TLSLD_FLAG
MO_TLSLD_FLAG - If this bit is set the symbol reference is relative to TLS Local Dynamic model.
Definition: PPC.h:150
@ MO_TLS_PCREL_FLAG
MO_TLS_PCREL_FLAG = MO_PCREL_FLAG | MO_TLS.
Definition: PPC.h:200
@ MO_TPREL_HA
Definition: PPC.h:179
@ MO_PLT
On PPC, the 12 bits are not enough for all target operand flags.
Definition: PPC.h:113
@ MO_TLS
Symbol for VK_PPC_TLS fixup attached to an ADD instruction.
Definition: PPC.h:188
@ MO_TPREL_FLAG
MO_TPREL_FLAG - If this bit is set, the symbol reference is relative to the thread pointer and the sy...
Definition: PPC.h:140
@ MO_TPREL_LO
Definition: PPC.h:178
@ MO_LO
MO_LO, MO_HA - lo16(symbol) and ha16(symbol)
Definition: PPC.h:175
@ MO_GOT_TLSLD_PCREL_FLAG
MO_GOT_TLSLD_PCREL_FLAG - A combination of flags; if these bits are set they should produce the reloc...
Definition: PPC.h:166
@ MO_PIC_HA_FLAG
MO_PIC_HA_FLAG = MO_PIC_FLAG | MO_HA.
Definition: PPC.h:191
@ MO_TLSGD_FLAG
MO_TLSGD_FLAG - If this bit is set the symbol reference is relative to TLS General Dynamic model for ...
Definition: PPC.h:135
@ MO_GOT_TLSGD_PCREL_FLAG
MO_GOT_TLSGD_PCREL_FLAG - A combination of flags; if these bits are set they should produce the reloc...
Definition: PPC.h:160
@ MO_HA
Definition: PPC.h:176
@ MO_PIC_FLAG
MO_PIC_FLAG - If this bit is set, the symbol reference is relative to the function's picbase,...
Definition: PPC.h:117
@ SEXT_LD_SPLAT
VSRC, CHAIN = SEXT_LD_SPLAT, CHAIN, Ptr - a splatting load memory that sign-extends.
@ FCTIDUZ
Newer FCTI[D,W]UZ floating-point-to-integer conversion instructions for unsigned integers with round ...
@ ADDI_TLSGD_L_ADDR
G8RC = ADDI_TLSGD_L_ADDR G8RReg, Symbol, Symbol - Op that combines ADDI_TLSGD_L and GET_TLS_ADDR unti...
@ FSQRT
Square root instruction.
@ STRICT_FCFID
Constrained integer-to-floating-point conversion instructions.
@ DYNALLOC
The following two target-specific nodes are used for calls through function pointers in the 64-bit SV...
@ COND_BRANCH
CHAIN = COND_BRANCH CHAIN, CRRC, OPC, DESTBB [, INFLAG] - This corresponds to the COND_BRANCH pseudo ...
@ TLSLD_AIX
[GP|G8]RC = TLSLD_AIX, TOC_ENTRY(module handle) Op that requires a single input of the module handle ...
@ CALL_RM
The variants that implicitly define rounding mode for calls with strictfp semantics.
@ STORE_VEC_BE
CHAIN = STORE_VEC_BE CHAIN, VSRC, Ptr - Occurs only for little endian.
@ BDNZ
CHAIN = BDNZ CHAIN, DESTBB - These are used to create counter-based loops.
@ MTVSRZ
Direct move from a GPR to a VSX register (zero)
@ SRL
These nodes represent PPC shifts.
@ VECINSERT
VECINSERT - The PPC vector insert instruction.
@ LXSIZX
GPRC, CHAIN = LXSIZX, CHAIN, Ptr, ByteWidth - This is a load of an integer smaller than 64 bits into ...
@ FNMSUB
FNMSUB - Negated multiply-subtract instruction.
@ RFEBB
CHAIN = RFEBB CHAIN, State - Return from event-based branch.
@ FCTIDZ
FCTI[D,W]Z - The FCTIDZ and FCTIWZ instructions, taking an f32 or f64 operand, producing an f64 value...
@ SC
CHAIN = SC CHAIN, Imm128 - System call.
@ GET_TLS_ADDR
x3 = GET_TLS_ADDR x3, Symbol - For the general-dynamic TLS model, produces a call to __tls_get_addr(s...
@ XXSPLTI32DX
XXSPLTI32DX - The PPC XXSPLTI32DX instruction.
@ ANDI_rec_1_EQ_BIT
i1 = ANDI_rec_1_[EQ|GT]_BIT(i32 or i64 x) - Represents the result of the eq or gt bit of CR0 after ex...
@ FRE
Reciprocal estimate instructions (unary FP ops).
@ ADDIS_GOT_TPREL_HA
G8RC = ADDIS_GOT_TPREL_HA x2, Symbol - Used by the initial-exec TLS model, produces an ADDIS8 instruc...
@ CLRBHRB
CHAIN = CLRBHRB CHAIN - Clear branch history rolling buffer.
@ STORE_COND
CHAIN,Glue = STORE_COND CHAIN, GPR, Ptr The store conditional instruction ST[BHWD]ARX that produces a...
@ SINT_VEC_TO_FP
Extract a subvector from signed integer vector and convert to FP.
@ EXTRACT_SPE
Extract SPE register component, second argument is high or low.
@ XXSWAPD
VSRC, CHAIN = XXSWAPD CHAIN, VSRC - Occurs only for little endian.
@ ADDI_TLSLD_L_ADDR
G8RC = ADDI_TLSLD_L_ADDR G8RReg, Symbol, Symbol - Op that combines ADDI_TLSLD_L and GET_TLSLD_ADDR un...
@ ATOMIC_CMP_SWAP_8
ATOMIC_CMP_SWAP - the exact same as the target-independent nodes except they ensure that the compare ...
@ ST_VSR_SCAL_INT
Store scalar integers from VSR.
@ VCMP
RESVEC = VCMP(LHS, RHS, OPC) - Represents one of the altivec VCMP* instructions.
@ BCTRL
CHAIN,FLAG = BCTRL(CHAIN, INFLAG) - Directly corresponds to a BCTRL instruction.
@ BUILD_SPE64
BUILD_SPE64 and EXTRACT_SPE are analogous to BUILD_PAIR and EXTRACT_ELEMENT but take f64 arguments in...
@ LFIWZX
GPRC, CHAIN = LFIWZX CHAIN, Ptr - This is a floating-point load which zero-extends from a 32-bit inte...
@ RET_GLUE
Return with a glue operand, matched by 'blr'.
@ SCALAR_TO_VECTOR_PERMUTED
PowerPC instructions that have SCALAR_TO_VECTOR semantics tend to place the value into the least sign...
@ EXTRACT_VSX_REG
EXTRACT_VSX_REG = Extract one of the underlying vsx registers of an accumulator or pair register.
@ STXSIX
STXSIX - The STXSI[bh]X instruction.
@ MAT_PCREL_ADDR
MAT_PCREL_ADDR = Materialize a PC Relative address.
@ MFOCRF
R32 = MFOCRF(CRREG, INFLAG) - Represents the MFOCRF instruction.
@ XXSPLT
XXSPLT - The PPC VSX splat instructions.
@ TOC_ENTRY
GPRC = TOC_ENTRY GA, TOC Loads the entry for GA from the TOC, where the TOC base is given by the last...
@ XXPERMDI
XXPERMDI - The PPC XXPERMDI instruction.
@ ADDIS_DTPREL_HA
G8RC = ADDIS_DTPREL_HA x3, Symbol - For the local-dynamic TLS model, produces an ADDIS8 instruction t...
@ ADD_TLS
G8RC = ADD_TLS G8RReg, Symbol - Can be used by the initial-exec and local-exec TLS models,...
@ MTVSRA
Direct move from a GPR to a VSX register (algebraic)
@ VADD_SPLAT
VRRC = VADD_SPLAT Elt, EltSize - Temporary node to be expanded during instruction selection to optimi...
@ PPC32_GOT
GPRC = address of GLOBAL_OFFSET_TABLE.
@ ADDI_DTPREL_L
G8RC = ADDI_DTPREL_L G8RReg, Symbol - For the local-dynamic TLS model, produces an ADDI8 instruction ...
@ BCTRL_LOAD_TOC
CHAIN,FLAG = BCTRL(CHAIN, ADDR, INFLAG) - The combination of a bctrl instruction and the TOC reload r...
@ PPC32_PICGOT
GPRC = address of GLOBAL_OFFSET_TABLE.
@ FCFID
FCFID - The FCFID instruction, taking an f64 operand and producing an f64 value containing the FP re...
@ CR6SET
ch, gl = CR6[UN]SET ch, inglue - Toggle CR bit 6 for SVR4 vararg calls
@ LBRX
GPRC, CHAIN = LBRX CHAIN, Ptr, Type - This is a byte-swapping load instruction.
@ GET_TLS_MOD_AIX
x3 = GET_TLS_MOD_AIX _$TLSML - For the AIX local-dynamic TLS model, produces a call to ....
@ LD_VSX_LH
VSRC, CHAIN = LD_VSX_LH CHAIN, Ptr - This is a floating-point load of a v2f32 value into the lower ha...
@ PROBED_ALLOCA
To avoid stack clash, allocation is performed by block and each block is probed.
@ XXMFACC
XXMFACC = This corresponds to the xxmfacc instruction.
@ ADDIS_TLSGD_HA
G8RC = ADDIS_TLSGD_HA x2, Symbol - For the general-dynamic TLS model, produces an ADDIS8 instruction ...
@ ACC_BUILD
ACC_BUILD = Build an accumulator register from 4 VSX registers.
@ GlobalBaseReg
The result of the mflr at function entry, used for PIC code.
@ LXVD2X
VSRC, CHAIN = LXVD2X_LE CHAIN, Ptr - Occurs only for little endian.
@ XSMAXC
XSMAXC[DQ]P, XSMINC[DQ]P - C-type min/max instructions.
@ CALL
CALL - A direct function call.
@ MTCTR
CHAIN,FLAG = MTCTR(VAL, CHAIN[, INFLAG]) - Directly corresponds to a MTCTR instruction.
@ TC_RETURN
TC_RETURN - A tail call return.
@ STFIWX
STFIWX - The STFIWX instruction.
@ LD_SPLAT
VSRC, CHAIN = LD_SPLAT, CHAIN, Ptr - a splatting load memory instruction such as LXVDSX,...
@ VCMP_rec
RESVEC, OUTFLAG = VCMP_rec(LHS, RHS, OPC) - Represents one of the altivec VCMP*_rec instructions.
@ MFFS
F8RC = MFFS - This moves the FPSCR (not modeled) into the register.
@ PADDI_DTPREL
G8RC = PADDI_DTPREL x3, Symbol - For the pc-rel based local-dynamic TLS model, produces a PADDI8 inst...
@ BUILD_FP128
Direct move of 2 consecutive GPRs to a VSX register.
@ VEXTS
VEXTS, ByteWidth - takes an input in VSFRC and produces an output in VSFRC that is sign-extended from...
@ TLS_LOCAL_EXEC_MAT_ADDR
TLS_LOCAL_EXEC_MAT_ADDR = Materialize an address for TLS global address when using local exec access ...
@ VPERM
VPERM - The PPC VPERM Instruction.
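As a rough, hedged illustration of how such PPCISD nodes are emitted during lowering (a sketch, not code taken from this file; V1, V2, and PermMask are assumed to be already-built SDValues of type MVT::v16i8):
  // Select bytes from two v16i8 inputs using a VPERM permute-control vector.
  SDLoc dl(Op);
  SDValue Permuted =
      DAG.getNode(PPCISD::VPERM, dl, MVT::v16i8, V1, V2, PermMask);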
@ ADDIS_TLSLD_HA
G8RC = ADDIS_TLSLD_HA x2, Symbol - For the local-dynamic TLS model, produces an ADDIS8 instruction th...
@ XXSPLTI_SP_TO_DP
XXSPLTI_SP_TO_DP - The PPC VSX splat instruction for converting an immediate single prec...
@ GET_TLSLD_ADDR
x3 = GET_TLSLD_ADDR x3, Symbol - For the local-dynamic TLS model, produces a call to __tls_get_addr(s...
@ ADDI_TLSGD_L
x3 = ADDI_TLSGD_L G8RReg, Symbol - For the general-dynamic TLS model, produces an ADDI8 instruction t...
@ DYNAREAOFFSET
This instruction is lowered in PPCRegisterInfo::eliminateFrameIndex to compute an offset from native ...
@ PAIR_BUILD
PAIR_BUILD = Build a vector pair register from 2 VSX registers.
@ STRICT_FADDRTZ
Constrained floating point add in round-to-zero mode.
@ FTSQRT
Test instruction for software square root.
@ FP_EXTEND_HALF
FP_EXTEND_HALF(VECTOR, IDX) - Custom extend upper (IDX=0) half or lower (IDX=1) half of v4f32 to v2f6...
@ CMPB
The CMPB instruction (takes two operands of i32 or i64).
@ VECSHL
VECSHL - The PPC vector shift left instruction.
@ ADDI_TLSLD_L
x3 = ADDI_TLSLD_L G8RReg, Symbol - For the local-dynamic TLS model, produces an ADDI8 instruction tha...
@ FADDRTZ
F8RC = FADDRTZ F8RC, F8RC - This is an FADD done with rounding towards zero.
@ ZEXT_LD_SPLAT
VSRC, CHAIN = ZEXT_LD_SPLAT CHAIN, Ptr - a splatting memory load that zero-extends.
@ SRA_ADDZE
The combination of sra[wd]i and addze used to implement signed integer division by a power of 2.
@ EXTSWSLI
EXTSWSLI = The PPC extswsli instruction, which does an extend-sign word and shift left immediate.
@ STXVD2X
CHAIN = STXVD2X CHAIN, VSRC, Ptr - Occurs only for little endian.
@ TLSGD_AIX
GPRC = TLSGD_AIX, TOC_ENTRY, TOC_ENTRY / G8RC = TLSGD_AIX, TOC_ENTRY, TOC_ENTRY - Op that combines two re...
@ UINT_VEC_TO_FP
Extract a subvector from an unsigned integer vector and convert it to FP.
@ GET_TPOINTER
x3 = GET_TPOINTER - Used for the local- and initial-exec TLS model on 32-bit AIX, produces a call to ...
@ LXVRZX
LXVRZX - Load VSX Vector Rightmost and Zero Extend. This node represents a v1i128 BUILD_VECTOR of a zero...
@ MFBHRBE
GPRC, CHAIN = MFBHRBE CHAIN, Entry, Dummy - Move from branch history rolling buffer entry.
@ FCFIDU
Newer FCFID[US] integer-to-floating-point conversion instructions for unsigned integers and single-pr...
@ FSEL
FSEL - Traditional three-operand fsel node.
@ SWAP_NO_CHAIN
An SDNode for swaps that are not associated with any loads/stores and thereby have no chain.
@ LOAD_VEC_BE
VSRC, CHAIN = LOAD_VEC_BE CHAIN, Ptr - Occurs only for little endian.
@ LFIWAX
GPRC, CHAIN = LFIWAX CHAIN, Ptr - This is a floating-point load which sign-extends from a 32-bit inte...
@ STBRX
CHAIN = STBRX CHAIN, GPRC, Ptr, Type - This is a byte-swapping store instruction.
@ LD_GOT_TPREL_L
G8RC = LD_GOT_TPREL_L Symbol, G8RReg - Used by the initial-exec TLS model, produces a LD instruction ...
@ MFVSR
Direct move from a VSX register to a GPR.
@ TLS_DYNAMIC_MAT_PCREL_ADDR
TLS_DYNAMIC_MAT_PCREL_ADDR = Materialize a PC Relative address for TLS global address when using dyna...
@ Hi
Hi/Lo - These represent the high and low 16-bit parts of a global address respectively.
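A hedged sketch of the classic 32-bit address-materialization pattern built from these halves (GA is assumed to be a TargetGlobalAddress node and PtrVT the pointer type; this is illustrative, not a verbatim excerpt):
  // Combine the high and low 16-bit parts of a symbol address.
  SDValue Zero = DAG.getConstant(0, dl, PtrVT);
  SDValue Hi = DAG.getNode(PPCISD::Hi, dl, PtrVT, GA, Zero);
  SDValue Lo = DAG.getNode(PPCISD::Lo, dl, PtrVT, GA, Zero);
  SDValue Addr = DAG.getNode(ISD::ADD, dl, PtrVT, Hi, Lo);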
Predicate
Predicate - These are "(BI << 5) | BO" for various predicates.
Definition: PPCPredicates.h:26
SDValue get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG)
get_VSPLTI_elt - If this is a build_vector of constants which can be formed by using a vspltis[bhw] i...
bool isXXBRDShuffleMask(ShuffleVectorSDNode *N)
isXXBRDShuffleMask - Return true if this is a shuffle mask suitable for a XXBRD instruction.
FastISel * createFastISel(FunctionLoweringInfo &FuncInfo, const TargetLibraryInfo *LibInfo)
bool isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize, unsigned ShuffleKind, SelectionDAG &DAG)
isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for a VMRGH* instruction with the ...
bool isVPKUDUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, SelectionDAG &DAG)
isVPKUDUMShuffleMask - Return true if this is the shuffle mask for a VPKUDUM instruction.
bool isVMRGEOShuffleMask(ShuffleVectorSDNode *N, bool CheckEven, unsigned ShuffleKind, SelectionDAG &DAG)
isVMRGEOShuffleMask - Return true if this is a shuffle mask suitable for a VMRGEW or VMRGOW instructi...
bool isXXBRQShuffleMask(ShuffleVectorSDNode *N)
isXXBRQShuffleMask - Return true if this is a shuffle mask suitable for a XXBRQ instruction.
bool isXXBRWShuffleMask(ShuffleVectorSDNode *N)
isXXBRWShuffleMask - Return true if this is a shuffle mask suitable for a XXBRW instruction.
bool isXXPERMDIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts, bool &Swap, bool IsLE)
isXXPERMDIShuffleMask - Return true if this is a shuffle mask suitable for a XXPERMDI instruction.
bool isXXBRHShuffleMask(ShuffleVectorSDNode *N)
isXXBRHShuffleMask - Return true if this is a shuffle mask suitable for a XXBRH instruction.
unsigned getSplatIdxForPPCMnemonics(SDNode *N, unsigned EltSize, SelectionDAG &DAG)
getSplatIdxForPPCMnemonics - Return the splat index as a value that is appropriate for PPC mnemonics ...
bool isXXSLDWIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts, bool &Swap, bool IsLE)
isXXSLDWIShuffleMask - Return true if this is a shuffle mask suitable for a XXSLDWI instruction.
int isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind, SelectionDAG &DAG)
isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift amount, otherwise return -1.
bool isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize, unsigned ShuffleKind, SelectionDAG &DAG)
isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for a VMRGL* instruction with the ...
bool isXXINSERTWMask(ShuffleVectorSDNode *N, unsigned &ShiftElts, unsigned &InsertAtByte, bool &Swap, bool IsLE)
isXXINSERTWMask - Return true if this VECTOR_SHUFFLE can be handled by the XXINSERTW instruction intr...
bool isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize)
isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand specifies a splat of a singl...
@ DIR_PWR_FUTURE
Definition: PPCSubtarget.h:64
bool isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, SelectionDAG &DAG)
isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a VPKUWUM instruction.
bool isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, SelectionDAG &DAG)
isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a VPKUHUM instruction.
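A hedged sketch of how these PPC-namespace shuffle-mask predicates are typically consulted in a VECTOR_SHUFFLE lowering path (SVOp is an assumed ShuffleVectorSDNode, DAG the SelectionDAG; shift-amount handling is simplified):
  if (PPC::isSplatShuffleMask(SVOp, 4)) {
    // A word splat: ask for the splat index in PPC mnemonic terms.
    unsigned SplatIdx = PPC::getSplatIdxForPPCMnemonics(SVOp, 4, DAG);
    (void)SplatIdx; // ... emit the splat using SplatIdx ...
  } else if (PPC::isVSLDOIShuffleMask(SVOp, /*ShuffleKind=*/0, DAG) != -1) {
    // ... emit a VSLDOI with the returned shift amount ...
  }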
Libcall
RTLIB::Libcall enum - This enum defines all of the runtime library calls the backend can emit.
@ Define
Register definition.
@ GeneralDynamic
Definition: CodeGen.h:46
Reg
All possible values of the reg field in the ModR/M byte.
@ XMC_PR
Program Code.
Definition: XCOFF.h:105
@ XTY_ER
External reference.
Definition: XCOFF.h:241
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
constexpr double e
Definition: MathExtras.h:31
const_iterator end(StringRef path)
Get end iterator over path.
Definition: Path.cpp:236
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
@ Offset
Definition: DWP.cpp:480
static bool isIndirectCall(const MachineInstr &MI)
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1722
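For example, the range-based wrapper avoids explicit begin()/end() calls; a small sketch checking that every operand of a node N is undef:
  bool AllUndef = llvm::all_of(N->op_values(),
                               [](SDValue Op) { return Op.isUndef(); });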
bool checkConvertToNonDenormSingle(APFloat &ArgAPFloat)
void GetReturnInfo(CallingConv::ID CC, Type *ReturnType, AttributeList attr, SmallVectorImpl< ISD::OutputArg > &Outs, const TargetLowering &TLI, const DataLayout &DL)
Given an LLVM IR type and return type attributes, compute the return value EVTs and flags,...
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
SDValue peekThroughBitcasts(SDValue V)
Return the non-bitcasted source operand of V if it exists.
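A sketch of a typical DAG-combine guard using these helpers together (N is an assumed SDNode being combined):
  // Look through bitcasts before testing for an all-zero or all-ones operand.
  SDValue Src = peekThroughBitcasts(N->getOperand(0));
  if (isNullConstant(Src) || isAllOnesConstant(Src)) {
    // ... fold the node ...
  }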
bool CC_PPC32_SVR4_ByVal(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
bool isAligned(Align Lhs, uint64_t SizeInBytes)
Checks that SizeInBytes is a multiple of the alignment.
Definition: Alignment.h:145
bool isIntS16Immediate(SDNode *N, int16_t &Imm)
isIntS16Immediate - This method tests to see if the node is either a 32-bit or 64-bit immediate,...
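A sketch of its usual role when forming D-form addressing (the divisibility check is illustrative, as required by DS-form encodings):
  int16_t Imm;
  if (isIntS16Immediate(N->getOperand(1).getNode(), Imm) && (Imm % 4) == 0) {
    // ... the operand can be encoded directly as a signed 16-bit displacement ...
  }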
bool CC_PPC32_SVR4_VarArg(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition: MathExtras.h:280
static bool isRunOfOnes64(uint64_t Val, unsigned &MB, unsigned &ME)
int countr_zero(T Val)
Count the number of 0s from the least significant bit to the most significant, stopping at the first 1.
Definition: bit.h:215
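For instance, combined with the power-of-two checks below, this is the usual way to recover log2 of a divisor when turning a divide into a shift (a minimal sketch):
  uint64_t Divisor = 16;
  if (llvm::isPowerOf2_64(Divisor)) {
    unsigned Log2 = llvm::countr_zero(Divisor); // 4 for a divisor of 16
    // ... emit srawi/sradi with shift amount Log2 ...
  }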
unsigned M1(unsigned Val)
Definition: VE.h:376
bool isReleaseOrStronger(AtomicOrdering AO)
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1729
bool RetCC_PPC_Cold(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:275
bool convertToNonDenormSingle(APInt &ArgAPInt)
FPClassTest
Floating-point class tests, supported by 'is_fpclass' intrinsic.
bool CC_PPC32_SVR4(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:167
bool CC_PPC64_ELF(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
bool RetCC_PPC(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
format_object< Ts... > format(const char *Fmt, const Ts &... Vals)
These are helper functions used to produce formatted output.
Definition: Format.h:125
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
AtomicOrdering
Atomic ordering for LLVM's memory model.
@ Mod
The access may modify the value stored in memory.
bool isIntS34Immediate(SDNode *N, int64_t &Imm)
isIntS34Immediate - This method tests whether the value of the given node can be accurately represented as a sign ...
bool CCAssignFn(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
CCAssignFn - This function assigns a location for Val, updating State to reflect the change.
@ Mul
Product of integers.
@ Add
Sum of integers.
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition: Alignment.h:155
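A quick sketch of these alignment helpers, e.g. when sizing an argument save area:
  uint64_t ArgAreaSize = alignTo(/*Size=*/10, Align(16)); // rounds up to 16
  bool AlreadyAligned = isAligned(Align(8), /*SizeInBytes=*/48); // true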
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
Definition: STLExtras.h:1914
DWARFExpression::Operation Op
unsigned M0(unsigned Val)
Definition: VE.h:375
ConstantSDNode * isConstOrConstSplat(SDValue N, bool AllowUndefs=false, bool AllowTruncation=false)
Returns the SDNode if it is a constant splat BuildVector or constant int.
bool isAcquireOrStronger(AtomicOrdering AO)
constexpr int32_t SignExtend32(uint32_t X)
Sign-extend the number in the bottom B bits of X to a 32-bit integer.
Definition: MathExtras.h:491
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:191
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Definition: Alignment.h:212
constexpr int64_t SignExtend64(uint64_t x)
Sign-extend the number in the bottom B bits of X to a 64-bit integer.
Definition: MathExtras.h:509
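In both helpers, B names the width of the bit-field being extended (a template parameter); for example:
  int32_t Lo16 = SignExtend32<16>(0xFFFFu);        // -1
  int64_t Lo34 = SignExtend64<34>(0x3FFFFFFFFULL); // also -1: all 34 bits set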
static bool isRunOfOnes(unsigned Val, unsigned &MB, unsigned &ME)
Returns true iff Val consists of one contiguous run of 1s with any number of 0s on either side.
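A hedged usage sketch: MB and ME receive the mask-begin and mask-end bit positions used by rlwinm-style mask encodings (PowerPC bit numbering, where bit 0 is the most significant bit):
  unsigned MB, ME;
  if (isRunOfOnes(0x00FFFF00u, MB, ME)) {
    // The value is a single contiguous run of ones described by MB..ME.
  }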
T bit_floor(T Value)
Returns the largest integral power of two no greater than Value if Value is nonzero.
Definition: bit.h:327
bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
static const unsigned PerfectShuffleTable[6561+1]
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
#define N
This is used by foldLoadsRecursive() to capture a Root Load node which is of type or(load,...
static const fltSemantics & IEEEsingle() LLVM_READNONE
Definition: APFloat.cpp:271
static constexpr roundingMode rmNearestTiesToEven
Definition: APFloat.h:246
static const fltSemantics & PPCDoubleDouble() LLVM_READNONE
Definition: APFloat.cpp:274
static constexpr roundingMode rmTowardZero
Definition: APFloat.h:250
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
uint64_t value() const
This is a hole in the type system and should not be abused.
Definition: Alignment.h:85
Represent subnormal handling kind for floating point instruction inputs and outputs.
Extended Value Type.
Definition: ValueTypes.h:34
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
Definition: ValueTypes.h:93
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition: ValueTypes.h:380
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition: ValueTypes.h:136
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
Definition: ValueTypes.h:73
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
Definition: ValueTypes.h:274
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
Definition: ValueTypes.h:146
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition: ValueTypes.h:358
uint64_t getScalarSizeInBits() const
Definition: ValueTypes.h:370
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition: ValueTypes.h:306
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition: ValueTypes.h:64
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition: ValueTypes.h:366
bool isVector() const
Return true if this is a vector value type.
Definition: ValueTypes.h:167
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition: ValueTypes.h:313
Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
Definition: ValueTypes.cpp:203
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition: ValueTypes.h:318
bool isExtended() const
Test if the given EVT is extended (as opposed to being simple).
Definition: ValueTypes.h:141
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition: ValueTypes.h:156
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition: ValueTypes.h:326
EVT getHalfNumVectorElementsVT(LLVMContext &Context) const
Definition: ValueTypes.h:438
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition: ValueTypes.h:151
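A short sketch of typical EVT queries during lowering (Op is an assumed SDValue):
  EVT VT = Op.getValueType();
  if (VT.isVector() && VT.getVectorElementType() == MVT::f32 &&
      VT.getVectorNumElements() == 4) {
    EVT IntVT = VT.changeVectorElementTypeToInteger(); // v4i32
    (void)IntVT;
  }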
unsigned getByValSize() const
void setByValSize(unsigned S)
Align getNonZeroByValAlign() const
InputArg - This struct carries flags and type information about a single incoming (formal) argument o...
OutputArg - This struct carries flags and a value for a single outgoing (actual) argument or outgoing...
bool isConstant() const
Returns true if we know the value of all bits.
Definition: KnownBits.h:50
void resetAll()
Resets the known state of all bits.
Definition: KnownBits.h:70
const APInt & getConstant() const
Returns the value when all bits have a known value.
Definition: KnownBits.h:56
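A sketch of how these KnownBits accessors are typically used from a combine (DAG and Op assumed in scope):
  KnownBits Known = DAG.computeKnownBits(Op);
  if (Known.isConstant()) {
    const APInt &C = Known.getConstant();
    (void)C; // ... replace Op with a constant node built from C ...
  }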
This class contains a discriminated union of information about pointers in memory operands,...
static MachinePointerInfo getStack(MachineFunction &MF, int64_t Offset, uint8_t ID=0)
Stack pointer relative access.
MachinePointerInfo getWithOffset(int64_t O) const
static MachinePointerInfo getGOT(MachineFunction &MF)
Return a MachinePointerInfo record that refers to a GOT entry.
static MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
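A hedged sketch of building pointer info for a fixed stack slot and storing through it (FI is an assumed frame index, FIdx the corresponding frame-index SDValue, and Chain/Val/dl assumed to be prepared):
  MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI);
  SDValue Store = DAG.getStore(Chain, dl, Val, FIdx, PtrInfo);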
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition: Alignment.h:117
Structure that collects some common arguments that get passed around between the functions for call l...
These are IR-level optimization flags that may be propagated to SDNodes.
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*...
This contains information for each constraint that we are lowering.
This structure contains all information that is necessary for lowering calls.
CallLoweringInfo & setIsPostTypeLegalization(bool Value=true)
CallLoweringInfo & setLibCallee(CallingConv::ID CC, Type *ResultType, SDValue Target, ArgListTy &&ArgsList)
SmallVector< ISD::InputArg, 32 > Ins
CallLoweringInfo & setZExtResult(bool Value=true)
CallLoweringInfo & setDebugLoc(const SDLoc &dl)
CallLoweringInfo & setSExtResult(bool Value=true)
SmallVector< ISD::OutputArg, 32 > Outs
SmallVector< SDValue, 32 > OutVals
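A sketch of the usual builder-style setup for emitting a runtime-library call during lowering (Chain, Callee, Args, RetTy, dl, and the SignExtend flag are assumed to be prepared by the caller):
  TargetLowering::CallLoweringInfo CLI(DAG);
  CLI.setDebugLoc(dl)
      .setChain(Chain)
      .setLibCallee(CallingConv::C, RetTy, Callee, std::move(Args))
      .setSExtResult(SignExtend);
  std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);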
SDValue CombineTo(SDNode *N, ArrayRef< SDValue > To, bool AddTo=true)