1//===-- PPCISelLowering.cpp - PPC DAG Lowering Implementation -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the PPCISelLowering class.
10//
11//===----------------------------------------------------------------------===//
12
13#include "PPCISelLowering.h"
16#include "PPC.h"
17#include "PPCCCState.h"
18#include "PPCCallingConv.h"
19#include "PPCFrameLowering.h"
20#include "PPCInstrInfo.h"
22#include "PPCPerfectShuffle.h"
23#include "PPCRegisterInfo.h"
24#include "PPCSubtarget.h"
25#include "PPCTargetMachine.h"
26#include "llvm/ADT/APFloat.h"
27#include "llvm/ADT/APInt.h"
28#include "llvm/ADT/APSInt.h"
29#include "llvm/ADT/ArrayRef.h"
30#include "llvm/ADT/DenseMap.h"
31#include "llvm/ADT/STLExtras.h"
33#include "llvm/ADT/SmallSet.h"
35#include "llvm/ADT/Statistic.h"
36#include "llvm/ADT/StringRef.h"
58#include "llvm/IR/CallingConv.h"
59#include "llvm/IR/Constant.h"
60#include "llvm/IR/Constants.h"
61#include "llvm/IR/DataLayout.h"
62#include "llvm/IR/DebugLoc.h"
64#include "llvm/IR/Function.h"
65#include "llvm/IR/GlobalValue.h"
66#include "llvm/IR/IRBuilder.h"
68#include "llvm/IR/Intrinsics.h"
69#include "llvm/IR/IntrinsicsPowerPC.h"
70#include "llvm/IR/Module.h"
71#include "llvm/IR/Type.h"
72#include "llvm/IR/Use.h"
73#include "llvm/IR/Value.h"
74#include "llvm/MC/MCContext.h"
75#include "llvm/MC/MCExpr.h"
84#include "llvm/Support/Debug.h"
86#include "llvm/Support/Format.h"
92#include <algorithm>
93#include <cassert>
94#include <cstdint>
95#include <iterator>
96#include <list>
97#include <optional>
98#include <utility>
99#include <vector>
100
101using namespace llvm;
102
103#define DEBUG_TYPE "ppc-lowering"
104
105static cl::opt<bool> DisableP10StoreForward(
 106 "disable-p10-store-forward",
107 cl::desc("disable P10 store forward-friendly conversion"), cl::Hidden,
108 cl::init(false));
109
110static cl::opt<bool> DisablePPCPreinc("disable-ppc-preinc",
111cl::desc("disable preincrement load/store generation on PPC"), cl::Hidden);
112
113static cl::opt<bool> DisableILPPref("disable-ppc-ilp-pref",
114cl::desc("disable setting the node scheduling preference to ILP on PPC"), cl::Hidden);
115
116static cl::opt<bool> DisablePPCUnaligned("disable-ppc-unaligned",
117cl::desc("disable unaligned load/store generation on PPC"), cl::Hidden);
118
119static cl::opt<bool> DisableSCO("disable-ppc-sco",
120cl::desc("disable sibling call optimization on ppc"), cl::Hidden);
121
122static cl::opt<bool> DisableInnermostLoopAlign32("disable-ppc-innermost-loop-align32",
123cl::desc("don't always align innermost loop to 32 bytes on ppc"), cl::Hidden);
124
125static cl::opt<bool> UseAbsoluteJumpTables("ppc-use-absolute-jumptables",
126cl::desc("use absolute jump tables on ppc"), cl::Hidden);
127
128static cl::opt<bool>
129 DisablePerfectShuffle("ppc-disable-perfect-shuffle",
130 cl::desc("disable vector permute decomposition"),
131 cl::init(true), cl::Hidden);
132
134 "disable-auto-paired-vec-st",
135 cl::desc("disable automatically generated 32byte paired vector stores"),
136 cl::init(true), cl::Hidden);
137
139 "ppc-min-jump-table-entries", cl::init(64), cl::Hidden,
140 cl::desc("Set minimum number of entries to use a jump table on PPC"));
141
143 "ppc-gather-alias-max-depth", cl::init(18), cl::Hidden,
144 cl::desc("max depth when checking alias info in GatherAllAliases()"));
145
147 "ppc-aix-shared-lib-tls-model-opt-limit", cl::init(1), cl::Hidden,
148 cl::desc("Set inclusive limit count of TLS local-dynamic access(es) in a "
149 "function to use initial-exec"));
150
151STATISTIC(NumTailCalls, "Number of tail calls");
152STATISTIC(NumSiblingCalls, "Number of sibling calls");
153STATISTIC(ShufflesHandledWithVPERM,
154 "Number of shuffles lowered to a VPERM or XXPERM");
155STATISTIC(NumDynamicAllocaProbed, "Number of dynamic stack allocation probed");
156
157static bool isNByteElemShuffleMask(ShuffleVectorSDNode *, unsigned, int);
158
159static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl);
160
161static const char AIXSSPCanaryWordName[] = "__ssp_canary_word";
162
163// A faster local-[exec|dynamic] TLS access sequence (enabled with the
164// -maix-small-local-[exec|dynamic]-tls option) can be produced for TLS
165// variables; consistent with the IBM XL compiler, we apply a max size of
166// slightly under 32KB.
168
169// FIXME: Remove this once the bug has been fixed!
171
173 const PPCSubtarget &STI)
174 : TargetLowering(TM), Subtarget(STI) {
 175 // Initialize the map that relates the PPC addressing modes to the computed
 176 // flags of a load/store instruction. The map is used to determine the optimal
 177 // addressing mode when selecting loads and stores.
178 initializeAddrModeMap();
179 // On PPC32/64, arguments smaller than 4/8 bytes are extended, so all
180 // arguments are at least 4/8 bytes aligned.
181 bool isPPC64 = Subtarget.isPPC64();
182 setMinStackArgumentAlignment(isPPC64 ? Align(8) : Align(4));
183 const MVT RegVT = Subtarget.getScalarIntVT();
184
185 // Set up the register classes.
186 addRegisterClass(MVT::i32, &PPC::GPRCRegClass);
187 if (!useSoftFloat()) {
188 if (hasSPE()) {
189 addRegisterClass(MVT::f32, &PPC::GPRCRegClass);
190 // EFPU2 APU only supports f32
191 if (!Subtarget.hasEFPU2())
192 addRegisterClass(MVT::f64, &PPC::SPERCRegClass);
193 } else {
194 addRegisterClass(MVT::f32, &PPC::F4RCRegClass);
195 addRegisterClass(MVT::f64, &PPC::F8RCRegClass);
196 }
197 }
198
200
201 // On P10, the default lowering generates better code using the
202 // setbc instruction.
203 if (!Subtarget.hasP10Vector()) {
205 if (isPPC64)
207 }
208
 209 // Match BITREVERSE to a customized fast code sequence in the .td file.
212
 213 // Sub-word ATOMIC_CMP_SWAP needs to ensure that the input is zero-extended.
215
216 // Custom lower inline assembly to check for special registers.
219
220 // PowerPC has an i16 but no i8 (or i1) SEXTLOAD.
221 for (MVT VT : MVT::integer_valuetypes()) {
224 }
225
226 if (Subtarget.isISA3_0()) {
227 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Legal);
228 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Legal);
229 setTruncStoreAction(MVT::f64, MVT::f16, Legal);
230 setTruncStoreAction(MVT::f32, MVT::f16, Legal);
231 } else {
232 // No extending loads from f16 or HW conversions back and forth.
233 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
236 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
239 setTruncStoreAction(MVT::f64, MVT::f16, Expand);
240 setTruncStoreAction(MVT::f32, MVT::f16, Expand);
241 }
242
243 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
244
 245 // PowerPC has pre-inc loads and stores.
256 if (!Subtarget.hasSPE()) {
261 }
262
263 // PowerPC uses ADDC/ADDE/SUBC/SUBE to propagate carry.
264 const MVT ScalarIntVTs[] = { MVT::i32, MVT::i64 };
265 for (MVT VT : ScalarIntVTs) {
270 }
271
272 if (Subtarget.useCRBits()) {
274
275 if (isPPC64 || Subtarget.hasFPCVT()) {
280
282 AddPromotedToType(ISD::SINT_TO_FP, MVT::i1, RegVT);
284 AddPromotedToType(ISD::UINT_TO_FP, MVT::i1, RegVT);
285
290
292 AddPromotedToType(ISD::FP_TO_SINT, MVT::i1, RegVT);
294 AddPromotedToType(ISD::FP_TO_UINT, MVT::i1, RegVT);
295 } else {
300 }
301
302 // PowerPC does not support direct load/store of condition registers.
305
306 // FIXME: Remove this once the ANDI glue bug is fixed:
307 if (ANDIGlueBug)
309
310 for (MVT VT : MVT::integer_valuetypes()) {
313 setTruncStoreAction(VT, MVT::i1, Expand);
314 }
315
316 addRegisterClass(MVT::i1, &PPC::CRBITRCRegClass);
317 }
318
319 // Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on
320 // PPC (the libcall is not available).
325
326 // We do not currently implement these libm ops for PowerPC.
327 setOperationAction(ISD::FFLOOR, MVT::ppcf128, Expand);
328 setOperationAction(ISD::FCEIL, MVT::ppcf128, Expand);
329 setOperationAction(ISD::FTRUNC, MVT::ppcf128, Expand);
330 setOperationAction(ISD::FRINT, MVT::ppcf128, Expand);
332 setOperationAction(ISD::FREM, MVT::ppcf128, Expand);
333
334 // PowerPC has no SREM/UREM instructions unless we are on P9
335 // On P9 we may use a hardware instruction to compute the remainder.
336 // When the result of both the remainder and the division is required it is
337 // more efficient to compute the remainder from the result of the division
338 // rather than use the remainder instruction. The instructions are legalized
339 // directly because the DivRemPairsPass performs the transformation at the IR
340 // level.
341 if (Subtarget.isISA3_0()) {
346 } else {
351 }
352
353 // Don't use SMUL_LOHI/UMUL_LOHI or SDIVREM/UDIVREM to lower SREM/UREM.
362
 363 // Handle constrained floating-point operations for scalars.
 364 // TODO: Handle SPE-specific operations.
370
375
376 if (!Subtarget.hasSPE()) {
379 }
380
381 if (Subtarget.hasVSX()) {
384 }
385
386 if (Subtarget.hasFSQRT()) {
389 }
390
391 if (Subtarget.hasFPRND()) {
396
401 }
402
403 // We don't support sin/cos/sqrt/fmod/pow
414
 415 // MASS transformation for LLVM intrinsics with a replicating fast-math flag,
 416 // to be consistent with the PPCGenScalarMASSEntries pass.
417 if (TM.getOptLevel() == CodeGenOptLevel::Aggressive) {
430 }
431
432 if (Subtarget.hasSPE()) {
435 } else {
436 setOperationAction(ISD::FMA , MVT::f64, Legal);
437 setOperationAction(ISD::FMA , MVT::f32, Legal);
440 }
441
442 if (Subtarget.hasSPE())
443 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
444
445 // If we're enabling GP optimizations, use hardware square root
446 if (!Subtarget.hasFSQRT() &&
447 !(TM.Options.UnsafeFPMath && Subtarget.hasFRSQRTE() &&
448 Subtarget.hasFRE()))
450
451 if (!Subtarget.hasFSQRT() &&
452 !(TM.Options.UnsafeFPMath && Subtarget.hasFRSQRTES() &&
453 Subtarget.hasFRES()))
455
456 if (Subtarget.hasFCPSGN()) {
459 } else {
462 }
463
464 if (Subtarget.hasFPRND()) {
469
474 }
475
 476 // Prior to P10, PowerPC does not have BSWAP, but we can use the vector BSWAP
 477 // instruction xxbrd to speed up scalar BSWAP64.
478 if (Subtarget.isISA3_1()) {
481 } else {
484 (Subtarget.hasP9Vector() && isPPC64) ? Custom : Expand);
485 }
486
 487 // CTPOP and CTTZ were introduced in P8 and P9, respectively.
488 if (Subtarget.isISA3_0()) {
489 setOperationAction(ISD::CTTZ , MVT::i32 , Legal);
490 setOperationAction(ISD::CTTZ , MVT::i64 , Legal);
491 } else {
492 setOperationAction(ISD::CTTZ , MVT::i32 , Expand);
493 setOperationAction(ISD::CTTZ , MVT::i64 , Expand);
494 }
495
496 if (Subtarget.hasPOPCNTD() == PPCSubtarget::POPCNTD_Fast) {
499 } else {
502 }
503
504 // PowerPC does not have ROTR
507
508 if (!Subtarget.useCRBits()) {
509 // PowerPC does not have Select
514 }
515
516 // PowerPC wants to turn select_cc of FP into fsel when possible.
519
520 // PowerPC wants to optimize integer setcc a bit
521 if (!Subtarget.useCRBits())
523
524 if (Subtarget.hasFPU()) {
528
532 }
533
 534 // PowerPC does not have BRCOND, which requires SetCC
535 if (!Subtarget.useCRBits())
537
539
540 if (Subtarget.hasSPE()) {
541 // SPE has built-in conversions
548
549 // SPE supports signaling compare of f32/f64.
552 } else {
553 // PowerPC turns FP_TO_SINT into FCTIWZ and some load/stores.
556
557 // PowerPC does not have [U|S]INT_TO_FP
562 }
563
564 if (Subtarget.hasDirectMove() && isPPC64) {
569 if (TM.Options.UnsafeFPMath) {
578 }
579 } else {
584 }
585
586 // We cannot sextinreg(i1). Expand to shifts.
588
 589 // NOTE: The EH_SJLJ_SETJMP/_LONGJMP support here is NOT intended for SjLj
 590 // exception handling; it is a lightweight setjmp/longjmp replacement used to
 591 // support continuations, user-level threading, etc. As a result, no other
 592 // SjLj exception interfaces are implemented; please don't build your own
 593 // exception handling on top of them.
594 // LLVM/Clang supports zero-cost DWARF exception handling.
597
598 // We want to legalize GlobalAddress and ConstantPool nodes into the
599 // appropriate instructions to materialize the address.
610
611 // TRAP is legal.
612 setOperationAction(ISD::TRAP, MVT::Other, Legal);
613
614 // TRAMPOLINE is custom lowered.
617
618 // VASTART needs to be custom lowered to use the VarArgsFrameIndex
620
621 if (Subtarget.is64BitELFABI()) {
622 // VAARG always uses double-word chunks, so promote anything smaller.
624 AddPromotedToType(ISD::VAARG, MVT::i1, MVT::i64);
626 AddPromotedToType(ISD::VAARG, MVT::i8, MVT::i64);
628 AddPromotedToType(ISD::VAARG, MVT::i16, MVT::i64);
630 AddPromotedToType(ISD::VAARG, MVT::i32, MVT::i64);
632 } else if (Subtarget.is32BitELFABI()) {
633 // VAARG is custom lowered with the 32-bit SVR4 ABI.
636 } else
638
639 // VACOPY is custom lowered with the 32-bit SVR4 ABI.
640 if (Subtarget.is32BitELFABI())
642 else
644
645 // Use the default implementation.
646 setOperationAction(ISD::VAEND , MVT::Other, Expand);
655
656 // We want to custom lower some of our intrinsics.
662
663 // To handle counter-based loop conditions.
665
670
671 // Comparisons that require checking two conditions.
672 if (Subtarget.hasSPE()) {
677 }
690
693
694 if (Subtarget.has64BitSupport()) {
695 // They also have instructions for converting between i64 and fp.
704 // This is just the low 32 bits of a (signed) fp->i64 conversion.
705 // We cannot do this with Promote because i64 is not a legal type.
708
709 if (Subtarget.hasLFIWAX() || isPPC64) {
712 }
713 } else {
714 // PowerPC does not have FP_TO_UINT on 32-bit implementations.
715 if (Subtarget.hasSPE()) {
718 } else {
721 }
722 }
723
724 // With the instructions enabled under FPCVT, we can do everything.
725 if (Subtarget.hasFPCVT()) {
726 if (Subtarget.has64BitSupport()) {
735 }
736
745 }
746
747 if (Subtarget.use64BitRegs()) {
748 // 64-bit PowerPC implementations can support i64 types directly
749 addRegisterClass(MVT::i64, &PPC::G8RCRegClass);
750 // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
752 // 64-bit PowerPC wants to expand i128 shifts itself.
756 } else {
757 // 32-bit PowerPC wants to expand i64 shifts itself.
761 }
762
763 // PowerPC has better expansions for funnel shifts than the generic
764 // TargetLowering::expandFunnelShift.
765 if (Subtarget.has64BitSupport()) {
768 }
771
772 if (Subtarget.hasVSX()) {
777 }
778
779 if (Subtarget.hasAltivec()) {
780 for (MVT VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32 }) {
785 }
786 // First set operation action for all vector types to expand. Then we
787 // will selectively turn on ones that can be effectively codegen'd.
789 // add/sub are legal for all supported vector VT's.
792
793 // For v2i64, these are only valid with P8Vector. This is corrected after
794 // the loop.
795 if (VT.getSizeInBits() <= 128 && VT.getScalarSizeInBits() <= 64) {
800 }
801 else {
806 }
807
808 if (Subtarget.hasVSX()) {
811 }
812
813 // Vector instructions introduced in P8
814 if (Subtarget.hasP8Altivec() && (VT.SimpleTy != MVT::v1i128)) {
817 }
818 else {
821 }
822
823 // Vector instructions introduced in P9
824 if (Subtarget.hasP9Altivec() && (VT.SimpleTy != MVT::v1i128))
826 else
828
829 // We promote all shuffles to v16i8.
831 AddPromotedToType (ISD::VECTOR_SHUFFLE, VT, MVT::v16i8);
832
833 // We promote all non-typed operations to v4i32.
835 AddPromotedToType (ISD::AND , VT, MVT::v4i32);
837 AddPromotedToType (ISD::OR , VT, MVT::v4i32);
839 AddPromotedToType (ISD::XOR , VT, MVT::v4i32);
841 AddPromotedToType (ISD::LOAD , VT, MVT::v4i32);
843 AddPromotedToType (ISD::SELECT, VT, MVT::v4i32);
846 AddPromotedToType (ISD::SELECT_CC, VT, MVT::v4i32);
848 AddPromotedToType (ISD::STORE, VT, MVT::v4i32);
849
850 // No other operations are legal.
889
890 for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
891 setTruncStoreAction(VT, InnerVT, Expand);
894 setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
895 }
896 }
898 if (!Subtarget.hasP8Vector()) {
899 setOperationAction(ISD::SMAX, MVT::v2i64, Expand);
900 setOperationAction(ISD::SMIN, MVT::v2i64, Expand);
901 setOperationAction(ISD::UMAX, MVT::v2i64, Expand);
902 setOperationAction(ISD::UMIN, MVT::v2i64, Expand);
903 }
904
 905 // We can custom-expand all VECTOR_SHUFFLEs to VPERM; others we can handle
906 // with merges, splats, etc.
908
 909 // Vector truncates to sub-word integers that fit in an Altivec/VSX register
910 // are cheap, so handle them before they get expanded to scalar.
916
917 setOperationAction(ISD::AND , MVT::v4i32, Legal);
918 setOperationAction(ISD::OR , MVT::v4i32, Legal);
919 setOperationAction(ISD::XOR , MVT::v4i32, Legal);
920 setOperationAction(ISD::LOAD , MVT::v4i32, Legal);
922 Subtarget.useCRBits() ? Legal : Expand);
923 setOperationAction(ISD::STORE , MVT::v4i32, Legal);
933 setOperationAction(ISD::FCEIL, MVT::v4f32, Legal);
936
 937 // Custom lower ROTL v1i128 to VECTOR_SHUFFLE v16i8.
938 setOperationAction(ISD::ROTL, MVT::v1i128, Custom);
939 // With hasAltivec set, we can lower ISD::ROTL to vrl(b|h|w).
940 if (Subtarget.hasAltivec())
941 for (auto VT : {MVT::v4i32, MVT::v8i16, MVT::v16i8})
943 // With hasP8Altivec set, we can lower ISD::ROTL to vrld.
944 if (Subtarget.hasP8Altivec())
945 setOperationAction(ISD::ROTL, MVT::v2i64, Legal);
946
947 addRegisterClass(MVT::v4f32, &PPC::VRRCRegClass);
948 addRegisterClass(MVT::v4i32, &PPC::VRRCRegClass);
949 addRegisterClass(MVT::v8i16, &PPC::VRRCRegClass);
950 addRegisterClass(MVT::v16i8, &PPC::VRRCRegClass);
951
952 setOperationAction(ISD::MUL, MVT::v4f32, Legal);
953 setOperationAction(ISD::FMA, MVT::v4f32, Legal);
954
955 if (Subtarget.hasVSX()) {
956 setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
957 setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
959 }
960
961 if (Subtarget.hasP8Altivec())
962 setOperationAction(ISD::MUL, MVT::v4i32, Legal);
963 else
964 setOperationAction(ISD::MUL, MVT::v4i32, Custom);
965
966 if (Subtarget.isISA3_1()) {
967 setOperationAction(ISD::MUL, MVT::v2i64, Legal);
968 setOperationAction(ISD::MULHS, MVT::v2i64, Legal);
969 setOperationAction(ISD::MULHU, MVT::v2i64, Legal);
970 setOperationAction(ISD::MULHS, MVT::v4i32, Legal);
971 setOperationAction(ISD::MULHU, MVT::v4i32, Legal);
972 setOperationAction(ISD::UDIV, MVT::v2i64, Legal);
973 setOperationAction(ISD::SDIV, MVT::v2i64, Legal);
974 setOperationAction(ISD::UDIV, MVT::v4i32, Legal);
975 setOperationAction(ISD::SDIV, MVT::v4i32, Legal);
976 setOperationAction(ISD::UREM, MVT::v2i64, Legal);
977 setOperationAction(ISD::SREM, MVT::v2i64, Legal);
978 setOperationAction(ISD::UREM, MVT::v4i32, Legal);
979 setOperationAction(ISD::SREM, MVT::v4i32, Legal);
980 setOperationAction(ISD::UREM, MVT::v1i128, Legal);
981 setOperationAction(ISD::SREM, MVT::v1i128, Legal);
982 setOperationAction(ISD::UDIV, MVT::v1i128, Legal);
983 setOperationAction(ISD::SDIV, MVT::v1i128, Legal);
984 setOperationAction(ISD::ROTL, MVT::v1i128, Legal);
985 }
986
987 setOperationAction(ISD::MUL, MVT::v8i16, Legal);
988 setOperationAction(ISD::MUL, MVT::v16i8, Custom);
989
 992 // LE is P8+/64-bit, so direct moves are supported and these operations
 993 // are legal. The custom transformation requires 64-bit since we need a
 994 // pair of stores to cover a 128-bit load for P10.
995 if (!DisableP10StoreForward && isPPC64 && !Subtarget.isLittleEndian()) {
999 }
1000
1005
1006 // Altivec does not contain unordered floating-point compare instructions
1007 setCondCodeAction(ISD::SETUO, MVT::v4f32, Expand);
1008 setCondCodeAction(ISD::SETUEQ, MVT::v4f32, Expand);
1009 setCondCodeAction(ISD::SETO, MVT::v4f32, Expand);
1010 setCondCodeAction(ISD::SETONE, MVT::v4f32, Expand);
1011
1012 if (Subtarget.hasVSX()) {
1015 if (Subtarget.hasP8Vector()) {
1018 }
1019 if (Subtarget.hasDirectMove() && isPPC64) {
1028 }
1030
1031 // The nearbyint variants are not allowed to raise the inexact exception
1032 // so we can only code-gen them with unsafe math.
1033 if (TM.Options.UnsafeFPMath) {
1036 }
1037
1038 setOperationAction(ISD::FFLOOR, MVT::v2f64, Legal);
1039 setOperationAction(ISD::FCEIL, MVT::v2f64, Legal);
1040 setOperationAction(ISD::FTRUNC, MVT::v2f64, Legal);
1042 setOperationAction(ISD::FRINT, MVT::v2f64, Legal);
1043 setOperationAction(ISD::FROUND, MVT::v2f64, Legal);
1046
1048 setOperationAction(ISD::FRINT, MVT::v4f32, Legal);
1049 setOperationAction(ISD::FROUND, MVT::v4f32, Legal);
1052
1053 setOperationAction(ISD::MUL, MVT::v2f64, Legal);
1054 setOperationAction(ISD::FMA, MVT::v2f64, Legal);
1055
1056 setOperationAction(ISD::FDIV, MVT::v2f64, Legal);
1057 setOperationAction(ISD::FSQRT, MVT::v2f64, Legal);
1058
1059 // Share the Altivec comparison restrictions.
1060 setCondCodeAction(ISD::SETUO, MVT::v2f64, Expand);
1061 setCondCodeAction(ISD::SETUEQ, MVT::v2f64, Expand);
1062 setCondCodeAction(ISD::SETO, MVT::v2f64, Expand);
1063 setCondCodeAction(ISD::SETONE, MVT::v2f64, Expand);
1064
1065 setOperationAction(ISD::LOAD, MVT::v2f64, Legal);
1066 setOperationAction(ISD::STORE, MVT::v2f64, Legal);
1067
1069
1070 if (Subtarget.hasP8Vector())
1071 addRegisterClass(MVT::f32, &PPC::VSSRCRegClass);
1072
1073 addRegisterClass(MVT::f64, &PPC::VSFRCRegClass);
1074
1075 addRegisterClass(MVT::v4i32, &PPC::VSRCRegClass);
1076 addRegisterClass(MVT::v4f32, &PPC::VSRCRegClass);
1077 addRegisterClass(MVT::v2f64, &PPC::VSRCRegClass);
1078
1079 if (Subtarget.hasP8Altivec()) {
1080 setOperationAction(ISD::SHL, MVT::v2i64, Legal);
1081 setOperationAction(ISD::SRA, MVT::v2i64, Legal);
1082 setOperationAction(ISD::SRL, MVT::v2i64, Legal);
1083
 1084 // 128-bit shifts can be accomplished via 3 instructions for SHL and
 1085 // SRL, but not for SRA, because of the instructions available:
 1086 // VS{RL} and VS{RL}O. However, due to direct move costs, it's not worth
 1087 // doing here, so these are expanded.
1088 setOperationAction(ISD::SHL, MVT::v1i128, Expand);
1089 setOperationAction(ISD::SRL, MVT::v1i128, Expand);
1090 setOperationAction(ISD::SRA, MVT::v1i128, Expand);
1091
1092 setOperationAction(ISD::SETCC, MVT::v2i64, Legal);
1093 }
1094 else {
1095 setOperationAction(ISD::SHL, MVT::v2i64, Expand);
1096 setOperationAction(ISD::SRA, MVT::v2i64, Expand);
1097 setOperationAction(ISD::SRL, MVT::v2i64, Expand);
1098
1099 setOperationAction(ISD::SETCC, MVT::v2i64, Custom);
1100
1101 // VSX v2i64 only supports non-arithmetic operations.
1102 setOperationAction(ISD::ADD, MVT::v2i64, Expand);
1103 setOperationAction(ISD::SUB, MVT::v2i64, Expand);
1104 }
1105
1106 if (Subtarget.isISA3_1())
1107 setOperationAction(ISD::SETCC, MVT::v1i128, Legal);
1108 else
1109 setOperationAction(ISD::SETCC, MVT::v1i128, Expand);
1110
1111 setOperationAction(ISD::LOAD, MVT::v2i64, Promote);
1112 AddPromotedToType (ISD::LOAD, MVT::v2i64, MVT::v2f64);
1114 AddPromotedToType (ISD::STORE, MVT::v2i64, MVT::v2f64);
1115
1117
1126
1127 // Custom handling for partial vectors of integers converted to
1128 // floating point. We already have optimal handling for v2i32 through
1129 // the DAG combine, so those aren't necessary.
1146
1147 setOperationAction(ISD::FNEG, MVT::v4f32, Legal);
1148 setOperationAction(ISD::FNEG, MVT::v2f64, Legal);
1149 setOperationAction(ISD::FABS, MVT::v4f32, Legal);
1150 setOperationAction(ISD::FABS, MVT::v2f64, Legal);
1153
1156
 1157 // Handle constrained floating-point operations for vectors.
 1158 // The predicate is `hasVSX` because Altivec instructions do not raise
 1159 // floating-point exceptions, but VSX vector instructions do.
1173
1187
1188 addRegisterClass(MVT::v2i64, &PPC::VSRCRegClass);
1189 addRegisterClass(MVT::f128, &PPC::VRRCRegClass);
1190
1191 for (MVT FPT : MVT::fp_valuetypes())
1192 setLoadExtAction(ISD::EXTLOAD, MVT::f128, FPT, Expand);
1193
1194 // Expand the SELECT to SELECT_CC
1196
1197 setTruncStoreAction(MVT::f128, MVT::f64, Expand);
1198 setTruncStoreAction(MVT::f128, MVT::f32, Expand);
1199
1200 // No implementation for these ops for PowerPC.
1202 setOperationAction(ISD::FSIN, MVT::f128, Expand);
1203 setOperationAction(ISD::FCOS, MVT::f128, Expand);
1204 setOperationAction(ISD::FPOW, MVT::f128, Expand);
1206 setOperationAction(ISD::FREM, MVT::f128, Expand);
1207 }
1208
1209 if (Subtarget.hasP8Altivec()) {
1210 addRegisterClass(MVT::v2i64, &PPC::VRRCRegClass);
1211 addRegisterClass(MVT::v1i128, &PPC::VRRCRegClass);
1212 }
1213
1214 if (Subtarget.hasP9Vector()) {
1217
1218 // Test data class instructions store results in CR bits.
1219 if (Subtarget.useCRBits()) {
1224 }
1225
 1226 // 128-bit shifts can be accomplished via 3 instructions for SHL and
1227 // SRL, but not for SRA because of the instructions available:
1228 // VS{RL} and VS{RL}O.
1229 setOperationAction(ISD::SHL, MVT::v1i128, Legal);
1230 setOperationAction(ISD::SRL, MVT::v1i128, Legal);
1231 setOperationAction(ISD::SRA, MVT::v1i128, Expand);
1232
1233 setOperationAction(ISD::FADD, MVT::f128, Legal);
1234 setOperationAction(ISD::FSUB, MVT::f128, Legal);
1235 setOperationAction(ISD::FDIV, MVT::f128, Legal);
1236 setOperationAction(ISD::FMUL, MVT::f128, Legal);
1238
1239 setOperationAction(ISD::FMA, MVT::f128, Legal);
1246
1248 setOperationAction(ISD::FRINT, MVT::f128, Legal);
1250 setOperationAction(ISD::FCEIL, MVT::f128, Legal);
1253
1257
1258 // Handle constrained floating-point operations of fp128
1275 setOperationAction(ISD::BSWAP, MVT::v8i16, Legal);
1276 setOperationAction(ISD::BSWAP, MVT::v4i32, Legal);
1277 setOperationAction(ISD::BSWAP, MVT::v2i64, Legal);
1278 setOperationAction(ISD::BSWAP, MVT::v1i128, Legal);
1279 } else if (Subtarget.hasVSX()) {
1282
1283 AddPromotedToType(ISD::LOAD, MVT::f128, MVT::v4i32);
1284 AddPromotedToType(ISD::STORE, MVT::f128, MVT::v4i32);
1285
 1286 // Set FADD/FSUB as libcalls to avoid having the legalizer expand the
 1287 // fp_to_uint and int_to_fp.
1290
1291 setOperationAction(ISD::FMUL, MVT::f128, Expand);
1292 setOperationAction(ISD::FDIV, MVT::f128, Expand);
1293 setOperationAction(ISD::FNEG, MVT::f128, Expand);
1294 setOperationAction(ISD::FABS, MVT::f128, Expand);
1296 setOperationAction(ISD::FMA, MVT::f128, Expand);
1298
1299 // Expand the fp_extend if the target type is fp128.
1302
1303 // Expand the fp_round if the source type is fp128.
1304 for (MVT VT : {MVT::f32, MVT::f64}) {
1307 }
1308
1313
 1314 // Lower the following f128 select_cc pattern:
1315 // select_cc x, y, tv, fv, cc -> select_cc (setcc x, y, cc), 0, tv, fv, NE
1317
1318 // We need to handle f128 SELECT_CC with integer result type.
1320 setOperationAction(ISD::SELECT_CC, MVT::i64, isPPC64 ? Custom : Expand);
1321 }
1322
1323 if (Subtarget.hasP9Altivec()) {
1324 if (Subtarget.isISA3_1()) {
1329 } else {
1332 }
1340
1341 setOperationAction(ISD::ABDU, MVT::v16i8, Legal);
1342 setOperationAction(ISD::ABDU, MVT::v8i16, Legal);
1343 setOperationAction(ISD::ABDU, MVT::v4i32, Legal);
1344 setOperationAction(ISD::ABDS, MVT::v4i32, Legal);
1345 }
1346
1347 if (Subtarget.hasP10Vector()) {
1349 }
1350 }
1351
1352 if (Subtarget.pairedVectorMemops()) {
1353 addRegisterClass(MVT::v256i1, &PPC::VSRpRCRegClass);
1354 setOperationAction(ISD::LOAD, MVT::v256i1, Custom);
1355 setOperationAction(ISD::STORE, MVT::v256i1, Custom);
1356 }
1357 if (Subtarget.hasMMA()) {
1358 if (Subtarget.isISAFuture())
1359 addRegisterClass(MVT::v512i1, &PPC::WACCRCRegClass);
1360 else
1361 addRegisterClass(MVT::v512i1, &PPC::UACCRCRegClass);
1362 setOperationAction(ISD::LOAD, MVT::v512i1, Custom);
1363 setOperationAction(ISD::STORE, MVT::v512i1, Custom);
1365 }
1366
1367 if (Subtarget.has64BitSupport())
1369
1370 if (Subtarget.isISA3_1())
1371 setOperationAction(ISD::SRA, MVT::v1i128, Legal);
1372
1373 setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, isPPC64 ? Legal : Custom);
1374
1375 if (!isPPC64) {
1378 }
1379
1384 }
1385
1387
1388 if (Subtarget.hasAltivec()) {
1389 // Altivec instructions set fields to all zeros or all ones.
1391 }
1392
1395 else if (isPPC64)
1397 else
1399
1400 setStackPointerRegisterToSaveRestore(isPPC64 ? PPC::X1 : PPC::R1);
1401
1402 // We have target-specific dag combine patterns for the following nodes:
1405 if (Subtarget.hasFPCVT())
1408 if (Subtarget.useCRBits())
1412
1414
1416
1417 if (Subtarget.useCRBits()) {
1419 }
1420
1421 setLibcallName(RTLIB::LOG_F128, "logf128");
1422 setLibcallName(RTLIB::LOG2_F128, "log2f128");
1423 setLibcallName(RTLIB::LOG10_F128, "log10f128");
1424 setLibcallName(RTLIB::EXP_F128, "expf128");
1425 setLibcallName(RTLIB::EXP2_F128, "exp2f128");
1426 setLibcallName(RTLIB::SIN_F128, "sinf128");
1427 setLibcallName(RTLIB::COS_F128, "cosf128");
1428 setLibcallName(RTLIB::SINCOS_F128, "sincosf128");
1429 setLibcallName(RTLIB::POW_F128, "powf128");
1430 setLibcallName(RTLIB::FMIN_F128, "fminf128");
1431 setLibcallName(RTLIB::FMAX_F128, "fmaxf128");
1432 setLibcallName(RTLIB::REM_F128, "fmodf128");
1433 setLibcallName(RTLIB::SQRT_F128, "sqrtf128");
1434 setLibcallName(RTLIB::CEIL_F128, "ceilf128");
1435 setLibcallName(RTLIB::FLOOR_F128, "floorf128");
1436 setLibcallName(RTLIB::TRUNC_F128, "truncf128");
1437 setLibcallName(RTLIB::ROUND_F128, "roundf128");
1438 setLibcallName(RTLIB::LROUND_F128, "lroundf128");
1439 setLibcallName(RTLIB::LLROUND_F128, "llroundf128");
1440 setLibcallName(RTLIB::RINT_F128, "rintf128");
1441 setLibcallName(RTLIB::LRINT_F128, "lrintf128");
1442 setLibcallName(RTLIB::LLRINT_F128, "llrintf128");
1443 setLibcallName(RTLIB::NEARBYINT_F128, "nearbyintf128");
1444 setLibcallName(RTLIB::FMA_F128, "fmaf128");
1445 setLibcallName(RTLIB::FREXP_F128, "frexpf128");
1446
1447 if (Subtarget.isAIXABI()) {
1448 setLibcallName(RTLIB::MEMCPY, isPPC64 ? "___memmove64" : "___memmove");
1449 setLibcallName(RTLIB::MEMMOVE, isPPC64 ? "___memmove64" : "___memmove");
1450 setLibcallName(RTLIB::MEMSET, isPPC64 ? "___memset64" : "___memset");
1451 setLibcallName(RTLIB::BZERO, isPPC64 ? "___bzero64" : "___bzero");
1452 }
1453
1454 // With 32 condition bits, we don't need to sink (and duplicate) compares
1455 // aggressively in CodeGenPrep.
1456 if (Subtarget.useCRBits()) {
1459 }
1460
 1461 // TODO: The default entry count is set to 64. This stops most jump table
 1462 // generation on PPC, but that is good for current PPC hardware because the
 1463 // indirect branch via mtctr to the jump table may lead to poor branch
 1464 // prediction. Re-evaluate this value on future hardware that can do better with mtctr.
1466
1468
1469 switch (Subtarget.getCPUDirective()) {
1470 default: break;
1471 case PPC::DIR_970:
1472 case PPC::DIR_A2:
1473 case PPC::DIR_E500:
1474 case PPC::DIR_E500mc:
1475 case PPC::DIR_E5500:
1476 case PPC::DIR_PWR4:
1477 case PPC::DIR_PWR5:
1478 case PPC::DIR_PWR5X:
1479 case PPC::DIR_PWR6:
1480 case PPC::DIR_PWR6X:
1481 case PPC::DIR_PWR7:
1482 case PPC::DIR_PWR8:
1483 case PPC::DIR_PWR9:
1484 case PPC::DIR_PWR10:
1485 case PPC::DIR_PWR11:
1489 break;
1490 }
1491
1492 if (Subtarget.enableMachineScheduler())
1494 else
1496
1498
1499 // The Freescale cores do better with aggressive inlining of memcpy and
 1500 // friends. GCC uses the same threshold of 128 bytes (= 32 word stores).
1501 if (Subtarget.getCPUDirective() == PPC::DIR_E500mc ||
1502 Subtarget.getCPUDirective() == PPC::DIR_E5500) {
1503 MaxStoresPerMemset = 32;
1505 MaxStoresPerMemcpy = 32;
1509 } else if (Subtarget.getCPUDirective() == PPC::DIR_A2) {
1510 // The A2 also benefits from (very) aggressive inlining of memcpy and
 1511 // friends. The overhead of the function call, even when warm, can be
1512 // over one hundred cycles.
1513 MaxStoresPerMemset = 128;
1514 MaxStoresPerMemcpy = 128;
1515 MaxStoresPerMemmove = 128;
1516 MaxLoadsPerMemcmp = 128;
1517 } else {
1520 }
1521
1522 IsStrictFPEnabled = true;
1523
1524 // Let the subtarget (CPU) decide if a predictable select is more expensive
1525 // than the corresponding branch. This information is used in CGP to decide
1526 // when to convert selects into branches.
1528
1530}
1531
1532// *********************************** NOTE ************************************
1533// For selecting load and store instructions, the addressing modes are defined
1534// as ComplexPatterns in PPCInstrInfo.td, which are then utilized in the TD
1535// patterns to match the load and store instructions.
1536//
1537// The TD definitions for the addressing modes correspond to their respective
1538// Select<AddrMode>Form() function in PPCISelDAGToDAG.cpp. These functions rely
1539// on SelectOptimalAddrMode(), which calls computeMOFlags() to compute the
1540// address mode flags of a particular node. Afterwards, the computed address
1541// flags are passed into getAddrModeForFlags() in order to retrieve the optimal
1542// addressing mode. SelectOptimalAddrMode() then sets the Base and Displacement
1543// accordingly, based on the preferred addressing mode.
1544//
1545// Within PPCISelLowering.h, there are two enums: MemOpFlags and AddrMode.
1546// MemOpFlags contains all the possible flags that can be used to compute the
1547// optimal addressing mode for load and store instructions.
1548// AddrMode contains all the possible load and store addressing modes available
1549// on Power (such as DForm, DSForm, DQForm, XForm, etc.)
1550//
1551// When adding new load and store instructions, it is possible that new address
1552// flags may need to be added into MemOpFlags, and a new addressing mode will
1553// need to be added to AddrMode. An entry for the new addressing mode (consisting
1554// of the minimal and main distinguishing address flags for the new load/store
1555// instructions) will need to be added into initializeAddrModeMap() below.
1556// Finally, when adding new addressing modes, getAddrModeForFlags() will
1557// need to be updated to account for selecting the optimal addressing mode.
1558// *****************************************************************************
1559/// Initialize the map that relates the different addressing modes of the load
1560/// and store instructions to a set of flags. This ensures the load/store
1561/// instruction is correctly matched during instruction selection.
1562void PPCTargetLowering::initializeAddrModeMap() {
1563 AddrModesMap[PPC::AM_DForm] = {
1564 // LWZ, STW
1569 // LBZ, LHZ, STB, STH
1574 // LHA
1579 // LFS, LFD, STFS, STFD
1584 };
1585 AddrModesMap[PPC::AM_DSForm] = {
1586 // LWA
1590 // LD, STD
1594 // DFLOADf32, DFLOADf64, DSTOREf32, DSTOREf64
1598 };
1599 AddrModesMap[PPC::AM_DQForm] = {
1600 // LXV, STXV
1604 };
1605 AddrModesMap[PPC::AM_PrefixDForm] = {PPC::MOF_RPlusSImm34 |
1607 // TODO: Add mapping for quadword load/store.
1608}
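// Illustrative sketch only (an assumption about the lookup, not the actual
// implementation): conceptually, getAddrModeForFlags() returns the first
// addressing mode whose list of recorded flag combinations in AddrModesMap
// contains the exact flag value computed by computeMOFlags(), e.g.:
//
//   PPC::AddrMode getAddrModeForFlagsSketch(unsigned Flags) { // hypothetical helper
//     for (const auto &Mode : AddrModesMap)
//       if (llvm::is_contained(Mode.second, Flags))
//         return Mode.first;
//     return PPC::AM_None; // assumed fallback when nothing matches
//   }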
1609
1610/// getMaxByValAlign - Helper for getByValTypeAlignment to determine
1611/// the desired ByVal argument alignment.
1612static void getMaxByValAlign(Type *Ty, Align &MaxAlign, Align MaxMaxAlign) {
1613 if (MaxAlign == MaxMaxAlign)
1614 return;
1615 if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
1616 if (MaxMaxAlign >= 32 &&
1617 VTy->getPrimitiveSizeInBits().getFixedValue() >= 256)
1618 MaxAlign = Align(32);
1619 else if (VTy->getPrimitiveSizeInBits().getFixedValue() >= 128 &&
1620 MaxAlign < 16)
1621 MaxAlign = Align(16);
1622 } else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
1623 Align EltAlign;
1624 getMaxByValAlign(ATy->getElementType(), EltAlign, MaxMaxAlign);
1625 if (EltAlign > MaxAlign)
1626 MaxAlign = EltAlign;
1627 } else if (StructType *STy = dyn_cast<StructType>(Ty)) {
1628 for (auto *EltTy : STy->elements()) {
1629 Align EltAlign;
1630 getMaxByValAlign(EltTy, EltAlign, MaxMaxAlign);
1631 if (EltAlign > MaxAlign)
1632 MaxAlign = EltAlign;
1633 if (MaxAlign == MaxMaxAlign)
1634 break;
1635 }
1636 }
1637}
1638
1639/// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
1640/// function arguments in the caller parameter area.
1642 const DataLayout &DL) const {
 1643 // 16-byte and wider vectors are passed on a 16-byte boundary.
 1644 // The rest are passed on an 8-byte boundary on PPC64 and a 4-byte boundary on PPC32.
1645 Align Alignment = Subtarget.isPPC64() ? Align(8) : Align(4);
1646 if (Subtarget.hasAltivec())
1647 getMaxByValAlign(Ty, Alignment, Align(16));
1648 return Alignment;
1649}
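// Worked example (derived from the logic above, assuming an Altivec-capable
// PPC64 target): a by-val struct containing a <4 x i32> member reaches the
// 128-bit vector case in getMaxByValAlign() and is aligned to 16 bytes, while
// a by-val struct of plain integers keeps the default Align(8) (Align(4) on
// PPC32).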
1650
1652 return Subtarget.useSoftFloat();
1653}
1654
1656 return Subtarget.hasSPE();
1657}
1658
1660 return VT.isScalarInteger();
1661}
1662
1664 Type *VectorTy, unsigned ElemSizeInBits, unsigned &Index) const {
1665 if (!Subtarget.isPPC64() || !Subtarget.hasVSX())
1666 return false;
1667
1668 if (auto *VTy = dyn_cast<VectorType>(VectorTy)) {
1669 if (VTy->getScalarType()->isIntegerTy()) {
 1670 // ElemSizeInBits of 8/16 fits in the immediate field; not needed here.
1671 if (ElemSizeInBits == 32) {
1672 Index = Subtarget.isLittleEndian() ? 2 : 1;
1673 return true;
1674 }
1675 if (ElemSizeInBits == 64) {
1676 Index = Subtarget.isLittleEndian() ? 1 : 0;
1677 return true;
1678 }
1679 }
1680 }
1681 return false;
1682}
1683
1684const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
1685 switch ((PPCISD::NodeType)Opcode) {
1686 case PPCISD::FIRST_NUMBER: break;
1687 case PPCISD::FSEL: return "PPCISD::FSEL";
1688 case PPCISD::XSMAXC: return "PPCISD::XSMAXC";
1689 case PPCISD::XSMINC: return "PPCISD::XSMINC";
1690 case PPCISD::FCFID: return "PPCISD::FCFID";
1691 case PPCISD::FCFIDU: return "PPCISD::FCFIDU";
1692 case PPCISD::FCFIDS: return "PPCISD::FCFIDS";
1693 case PPCISD::FCFIDUS: return "PPCISD::FCFIDUS";
1694 case PPCISD::FCTIDZ: return "PPCISD::FCTIDZ";
1695 case PPCISD::FCTIWZ: return "PPCISD::FCTIWZ";
1696 case PPCISD::FCTIDUZ: return "PPCISD::FCTIDUZ";
1697 case PPCISD::FCTIWUZ: return "PPCISD::FCTIWUZ";
1698 case PPCISD::FRE: return "PPCISD::FRE";
1699 case PPCISD::FRSQRTE: return "PPCISD::FRSQRTE";
1700 case PPCISD::FTSQRT:
1701 return "PPCISD::FTSQRT";
1702 case PPCISD::FSQRT:
1703 return "PPCISD::FSQRT";
1704 case PPCISD::STFIWX: return "PPCISD::STFIWX";
1705 case PPCISD::VPERM: return "PPCISD::VPERM";
1706 case PPCISD::XXSPLT: return "PPCISD::XXSPLT";
1708 return "PPCISD::XXSPLTI_SP_TO_DP";
1710 return "PPCISD::XXSPLTI32DX";
1711 case PPCISD::VECINSERT: return "PPCISD::VECINSERT";
1712 case PPCISD::XXPERMDI: return "PPCISD::XXPERMDI";
1713 case PPCISD::XXPERM:
1714 return "PPCISD::XXPERM";
1715 case PPCISD::VECSHL: return "PPCISD::VECSHL";
1716 case PPCISD::CMPB: return "PPCISD::CMPB";
1717 case PPCISD::Hi: return "PPCISD::Hi";
1718 case PPCISD::Lo: return "PPCISD::Lo";
1719 case PPCISD::TOC_ENTRY: return "PPCISD::TOC_ENTRY";
1720 case PPCISD::ATOMIC_CMP_SWAP_8: return "PPCISD::ATOMIC_CMP_SWAP_8";
1721 case PPCISD::ATOMIC_CMP_SWAP_16: return "PPCISD::ATOMIC_CMP_SWAP_16";
1722 case PPCISD::DYNALLOC: return "PPCISD::DYNALLOC";
1723 case PPCISD::DYNAREAOFFSET: return "PPCISD::DYNAREAOFFSET";
1724 case PPCISD::PROBED_ALLOCA: return "PPCISD::PROBED_ALLOCA";
1725 case PPCISD::GlobalBaseReg: return "PPCISD::GlobalBaseReg";
1726 case PPCISD::SRL: return "PPCISD::SRL";
1727 case PPCISD::SRA: return "PPCISD::SRA";
1728 case PPCISD::SHL: return "PPCISD::SHL";
1729 case PPCISD::SRA_ADDZE: return "PPCISD::SRA_ADDZE";
1730 case PPCISD::CALL: return "PPCISD::CALL";
1731 case PPCISD::CALL_NOP: return "PPCISD::CALL_NOP";
1732 case PPCISD::CALL_NOTOC: return "PPCISD::CALL_NOTOC";
1733 case PPCISD::CALL_RM:
1734 return "PPCISD::CALL_RM";
1736 return "PPCISD::CALL_NOP_RM";
1738 return "PPCISD::CALL_NOTOC_RM";
1739 case PPCISD::MTCTR: return "PPCISD::MTCTR";
1740 case PPCISD::BCTRL: return "PPCISD::BCTRL";
1741 case PPCISD::BCTRL_LOAD_TOC: return "PPCISD::BCTRL_LOAD_TOC";
1742 case PPCISD::BCTRL_RM:
1743 return "PPCISD::BCTRL_RM";
1745 return "PPCISD::BCTRL_LOAD_TOC_RM";
1746 case PPCISD::RET_GLUE: return "PPCISD::RET_GLUE";
1747 case PPCISD::READ_TIME_BASE: return "PPCISD::READ_TIME_BASE";
1748 case PPCISD::EH_SJLJ_SETJMP: return "PPCISD::EH_SJLJ_SETJMP";
1749 case PPCISD::EH_SJLJ_LONGJMP: return "PPCISD::EH_SJLJ_LONGJMP";
1750 case PPCISD::MFOCRF: return "PPCISD::MFOCRF";
1751 case PPCISD::MFVSR: return "PPCISD::MFVSR";
1752 case PPCISD::MTVSRA: return "PPCISD::MTVSRA";
1753 case PPCISD::MTVSRZ: return "PPCISD::MTVSRZ";
1754 case PPCISD::SINT_VEC_TO_FP: return "PPCISD::SINT_VEC_TO_FP";
1755 case PPCISD::UINT_VEC_TO_FP: return "PPCISD::UINT_VEC_TO_FP";
1757 return "PPCISD::SCALAR_TO_VECTOR_PERMUTED";
1759 return "PPCISD::ANDI_rec_1_EQ_BIT";
1761 return "PPCISD::ANDI_rec_1_GT_BIT";
1762 case PPCISD::VCMP: return "PPCISD::VCMP";
1763 case PPCISD::VCMP_rec: return "PPCISD::VCMP_rec";
1764 case PPCISD::LBRX: return "PPCISD::LBRX";
1765 case PPCISD::STBRX: return "PPCISD::STBRX";
1766 case PPCISD::LFIWAX: return "PPCISD::LFIWAX";
1767 case PPCISD::LFIWZX: return "PPCISD::LFIWZX";
1768 case PPCISD::LXSIZX: return "PPCISD::LXSIZX";
1769 case PPCISD::STXSIX: return "PPCISD::STXSIX";
1770 case PPCISD::VEXTS: return "PPCISD::VEXTS";
1771 case PPCISD::LXVD2X: return "PPCISD::LXVD2X";
1772 case PPCISD::STXVD2X: return "PPCISD::STXVD2X";
1773 case PPCISD::LOAD_VEC_BE: return "PPCISD::LOAD_VEC_BE";
1774 case PPCISD::STORE_VEC_BE: return "PPCISD::STORE_VEC_BE";
1776 return "PPCISD::ST_VSR_SCAL_INT";
1777 case PPCISD::COND_BRANCH: return "PPCISD::COND_BRANCH";
1778 case PPCISD::BDNZ: return "PPCISD::BDNZ";
1779 case PPCISD::BDZ: return "PPCISD::BDZ";
1780 case PPCISD::MFFS: return "PPCISD::MFFS";
1781 case PPCISD::FADDRTZ: return "PPCISD::FADDRTZ";
1782 case PPCISD::TC_RETURN: return "PPCISD::TC_RETURN";
1783 case PPCISD::CR6SET: return "PPCISD::CR6SET";
1784 case PPCISD::CR6UNSET: return "PPCISD::CR6UNSET";
1785 case PPCISD::PPC32_GOT: return "PPCISD::PPC32_GOT";
1786 case PPCISD::PPC32_PICGOT: return "PPCISD::PPC32_PICGOT";
1787 case PPCISD::ADDIS_GOT_TPREL_HA: return "PPCISD::ADDIS_GOT_TPREL_HA";
1788 case PPCISD::LD_GOT_TPREL_L: return "PPCISD::LD_GOT_TPREL_L";
1789 case PPCISD::ADD_TLS: return "PPCISD::ADD_TLS";
1790 case PPCISD::ADDIS_TLSGD_HA: return "PPCISD::ADDIS_TLSGD_HA";
1791 case PPCISD::ADDI_TLSGD_L: return "PPCISD::ADDI_TLSGD_L";
1792 case PPCISD::GET_TLS_ADDR: return "PPCISD::GET_TLS_ADDR";
1793 case PPCISD::GET_TLS_MOD_AIX: return "PPCISD::GET_TLS_MOD_AIX";
1794 case PPCISD::GET_TPOINTER: return "PPCISD::GET_TPOINTER";
1795 case PPCISD::ADDI_TLSGD_L_ADDR: return "PPCISD::ADDI_TLSGD_L_ADDR";
1796 case PPCISD::TLSGD_AIX: return "PPCISD::TLSGD_AIX";
1797 case PPCISD::TLSLD_AIX: return "PPCISD::TLSLD_AIX";
1798 case PPCISD::ADDIS_TLSLD_HA: return "PPCISD::ADDIS_TLSLD_HA";
1799 case PPCISD::ADDI_TLSLD_L: return "PPCISD::ADDI_TLSLD_L";
1800 case PPCISD::GET_TLSLD_ADDR: return "PPCISD::GET_TLSLD_ADDR";
1801 case PPCISD::ADDI_TLSLD_L_ADDR: return "PPCISD::ADDI_TLSLD_L_ADDR";
1802 case PPCISD::ADDIS_DTPREL_HA: return "PPCISD::ADDIS_DTPREL_HA";
1803 case PPCISD::ADDI_DTPREL_L: return "PPCISD::ADDI_DTPREL_L";
1805 return "PPCISD::PADDI_DTPREL";
1806 case PPCISD::VADD_SPLAT: return "PPCISD::VADD_SPLAT";
1807 case PPCISD::SC: return "PPCISD::SC";
1808 case PPCISD::CLRBHRB: return "PPCISD::CLRBHRB";
1809 case PPCISD::MFBHRBE: return "PPCISD::MFBHRBE";
1810 case PPCISD::RFEBB: return "PPCISD::RFEBB";
1811 case PPCISD::XXSWAPD: return "PPCISD::XXSWAPD";
1812 case PPCISD::SWAP_NO_CHAIN: return "PPCISD::SWAP_NO_CHAIN";
1813 case PPCISD::BUILD_FP128: return "PPCISD::BUILD_FP128";
1814 case PPCISD::BUILD_SPE64: return "PPCISD::BUILD_SPE64";
1815 case PPCISD::EXTRACT_SPE: return "PPCISD::EXTRACT_SPE";
1816 case PPCISD::EXTSWSLI: return "PPCISD::EXTSWSLI";
1817 case PPCISD::LD_VSX_LH: return "PPCISD::LD_VSX_LH";
1818 case PPCISD::FP_EXTEND_HALF: return "PPCISD::FP_EXTEND_HALF";
1819 case PPCISD::MAT_PCREL_ADDR: return "PPCISD::MAT_PCREL_ADDR";
1821 return "PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR";
1823 return "PPCISD::TLS_LOCAL_EXEC_MAT_ADDR";
1824 case PPCISD::ACC_BUILD: return "PPCISD::ACC_BUILD";
1825 case PPCISD::PAIR_BUILD: return "PPCISD::PAIR_BUILD";
1826 case PPCISD::EXTRACT_VSX_REG: return "PPCISD::EXTRACT_VSX_REG";
1827 case PPCISD::XXMFACC: return "PPCISD::XXMFACC";
1828 case PPCISD::LD_SPLAT: return "PPCISD::LD_SPLAT";
1829 case PPCISD::ZEXT_LD_SPLAT: return "PPCISD::ZEXT_LD_SPLAT";
1830 case PPCISD::SEXT_LD_SPLAT: return "PPCISD::SEXT_LD_SPLAT";
1831 case PPCISD::FNMSUB: return "PPCISD::FNMSUB";
1833 return "PPCISD::STRICT_FADDRTZ";
1835 return "PPCISD::STRICT_FCTIDZ";
1837 return "PPCISD::STRICT_FCTIWZ";
1839 return "PPCISD::STRICT_FCTIDUZ";
1841 return "PPCISD::STRICT_FCTIWUZ";
1843 return "PPCISD::STRICT_FCFID";
1845 return "PPCISD::STRICT_FCFIDU";
1847 return "PPCISD::STRICT_FCFIDS";
1849 return "PPCISD::STRICT_FCFIDUS";
1850 case PPCISD::LXVRZX: return "PPCISD::LXVRZX";
1851 case PPCISD::STORE_COND:
1852 return "PPCISD::STORE_COND";
1853 case PPCISD::SETBC:
1854 return "PPCISD::SETBC";
1855 case PPCISD::SETBCR:
1856 return "PPCISD::SETBCR";
1857 }
1858 return nullptr;
1859}
1860
1862 EVT VT) const {
1863 if (!VT.isVector())
1864 return Subtarget.useCRBits() ? MVT::i1 : MVT::i32;
1865
1867}
1868
1870 assert(VT.isFloatingPoint() && "Non-floating-point FMA?");
1871 return true;
1872}
1873
1874//===----------------------------------------------------------------------===//
1875// Node matching predicates, for use by the tblgen matching code.
1876//===----------------------------------------------------------------------===//
1877
1878/// isFloatingPointZero - Return true if this is 0.0 or -0.0.
1880 if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op))
1881 return CFP->getValueAPF().isZero();
1882 else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) {
1883 // Maybe this has already been legalized into the constant pool?
1884 if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op.getOperand(1)))
1885 if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
1886 return CFP->getValueAPF().isZero();
1887 }
1888 return false;
1889}
1890
1891/// isConstantOrUndef - Op is either an undef node or a ConstantSDNode. Return
1892/// true if Op is undef or if it matches the specified value.
1893static bool isConstantOrUndef(int Op, int Val) {
1894 return Op < 0 || Op == Val;
1895}
1896
1897/// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a
1898/// VPKUHUM instruction.
1899/// The ShuffleKind distinguishes between big-endian operations with
1900/// two different inputs (0), either-endian operations with two identical
1901/// inputs (1), and little-endian operations with two different inputs (2).
1902/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
1904 SelectionDAG &DAG) {
1905 bool IsLE = DAG.getDataLayout().isLittleEndian();
1906 if (ShuffleKind == 0) {
1907 if (IsLE)
1908 return false;
1909 for (unsigned i = 0; i != 16; ++i)
1910 if (!isConstantOrUndef(N->getMaskElt(i), i*2+1))
1911 return false;
1912 } else if (ShuffleKind == 2) {
1913 if (!IsLE)
1914 return false;
1915 for (unsigned i = 0; i != 16; ++i)
1916 if (!isConstantOrUndef(N->getMaskElt(i), i*2))
1917 return false;
1918 } else if (ShuffleKind == 1) {
1919 unsigned j = IsLE ? 0 : 1;
1920 for (unsigned i = 0; i != 8; ++i)
1921 if (!isConstantOrUndef(N->getMaskElt(i), i*2+j) ||
1922 !isConstantOrUndef(N->getMaskElt(i+8), i*2+j))
1923 return false;
1924 }
1925 return true;
1926}
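// Worked example for the check above: with ShuffleKind == 0 (big-endian, two
// inputs), the accepted mask is <1,3,5,...,31>, i.e. the odd bytes of the two
// concatenated inputs (the low-order byte of each halfword on big-endian).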
1927
1928/// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a
1929/// VPKUWUM instruction.
1930/// The ShuffleKind distinguishes between big-endian operations with
1931/// two different inputs (0), either-endian operations with two identical
1932/// inputs (1), and little-endian operations with two different inputs (2).
1933/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
1935 SelectionDAG &DAG) {
1936 bool IsLE = DAG.getDataLayout().isLittleEndian();
1937 if (ShuffleKind == 0) {
1938 if (IsLE)
1939 return false;
1940 for (unsigned i = 0; i != 16; i += 2)
1941 if (!isConstantOrUndef(N->getMaskElt(i ), i*2+2) ||
1942 !isConstantOrUndef(N->getMaskElt(i+1), i*2+3))
1943 return false;
1944 } else if (ShuffleKind == 2) {
1945 if (!IsLE)
1946 return false;
1947 for (unsigned i = 0; i != 16; i += 2)
1948 if (!isConstantOrUndef(N->getMaskElt(i ), i*2) ||
1949 !isConstantOrUndef(N->getMaskElt(i+1), i*2+1))
1950 return false;
1951 } else if (ShuffleKind == 1) {
1952 unsigned j = IsLE ? 0 : 2;
1953 for (unsigned i = 0; i != 8; i += 2)
1954 if (!isConstantOrUndef(N->getMaskElt(i ), i*2+j) ||
1955 !isConstantOrUndef(N->getMaskElt(i+1), i*2+j+1) ||
1956 !isConstantOrUndef(N->getMaskElt(i+8), i*2+j) ||
1957 !isConstantOrUndef(N->getMaskElt(i+9), i*2+j+1))
1958 return false;
1959 }
1960 return true;
1961}
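// Worked example for the check above: with ShuffleKind == 0 (big-endian, two
// inputs), the accepted mask is <2,3,6,7,10,11,...,30,31>, i.e. the two
// low-order bytes of each word of the concatenated inputs.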
1962
1963/// isVPKUDUMShuffleMask - Return true if this is the shuffle mask for a
1964/// VPKUDUM instruction, AND the VPKUDUM instruction exists for the
1965/// current subtarget.
1966///
1967/// The ShuffleKind distinguishes between big-endian operations with
1968/// two different inputs (0), either-endian operations with two identical
1969/// inputs (1), and little-endian operations with two different inputs (2).
1970/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
1972 SelectionDAG &DAG) {
1973 const PPCSubtarget &Subtarget = DAG.getSubtarget<PPCSubtarget>();
1974 if (!Subtarget.hasP8Vector())
1975 return false;
1976
1977 bool IsLE = DAG.getDataLayout().isLittleEndian();
1978 if (ShuffleKind == 0) {
1979 if (IsLE)
1980 return false;
1981 for (unsigned i = 0; i != 16; i += 4)
1982 if (!isConstantOrUndef(N->getMaskElt(i ), i*2+4) ||
1983 !isConstantOrUndef(N->getMaskElt(i+1), i*2+5) ||
1984 !isConstantOrUndef(N->getMaskElt(i+2), i*2+6) ||
1985 !isConstantOrUndef(N->getMaskElt(i+3), i*2+7))
1986 return false;
1987 } else if (ShuffleKind == 2) {
1988 if (!IsLE)
1989 return false;
1990 for (unsigned i = 0; i != 16; i += 4)
1991 if (!isConstantOrUndef(N->getMaskElt(i ), i*2) ||
1992 !isConstantOrUndef(N->getMaskElt(i+1), i*2+1) ||
1993 !isConstantOrUndef(N->getMaskElt(i+2), i*2+2) ||
1994 !isConstantOrUndef(N->getMaskElt(i+3), i*2+3))
1995 return false;
1996 } else if (ShuffleKind == 1) {
1997 unsigned j = IsLE ? 0 : 4;
1998 for (unsigned i = 0; i != 8; i += 4)
1999 if (!isConstantOrUndef(N->getMaskElt(i ), i*2+j) ||
2000 !isConstantOrUndef(N->getMaskElt(i+1), i*2+j+1) ||
2001 !isConstantOrUndef(N->getMaskElt(i+2), i*2+j+2) ||
2002 !isConstantOrUndef(N->getMaskElt(i+3), i*2+j+3) ||
2003 !isConstantOrUndef(N->getMaskElt(i+8), i*2+j) ||
2004 !isConstantOrUndef(N->getMaskElt(i+9), i*2+j+1) ||
2005 !isConstantOrUndef(N->getMaskElt(i+10), i*2+j+2) ||
2006 !isConstantOrUndef(N->getMaskElt(i+11), i*2+j+3))
2007 return false;
2008 }
2009 return true;
2010}
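// Worked example for the check above: with ShuffleKind == 0 (big-endian, two
// inputs), the accepted mask is <4,5,6,7,12,13,14,15,...,28,29,30,31>, i.e.
// the four low-order bytes of each doubleword of the concatenated inputs.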
2011
2012/// isVMerge - Common function, used to match vmrg* shuffles.
2013///
2014static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize,
2015 unsigned LHSStart, unsigned RHSStart) {
2016 if (N->getValueType(0) != MVT::v16i8)
2017 return false;
2018 assert((UnitSize == 1 || UnitSize == 2 || UnitSize == 4) &&
2019 "Unsupported merge size!");
2020
2021 for (unsigned i = 0; i != 8/UnitSize; ++i) // Step over units
2022 for (unsigned j = 0; j != UnitSize; ++j) { // Step over bytes within unit
2023 if (!isConstantOrUndef(N->getMaskElt(i*UnitSize*2+j),
2024 LHSStart+j+i*UnitSize) ||
2025 !isConstantOrUndef(N->getMaskElt(i*UnitSize*2+UnitSize+j),
2026 RHSStart+j+i*UnitSize))
2027 return false;
2028 }
2029 return true;
2030}
2031
2032/// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for
2033/// a VMRGL* instruction with the specified unit size (1,2 or 4 bytes).
2034/// The ShuffleKind distinguishes between big-endian merges with two
2035/// different inputs (0), either-endian merges with two identical inputs (1),
2036/// and little-endian merges with two different inputs (2). For the latter,
2037/// the input operands are swapped (see PPCInstrAltivec.td).
2039 unsigned ShuffleKind, SelectionDAG &DAG) {
2040 if (DAG.getDataLayout().isLittleEndian()) {
2041 if (ShuffleKind == 1) // unary
2042 return isVMerge(N, UnitSize, 0, 0);
2043 else if (ShuffleKind == 2) // swapped
2044 return isVMerge(N, UnitSize, 0, 16);
2045 else
2046 return false;
2047 } else {
2048 if (ShuffleKind == 1) // unary
2049 return isVMerge(N, UnitSize, 8, 8);
2050 else if (ShuffleKind == 0) // normal
2051 return isVMerge(N, UnitSize, 8, 24);
2052 else
2053 return false;
2054 }
2055}
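// Worked example: for UnitSize == 4 and ShuffleKind == 0 (big-endian, two
// inputs) this calls isVMerge(N, 4, 8, 24) and accepts the mask
// <8,9,10,11,24,25,26,27,12,13,14,15,28,29,30,31>, interleaving the
// low-order words of the two inputs.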
2056
2057/// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for
2058/// a VMRGH* instruction with the specified unit size (1,2 or 4 bytes).
2059/// The ShuffleKind distinguishes between big-endian merges with two
2060/// different inputs (0), either-endian merges with two identical inputs (1),
2061/// and little-endian merges with two different inputs (2). For the latter,
2062/// the input operands are swapped (see PPCInstrAltivec.td).
2064 unsigned ShuffleKind, SelectionDAG &DAG) {
2065 if (DAG.getDataLayout().isLittleEndian()) {
2066 if (ShuffleKind == 1) // unary
2067 return isVMerge(N, UnitSize, 8, 8);
2068 else if (ShuffleKind == 2) // swapped
2069 return isVMerge(N, UnitSize, 8, 24);
2070 else
2071 return false;
2072 } else {
2073 if (ShuffleKind == 1) // unary
2074 return isVMerge(N, UnitSize, 0, 0);
2075 else if (ShuffleKind == 0) // normal
2076 return isVMerge(N, UnitSize, 0, 16);
2077 else
2078 return false;
2079 }
2080}
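// Worked example: for UnitSize == 4 and ShuffleKind == 0 (big-endian, two
// inputs) this calls isVMerge(N, 4, 0, 16) and accepts the mask
// <0,1,2,3,16,17,18,19,4,5,6,7,20,21,22,23>, interleaving the high-order
// words of the two inputs.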
2081
2082/**
2083 * Common function used to match vmrgew and vmrgow shuffles
2084 *
 2085 * The indexOffset determines whether to look for even or odd words in
 2086 * the shuffle mask. This is based on the endianness of the target
 2087 * machine.
2088 * - Little Endian:
2089 * - Use offset of 0 to check for odd elements
2090 * - Use offset of 4 to check for even elements
2091 * - Big Endian:
2092 * - Use offset of 0 to check for even elements
2093 * - Use offset of 4 to check for odd elements
2094 * A detailed description of the vector element ordering for little endian and
2095 * big endian can be found at
2096 * http://www.ibm.com/developerworks/library/l-ibm-xl-c-cpp-compiler/index.html
2097 * Targeting your applications - what little endian and big endian IBM XL C/C++
2098 * compiler differences mean to you
2099 *
2100 * The mask to the shuffle vector instruction specifies the indices of the
2101 * elements from the two input vectors to place in the result. The elements are
 2102 * numbered in array-access order, starting with the first vector. These vectors
 2103 * are always of type v16i8, thus each vector will contain 16 elements of 8 bits
 2104 * each. More info on the shuffle vector can be found in the
2105 * http://llvm.org/docs/LangRef.html#shufflevector-instruction
2106 * Language Reference.
2107 *
2108 * The RHSStartValue indicates whether the same input vectors are used (unary)
2109 * or two different input vectors are used, based on the following:
2110 * - If the instruction uses the same vector for both inputs, the range of the
2111 * indices will be 0 to 15. In this case, the RHSStart value passed should
2112 * be 0.
2113 * - If the instruction has two different vectors then the range of the
2114 * indices will be 0 to 31. In this case, the RHSStart value passed should
2115 * be 16 (indices 0-15 specify elements in the first vector while indices 16
2116 * to 31 specify elements in the second vector).
2117 *
2118 * \param[in] N The shuffle vector SD Node to analyze
2119 * \param[in] IndexOffset Specifies whether to look for even or odd elements
 2120 * \param[in] RHSStartValue Specifies the starting index for the right-hand input
2121 * vector to the shuffle_vector instruction
2122 * \return true iff this shuffle vector represents an even or odd word merge
2123 */
2124static bool isVMerge(ShuffleVectorSDNode *N, unsigned IndexOffset,
2125 unsigned RHSStartValue) {
2126 if (N->getValueType(0) != MVT::v16i8)
2127 return false;
2128
2129 for (unsigned i = 0; i < 2; ++i)
2130 for (unsigned j = 0; j < 4; ++j)
2131 if (!isConstantOrUndef(N->getMaskElt(i*4+j),
2132 i*RHSStartValue+j+IndexOffset) ||
2133 !isConstantOrUndef(N->getMaskElt(i*4+j+8),
2134 i*RHSStartValue+j+IndexOffset+8))
2135 return false;
2136 return true;
2137}
2138
2139/**
2140 * Determine if the specified shuffle mask is suitable for the vmrgew or
2141 * vmrgow instructions.
2142 *
2143 * \param[in] N The shuffle vector SD Node to analyze
2144 * \param[in] CheckEven Check for an even merge (true) or an odd merge (false)
2145 * \param[in] ShuffleKind Identify the type of merge:
2146 * - 0 = big-endian merge with two different inputs;
2147 * - 1 = either-endian merge with two identical inputs;
2148 * - 2 = little-endian merge with two different inputs (inputs are swapped for
2149 * little-endian merges).
2150 * \param[in] DAG The current SelectionDAG
 2151 * \return true iff this shuffle mask is suitable for a vmrgew or vmrgow instruction
2152 */
2154 unsigned ShuffleKind, SelectionDAG &DAG) {
2155 if (DAG.getDataLayout().isLittleEndian()) {
2156 unsigned indexOffset = CheckEven ? 4 : 0;
2157 if (ShuffleKind == 1) // Unary
2158 return isVMerge(N, indexOffset, 0);
2159 else if (ShuffleKind == 2) // swapped
2160 return isVMerge(N, indexOffset, 16);
2161 else
2162 return false;
2163 }
2164 else {
2165 unsigned indexOffset = CheckEven ? 0 : 4;
2166 if (ShuffleKind == 1) // Unary
2167 return isVMerge(N, indexOffset, 0);
2168 else if (ShuffleKind == 0) // Normal
2169 return isVMerge(N, indexOffset, 16);
2170 else
2171 return false;
2172 }
2173 return false;
2174}
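// Worked example: on big-endian with CheckEven == true and ShuffleKind == 0,
// this calls isVMerge(N, 0, 16) and accepts the mask
// <0,1,2,3,16,17,18,19,8,9,10,11,24,25,26,27>, i.e. the even-numbered words
// of the two inputs, interleaved.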
2175
2176/// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift
2177/// amount, otherwise return -1.
2178/// The ShuffleKind distinguishes between big-endian operations with two
2179/// different inputs (0), either-endian operations with two identical inputs
2180/// (1), and little-endian operations with two different inputs (2). For the
2181/// latter, the input operands are swapped (see PPCInstrAltivec.td).
2182int PPC::isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind,
2183 SelectionDAG &DAG) {
2184 if (N->getValueType(0) != MVT::v16i8)
2185 return -1;
2186
2187 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
2188
2189 // Find the first non-undef value in the shuffle mask.
2190 unsigned i;
2191 for (i = 0; i != 16 && SVOp->getMaskElt(i) < 0; ++i)
2192 /*search*/;
2193
2194 if (i == 16) return -1; // all undef.
2195
2196 // Otherwise, check to see if the rest of the elements are consecutively
2197 // numbered from this value.
2198 unsigned ShiftAmt = SVOp->getMaskElt(i);
2199 if (ShiftAmt < i) return -1;
2200
2201 ShiftAmt -= i;
2202 bool isLE = DAG.getDataLayout().isLittleEndian();
2203
2204 if ((ShuffleKind == 0 && !isLE) || (ShuffleKind == 2 && isLE)) {
2205 // Check the rest of the elements to see if they are consecutive.
2206 for (++i; i != 16; ++i)
2207 if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt+i))
2208 return -1;
2209 } else if (ShuffleKind == 1) {
2210 // Check the rest of the elements to see if they are consecutive.
2211 for (++i; i != 16; ++i)
2212 if (!isConstantOrUndef(SVOp->getMaskElt(i), (ShiftAmt+i) & 15))
2213 return -1;
2214 } else
2215 return -1;
2216
2217 if (isLE)
2218 ShiftAmt = 16 - ShiftAmt;
2219
2220 return ShiftAmt;
2221}
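// For example, a mask whose element i equals i+3 (<3,4,5,...,18>) is accepted
// for ShuffleKind 0 on a big-endian target and yields a shift amount of 3; the
// same consecutive mask under ShuffleKind 2 on little-endian returns 16 - 3 = 13.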
2222
2223/// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand
2224/// specifies a splat of a single element that is suitable for input to
2225/// one of the splat operations (VSPLTB/VSPLTH/VSPLTW/XXSPLTW/LXVDSX/etc.).
2226bool PPC::isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize) {
2227  EVT VT = N->getValueType(0);
2228 if (VT == MVT::v2i64 || VT == MVT::v2f64)
2229 return EltSize == 8 && N->getMaskElt(0) == N->getMaskElt(1);
2230
2231 assert(VT == MVT::v16i8 && isPowerOf2_32(EltSize) &&
2232 EltSize <= 8 && "Can only handle 1,2,4,8 byte element sizes");
2233
2234 // The consecutive indices need to specify an element, not part of two
2235 // different elements. So abandon ship early if this isn't the case.
2236 if (N->getMaskElt(0) % EltSize != 0)
2237 return false;
2238
2239 // This is a splat operation if each element of the permute is the same, and
2240 // if the value doesn't reference the second vector.
2241 unsigned ElementBase = N->getMaskElt(0);
2242
2243 // FIXME: Handle UNDEF elements too!
2244 if (ElementBase >= 16)
2245 return false;
2246
2247 // Check that the indices are consecutive, in the case of a multi-byte element
2248 // splatted with a v16i8 mask.
2249 for (unsigned i = 1; i != EltSize; ++i)
2250 if (N->getMaskElt(i) < 0 || N->getMaskElt(i) != (int)(i+ElementBase))
2251 return false;
2252
2253 for (unsigned i = EltSize, e = 16; i != e; i += EltSize) {
2254 if (N->getMaskElt(i) < 0) continue;
2255 for (unsigned j = 0; j != EltSize; ++j)
2256 if (N->getMaskElt(i+j) != N->getMaskElt(j))
2257 return false;
2258 }
2259 return true;
2260}
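// For example, with EltSize = 4 the mask
//   <8,9,10,11, 8,9,10,11, 8,9,10,11, 8,9,10,11>
// is a valid splat: it replicates the word at bytes 8-11 of the first input
// across the whole vector.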
2261
2262/// Check that the mask is shuffling N byte elements. Within each N byte
2263/// element of the mask, the indices could be either in increasing or
2264/// decreasing order as long as they are consecutive.
2265/// \param[in] N the shuffle vector SD Node to analyze
2266/// \param[in] Width the element width in bytes, could be 2/4/8/16 (HalfWord/
2267/// Word/DoubleWord/QuadWord).
2268/// \param[in] StepLen the index delta between adjacent bytes within each N byte
2269///            element: 1 if the mask is in increasing order, -1 if decreasing.
2270/// \return true iff the mask is shuffling N byte elements.
2271static bool isNByteElemShuffleMask(ShuffleVectorSDNode *N, unsigned Width,
2272 int StepLen) {
2273 assert((Width == 2 || Width == 4 || Width == 8 || Width == 16) &&
2274 "Unexpected element width.");
2275  assert((StepLen == 1 || StepLen == -1) && "Unexpected step length.");
2276
2277 unsigned NumOfElem = 16 / Width;
2278 unsigned MaskVal[16]; // Width is never greater than 16
2279 for (unsigned i = 0; i < NumOfElem; ++i) {
2280 MaskVal[0] = N->getMaskElt(i * Width);
2281 if ((StepLen == 1) && (MaskVal[0] % Width)) {
2282 return false;
2283 } else if ((StepLen == -1) && ((MaskVal[0] + 1) % Width)) {
2284 return false;
2285 }
2286
2287 for (unsigned int j = 1; j < Width; ++j) {
2288 MaskVal[j] = N->getMaskElt(i * Width + j);
2289 if (MaskVal[j] != MaskVal[j-1] + StepLen) {
2290 return false;
2291 }
2292 }
2293 }
2294
2295 return true;
2296}
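// For example, with Width = 4 and StepLen = 1 the mask
//   <4,5,6,7, 0,1,2,3, 12,13,14,15, 8,9,10,11>
// passes (each word's bytes are word-aligned and increasing), while with
// StepLen = -1 a byte-reversed-within-word mask such as
//   <3,2,1,0, 7,6,5,4, 11,10,9,8, 15,14,13,12>
// passes instead.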
2297
2298bool PPC::isXXINSERTWMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
2299 unsigned &InsertAtByte, bool &Swap, bool IsLE) {
2300 if (!isNByteElemShuffleMask(N, 4, 1))
2301 return false;
2302
2303 // Now we look at mask elements 0,4,8,12
2304 unsigned M0 = N->getMaskElt(0) / 4;
2305 unsigned M1 = N->getMaskElt(4) / 4;
2306 unsigned M2 = N->getMaskElt(8) / 4;
2307 unsigned M3 = N->getMaskElt(12) / 4;
2308 unsigned LittleEndianShifts[] = { 2, 1, 0, 3 };
2309 unsigned BigEndianShifts[] = { 3, 0, 1, 2 };
2310
2311 // Below, let H and L be arbitrary elements of the shuffle mask
2312 // where H is in the range [4,7] and L is in the range [0,3].
2313 // H, 1, 2, 3 or L, 5, 6, 7
2314 if ((M0 > 3 && M1 == 1 && M2 == 2 && M3 == 3) ||
2315 (M0 < 4 && M1 == 5 && M2 == 6 && M3 == 7)) {
2316 ShiftElts = IsLE ? LittleEndianShifts[M0 & 0x3] : BigEndianShifts[M0 & 0x3];
2317 InsertAtByte = IsLE ? 12 : 0;
2318 Swap = M0 < 4;
2319 return true;
2320 }
2321 // 0, H, 2, 3 or 4, L, 6, 7
2322 if ((M1 > 3 && M0 == 0 && M2 == 2 && M3 == 3) ||
2323 (M1 < 4 && M0 == 4 && M2 == 6 && M3 == 7)) {
2324 ShiftElts = IsLE ? LittleEndianShifts[M1 & 0x3] : BigEndianShifts[M1 & 0x3];
2325 InsertAtByte = IsLE ? 8 : 4;
2326 Swap = M1 < 4;
2327 return true;
2328 }
2329 // 0, 1, H, 3 or 4, 5, L, 7
2330 if ((M2 > 3 && M0 == 0 && M1 == 1 && M3 == 3) ||
2331 (M2 < 4 && M0 == 4 && M1 == 5 && M3 == 7)) {
2332 ShiftElts = IsLE ? LittleEndianShifts[M2 & 0x3] : BigEndianShifts[M2 & 0x3];
2333 InsertAtByte = IsLE ? 4 : 8;
2334 Swap = M2 < 4;
2335 return true;
2336 }
2337 // 0, 1, 2, H or 4, 5, 6, L
2338 if ((M3 > 3 && M0 == 0 && M1 == 1 && M2 == 2) ||
2339 (M3 < 4 && M0 == 4 && M1 == 5 && M2 == 6)) {
2340 ShiftElts = IsLE ? LittleEndianShifts[M3 & 0x3] : BigEndianShifts[M3 & 0x3];
2341 InsertAtByte = IsLE ? 0 : 12;
2342 Swap = M3 < 4;
2343 return true;
2344 }
2345
2346 // If both vector operands for the shuffle are the same vector, the mask will
2347 // contain only elements from the first one and the second one will be undef.
2348 if (N->getOperand(1).isUndef()) {
2349 ShiftElts = 0;
2350 Swap = true;
2351 unsigned XXINSERTWSrcElem = IsLE ? 2 : 1;
2352 if (M0 == XXINSERTWSrcElem && M1 == 1 && M2 == 2 && M3 == 3) {
2353 InsertAtByte = IsLE ? 12 : 0;
2354 return true;
2355 }
2356 if (M0 == 0 && M1 == XXINSERTWSrcElem && M2 == 2 && M3 == 3) {
2357 InsertAtByte = IsLE ? 8 : 4;
2358 return true;
2359 }
2360 if (M0 == 0 && M1 == 1 && M2 == XXINSERTWSrcElem && M3 == 3) {
2361 InsertAtByte = IsLE ? 4 : 8;
2362 return true;
2363 }
2364 if (M0 == 0 && M1 == 1 && M2 == 2 && M3 == XXINSERTWSrcElem) {
2365 InsertAtByte = IsLE ? 0 : 12;
2366 return true;
2367 }
2368 }
2369
2370 return false;
2371}
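// For example, on little-endian the byte mask
//   <16,17,18,19, 4,5,6,7, 8,9,10,11, 12,13,14,15>
// has word indices {4,1,2,3}; it matches the "H, 1, 2, 3" case above and yields
// ShiftElts = 2, InsertAtByte = 12, Swap = false.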
2372
2373bool PPC::isXXSLDWIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
2374                               bool &Swap, bool IsLE) {
2375 assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
2376 // Ensure each byte index of the word is consecutive.
2377 if (!isNByteElemShuffleMask(N, 4, 1))
2378 return false;
2379
2380 // Now we look at mask elements 0,4,8,12, which are the beginning of words.
2381 unsigned M0 = N->getMaskElt(0) / 4;
2382 unsigned M1 = N->getMaskElt(4) / 4;
2383 unsigned M2 = N->getMaskElt(8) / 4;
2384 unsigned M3 = N->getMaskElt(12) / 4;
2385
2386 // If both vector operands for the shuffle are the same vector, the mask will
2387 // contain only elements from the first one and the second one will be undef.
2388 if (N->getOperand(1).isUndef()) {
2389 assert(M0 < 4 && "Indexing into an undef vector?");
2390 if (M1 != (M0 + 1) % 4 || M2 != (M1 + 1) % 4 || M3 != (M2 + 1) % 4)
2391 return false;
2392
2393 ShiftElts = IsLE ? (4 - M0) % 4 : M0;
2394 Swap = false;
2395 return true;
2396 }
2397
2398 // Ensure each word index of the ShuffleVector Mask is consecutive.
2399 if (M1 != (M0 + 1) % 8 || M2 != (M1 + 1) % 8 || M3 != (M2 + 1) % 8)
2400 return false;
2401
2402 if (IsLE) {
2403 if (M0 == 0 || M0 == 7 || M0 == 6 || M0 == 5) {
2404 // Input vectors don't need to be swapped if the leading element
2405 // of the result is one of the 3 left elements of the second vector
2406 // (or if there is no shift to be done at all).
2407 Swap = false;
2408 ShiftElts = (8 - M0) % 8;
2409 } else if (M0 == 4 || M0 == 3 || M0 == 2 || M0 == 1) {
2410 // Input vectors need to be swapped if the leading element
2411 // of the result is one of the 3 left elements of the first vector
2412 // (or if we're shifting by 4 - thereby simply swapping the vectors).
2413 Swap = true;
2414 ShiftElts = (4 - M0) % 4;
2415 }
2416
2417 return true;
2418 } else { // BE
2419 if (M0 == 0 || M0 == 1 || M0 == 2 || M0 == 3) {
2420 // Input vectors don't need to be swapped if the leading element
2421 // of the result is one of the 4 elements of the first vector.
2422 Swap = false;
2423 ShiftElts = M0;
2424 } else if (M0 == 4 || M0 == 5 || M0 == 6 || M0 == 7) {
2425 // Input vectors need to be swapped if the leading element
2426 // of the result is one of the 4 elements of the right vector.
2427 Swap = true;
2428 ShiftElts = M0 - 4;
2429 }
2430
2431 return true;
2432 }
2433}
2434
2435static bool isXXBRShuffleMaskHelper(ShuffleVectorSDNode *N, int Width) {
2436  assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
2437
2438 if (!isNByteElemShuffleMask(N, Width, -1))
2439 return false;
2440
2441 for (int i = 0; i < 16; i += Width)
2442 if (N->getMaskElt(i) != i + Width - 1)
2443 return false;
2444
2445 return true;
2446}
2447
2448bool PPC::isXXBRHShuffleMask(ShuffleVectorSDNode *N) {
2449  return isXXBRShuffleMaskHelper(N, 2);
2450}
2451
2452bool PPC::isXXBRWShuffleMask(ShuffleVectorSDNode *N) {
2453  return isXXBRShuffleMaskHelper(N, 4);
2454}
2455
2456bool PPC::isXXBRDShuffleMask(ShuffleVectorSDNode *N) {
2457  return isXXBRShuffleMaskHelper(N, 8);
2458}
2459
2460bool PPC::isXXBRQShuffleMask(ShuffleVectorSDNode *N) {
2461  return isXXBRShuffleMaskHelper(N, 16);
2462}
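// For example, isXXBRWShuffleMask accepts
//   <3,2,1,0, 7,6,5,4, 11,10,9,8, 15,14,13,12>
// i.e. a byte reversal within each word, matching what xxbrw produces.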
2463
2464/// Can node \p N be lowered to an XXPERMDI instruction? If so, set \p Swap
2465/// if the inputs to the instruction should be swapped and set \p DM to the
2466/// value for the immediate.
2467/// Specifically, set \p Swap to true only if \p N can be lowered to XXPERMDI
2468/// AND element 0 of the result comes from the first input (LE) or second input
2469/// (BE). Set \p DM to the calculated result (0-3) only if \p N can be lowered.
2470/// \return true iff the given mask of shuffle node \p N is a XXPERMDI shuffle
2471/// mask.
2472bool PPC::isXXPERMDIShuffleMask(ShuffleVectorSDNode *N, unsigned &DM,
2473                                bool &Swap, bool IsLE) {
2474 assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
2475
2476 // Ensure each byte index of the double word is consecutive.
2477 if (!isNByteElemShuffleMask(N, 8, 1))
2478 return false;
2479
2480 unsigned M0 = N->getMaskElt(0) / 8;
2481 unsigned M1 = N->getMaskElt(8) / 8;
2482 assert(((M0 | M1) < 4) && "A mask element out of bounds?");
2483
2484 // If both vector operands for the shuffle are the same vector, the mask will
2485 // contain only elements from the first one and the second one will be undef.
2486 if (N->getOperand(1).isUndef()) {
2487 if ((M0 | M1) < 2) {
2488 DM = IsLE ? (((~M1) & 1) << 1) + ((~M0) & 1) : (M0 << 1) + (M1 & 1);
2489 Swap = false;
2490 return true;
2491 } else
2492 return false;
2493 }
2494
2495 if (IsLE) {
2496 if (M0 > 1 && M1 < 2) {
2497 Swap = false;
2498 } else if (M0 < 2 && M1 > 1) {
2499 M0 = (M0 + 2) % 4;
2500 M1 = (M1 + 2) % 4;
2501 Swap = true;
2502 } else
2503 return false;
2504
2505 // Note: if control flow comes here that means Swap is already set above
2506 DM = (((~M1) & 1) << 1) + ((~M0) & 1);
2507 return true;
2508 } else { // BE
2509 if (M0 < 2 && M1 > 1) {
2510 Swap = false;
2511 } else if (M0 > 1 && M1 < 2) {
2512 M0 = (M0 + 2) % 4;
2513 M1 = (M1 + 2) % 4;
2514 Swap = true;
2515 } else
2516 return false;
2517
2518 // Note: if control flow comes here that means Swap is already set above
2519 DM = (M0 << 1) + (M1 & 1);
2520 return true;
2521 }
2522}
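// For example, on little-endian with two distinct inputs, the byte mask
//   <16,17,18,19,20,21,22,23, 8,9,10,11,12,13,14,15>
// has doubleword indices {2, 1}, so M0 > 1 && M1 < 2 holds and the result is
// Swap = false, DM = 1.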
2523
2524
2525/// getSplatIdxForPPCMnemonics - Return the splat index as a value that is
2526/// appropriate for PPC mnemonics (which have a big endian bias - namely
2527/// elements are counted from the left of the vector register).
2528unsigned PPC::getSplatIdxForPPCMnemonics(SDNode *N, unsigned EltSize,
2529 SelectionDAG &DAG) {
2530 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
2531 assert(isSplatShuffleMask(SVOp, EltSize));
2532 EVT VT = SVOp->getValueType(0);
2533
2534 if (VT == MVT::v2i64 || VT == MVT::v2f64)
2535 return DAG.getDataLayout().isLittleEndian() ? 1 - SVOp->getMaskElt(0)
2536 : SVOp->getMaskElt(0);
2537
2538 if (DAG.getDataLayout().isLittleEndian())
2539 return (16 / EltSize) - 1 - (SVOp->getMaskElt(0) / EltSize);
2540 else
2541 return SVOp->getMaskElt(0) / EltSize;
2542}
2543
2544/// get_VSPLTI_elt - If this is a build_vector of constants which can be formed
2545/// by using a vspltis[bhw] instruction of the specified element size, return
2546/// the constant being splatted. The ByteSize field indicates the number of
2547/// bytes of each element [124] -> [bhw].
2548SDValue PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) {
2549  SDValue OpVal;
2550
2551 // If ByteSize of the splat is bigger than the element size of the
2552 // build_vector, then we have a case where we are checking for a splat where
2553 // multiple elements of the buildvector are folded together into a single
2554  // logical element of the splat (e.g. "vspltish 1" to splat {0,1}*8).
2555 unsigned EltSize = 16/N->getNumOperands();
2556 if (EltSize < ByteSize) {
2557 unsigned Multiple = ByteSize/EltSize; // Number of BV entries per spltval.
2558 SDValue UniquedVals[4];
2559 assert(Multiple > 1 && Multiple <= 4 && "How can this happen?");
2560
2561 // See if all of the elements in the buildvector agree across.
2562 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
2563 if (N->getOperand(i).isUndef()) continue;
2564 // If the element isn't a constant, bail fully out.
2565 if (!isa<ConstantSDNode>(N->getOperand(i))) return SDValue();
2566
2567 if (!UniquedVals[i&(Multiple-1)].getNode())
2568 UniquedVals[i&(Multiple-1)] = N->getOperand(i);
2569 else if (UniquedVals[i&(Multiple-1)] != N->getOperand(i))
2570 return SDValue(); // no match.
2571 }
2572
2573 // Okay, if we reached this point, UniquedVals[0..Multiple-1] contains
2574 // either constant or undef values that are identical for each chunk. See
2575 // if these chunks can form into a larger vspltis*.
2576
2577 // Check to see if all of the leading entries are either 0 or -1. If
2578 // neither, then this won't fit into the immediate field.
2579 bool LeadingZero = true;
2580 bool LeadingOnes = true;
2581 for (unsigned i = 0; i != Multiple-1; ++i) {
2582 if (!UniquedVals[i].getNode()) continue; // Must have been undefs.
2583
2584 LeadingZero &= isNullConstant(UniquedVals[i]);
2585 LeadingOnes &= isAllOnesConstant(UniquedVals[i]);
2586 }
2587 // Finally, check the least significant entry.
2588 if (LeadingZero) {
2589 if (!UniquedVals[Multiple-1].getNode())
2590 return DAG.getTargetConstant(0, SDLoc(N), MVT::i32); // 0,0,0,undef
2591 int Val = UniquedVals[Multiple - 1]->getAsZExtVal();
2592 if (Val < 16) // 0,0,0,4 -> vspltisw(4)
2593 return DAG.getTargetConstant(Val, SDLoc(N), MVT::i32);
2594 }
2595 if (LeadingOnes) {
2596 if (!UniquedVals[Multiple-1].getNode())
2597 return DAG.getTargetConstant(~0U, SDLoc(N), MVT::i32); // -1,-1,-1,undef
2598 int Val =cast<ConstantSDNode>(UniquedVals[Multiple-1])->getSExtValue();
2599 if (Val >= -16) // -1,-1,-1,-2 -> vspltisw(-2)
2600 return DAG.getTargetConstant(Val, SDLoc(N), MVT::i32);
2601 }
2602
2603 return SDValue();
2604 }
2605
2606 // Check to see if this buildvec has a single non-undef value in its elements.
2607 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
2608 if (N->getOperand(i).isUndef()) continue;
2609 if (!OpVal.getNode())
2610 OpVal = N->getOperand(i);
2611 else if (OpVal != N->getOperand(i))
2612 return SDValue();
2613 }
2614
2615 if (!OpVal.getNode()) return SDValue(); // All UNDEF: use implicit def.
2616
2617 unsigned ValSizeInBytes = EltSize;
2618 uint64_t Value = 0;
2619 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
2620 Value = CN->getZExtValue();
2621 } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
2622 assert(CN->getValueType(0) == MVT::f32 && "Only one legal FP vector type!");
2623 Value = llvm::bit_cast<uint32_t>(CN->getValueAPF().convertToFloat());
2624 }
2625
2626 // If the splat value is larger than the element value, then we can never do
2627 // this splat. The only case that we could fit the replicated bits into our
2628 // immediate field for would be zero, and we prefer to use vxor for it.
2629 if (ValSizeInBytes < ByteSize) return SDValue();
2630
2631 // If the element value is larger than the splat value, check if it consists
2632 // of a repeated bit pattern of size ByteSize.
2633 if (!APInt(ValSizeInBytes * 8, Value).isSplat(ByteSize * 8))
2634 return SDValue();
2635
2636 // Properly sign extend the value.
2637 int MaskVal = SignExtend32(Value, ByteSize * 8);
2638
2639 // If this is zero, don't match, zero matches ISD::isBuildVectorAllZeros.
2640 if (MaskVal == 0) return SDValue();
2641
2642 // Finally, if this value fits in a 5 bit sext field, return it
2643 if (SignExtend32<5>(MaskVal) == MaskVal)
2644 return DAG.getSignedTargetConstant(MaskVal, SDLoc(N), MVT::i32);
2645 return SDValue();
2646}
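// For example, a v8i16 build_vector with every element equal to 3, queried with
// ByteSize = 2, returns the target constant 3 (i.e. splattable as "vspltish 3").
// A splat of 0 is deliberately rejected above since all-zero vectors are matched
// by ISD::isBuildVectorAllZeros instead.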
2647
2648//===----------------------------------------------------------------------===//
2649// Addressing Mode Selection
2650//===----------------------------------------------------------------------===//
2651
2652/// isIntS16Immediate - This method tests to see if the node is either a 32-bit
2653/// or 64-bit immediate, and if the value can be accurately represented as a
2654/// sign extension from a 16-bit value. If so, this returns true and the
2655/// immediate.
2656bool llvm::isIntS16Immediate(SDNode *N, int16_t &Imm) {
2657 if (!isa<ConstantSDNode>(N))
2658 return false;
2659
2660 Imm = (int16_t)N->getAsZExtVal();
2661 if (N->getValueType(0) == MVT::i32)
2662 return Imm == (int32_t)N->getAsZExtVal();
2663 else
2664 return Imm == (int64_t)N->getAsZExtVal();
2665}
2666bool llvm::isIntS16Immediate(SDValue Op, int16_t &Imm) {
2667  return isIntS16Immediate(Op.getNode(), Imm);
2668}
2669
2670/// Used when computing address flags for selecting loads and stores.
2671/// If we have an OR, check if the LHS and RHS are provably disjoint.
2672/// An OR of two provably disjoint values is equivalent to an ADD.
2673/// Most PPC load/store instructions compute the effective address as a sum,
2674/// so doing this conversion is useful.
2675static bool provablyDisjointOr(SelectionDAG &DAG, const SDValue &N) {
2676 if (N.getOpcode() != ISD::OR)
2677 return false;
2678 KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));
2679 if (!LHSKnown.Zero.getBoolValue())
2680 return false;
2681 KnownBits RHSKnown = DAG.computeKnownBits(N.getOperand(1));
2682 return (~(LHSKnown.Zero | RHSKnown.Zero) == 0);
2683}
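// For example, (X << 4) | 3 qualifies: the low bits of the left operand and the
// high bits of the constant 3 are known zero, so every bit position is known
// zero in at least one operand and the OR can be treated as an ADD.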
2684
2685/// SelectAddressEVXRegReg - Given the specified address, check to see if it can
2686/// be represented as an indexed [r+r] operation.
2687bool PPCTargetLowering::SelectAddressEVXRegReg(SDValue N, SDValue &Base,
2688                                               SDValue &Index,
2689 SelectionDAG &DAG) const {
2690 for (SDNode *U : N->users()) {
2691 if (MemSDNode *Memop = dyn_cast<MemSDNode>(U)) {
2692 if (Memop->getMemoryVT() == MVT::f64) {
2693 Base = N.getOperand(0);
2694 Index = N.getOperand(1);
2695 return true;
2696 }
2697 }
2698 }
2699 return false;
2700}
2701
2702/// isIntS34Immediate - This method tests whether the value of the given node
2703/// can be accurately represented as a sign extension from a 34-bit value. If so,
2704/// this returns true and the immediate.
2705bool llvm::isIntS34Immediate(SDNode *N, int64_t &Imm) {
2706 if (!isa<ConstantSDNode>(N))
2707 return false;
2708
2709 Imm = (int64_t)cast<ConstantSDNode>(N)->getSExtValue();
2710 return isInt<34>(Imm);
2711}
2712bool llvm::isIntS34Immediate(SDValue Op, int64_t &Imm) {
2713  return isIntS34Immediate(Op.getNode(), Imm);
2714}
2715
2716/// SelectAddressRegReg - Given the specified address, check to see if it
2717/// can be represented as an indexed [r+r] operation. Returns false if it
2718/// can be more efficiently represented as [r+imm]. If \p EncodingAlignment is
2719/// non-zero and N can be represented by a base register plus a signed 16-bit
2720/// displacement, make a more precise judgement by checking (displacement % \p
2721/// EncodingAlignment).
2722bool PPCTargetLowering::SelectAddressRegReg(
2723    SDValue N, SDValue &Base, SDValue &Index, SelectionDAG &DAG,
2724 MaybeAlign EncodingAlignment) const {
2725 // If we have a PC Relative target flag don't select as [reg+reg]. It will be
2726 // a [pc+imm].
2727  if (SelectAddressPCRel(N, Base))
2728    return false;
2729
2730 int16_t Imm = 0;
2731 if (N.getOpcode() == ISD::ADD) {
2732    // Is there an SPE (f64) load/store that can't handle a 16-bit offset?
2733    // SPE load/store instructions can only handle 8-bit offsets.
2734 if (hasSPE() && SelectAddressEVXRegReg(N, Base, Index, DAG))
2735 return true;
2736 if (isIntS16Immediate(N.getOperand(1), Imm) &&
2737 (!EncodingAlignment || isAligned(*EncodingAlignment, Imm)))
2738 return false; // r+i
2739 if (N.getOperand(1).getOpcode() == PPCISD::Lo)
2740 return false; // r+i
2741
2742 Base = N.getOperand(0);
2743 Index = N.getOperand(1);
2744 return true;
2745 } else if (N.getOpcode() == ISD::OR) {
2746 if (isIntS16Immediate(N.getOperand(1), Imm) &&
2747 (!EncodingAlignment || isAligned(*EncodingAlignment, Imm)))
2748 return false; // r+i can fold it if we can.
2749
2750 // If this is an or of disjoint bitfields, we can codegen this as an add
2751 // (for better address arithmetic) if the LHS and RHS of the OR are provably
2752 // disjoint.
2753 KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));
2754
2755 if (LHSKnown.Zero.getBoolValue()) {
2756 KnownBits RHSKnown = DAG.computeKnownBits(N.getOperand(1));
2757 // If all of the bits are known zero on the LHS or RHS, the add won't
2758 // carry.
2759 if (~(LHSKnown.Zero | RHSKnown.Zero) == 0) {
2760 Base = N.getOperand(0);
2761 Index = N.getOperand(1);
2762 return true;
2763 }
2764 }
2765 }
2766
2767 return false;
2768}
2769
2770// If we happen to be doing an i64 load or store into a stack slot that has
2771// less than a 4-byte alignment, then the frame-index elimination may need to
2772// use an indexed load or store instruction (because the offset may not be a
2773// multiple of 4). The extra register needed to hold the offset comes from the
2774// register scavenger, and it is possible that the scavenger will need to use
2775// an emergency spill slot. As a result, we need to make sure that a spill slot
2776// is allocated when doing an i64 load/store into a less-than-4-byte-aligned
2777// stack slot.
2778static void fixupFuncForFI(SelectionDAG &DAG, int FrameIdx, EVT VT) {
2779 // FIXME: This does not handle the LWA case.
2780 if (VT != MVT::i64)
2781 return;
2782
2783 // NOTE: We'll exclude negative FIs here, which come from argument
2784 // lowering, because there are no known test cases triggering this problem
2785 // using packed structures (or similar). We can remove this exclusion if
2786 // we find such a test case. The reason why this is so test-case driven is
2787 // because this entire 'fixup' is only to prevent crashes (from the
2788 // register scavenger) on not-really-valid inputs. For example, if we have:
2789 // %a = alloca i1
2790 // %b = bitcast i1* %a to i64*
2791  //   store i64 %v, i64* %b
2792 // then the store should really be marked as 'align 1', but is not. If it
2793 // were marked as 'align 1' then the indexed form would have been
2794 // instruction-selected initially, and the problem this 'fixup' is preventing
2795 // won't happen regardless.
2796 if (FrameIdx < 0)
2797 return;
2798
2799  MachineFunction &MF = DAG.getMachineFunction();
2800  MachineFrameInfo &MFI = MF.getFrameInfo();
2801
2802 if (MFI.getObjectAlign(FrameIdx) >= Align(4))
2803 return;
2804
2805 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
2806 FuncInfo->setHasNonRISpills();
2807}
2808
2809/// Returns true if the address N can be represented by a base register plus
2810/// a signed 16-bit displacement [r+imm], and if it is not better
2811/// represented as reg+reg. If \p EncodingAlignment is non-zero, only accept
2812/// displacements that are multiples of that value.
2813bool PPCTargetLowering::SelectAddressRegImm(
2814    SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG,
2815 MaybeAlign EncodingAlignment) const {
2816 // FIXME dl should come from parent load or store, not from address
2817 SDLoc dl(N);
2818
2819 // If we have a PC Relative target flag don't select as [reg+imm]. It will be
2820 // a [pc+imm].
2821  if (SelectAddressPCRel(N, Base))
2822    return false;
2823
2824 // If this can be more profitably realized as r+r, fail.
2825 if (SelectAddressRegReg(N, Disp, Base, DAG, EncodingAlignment))
2826 return false;
2827
2828 if (N.getOpcode() == ISD::ADD) {
2829 int16_t imm = 0;
2830 if (isIntS16Immediate(N.getOperand(1), imm) &&
2831 (!EncodingAlignment || isAligned(*EncodingAlignment, imm))) {
2832 Disp = DAG.getSignedTargetConstant(imm, dl, N.getValueType());
2833 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
2834 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2835 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
2836 } else {
2837 Base = N.getOperand(0);
2838 }
2839 return true; // [r+i]
2840 } else if (N.getOperand(1).getOpcode() == PPCISD::Lo) {
2841 // Match LOAD (ADD (X, Lo(G))).
2842 assert(!N.getOperand(1).getConstantOperandVal(1) &&
2843 "Cannot handle constant offsets yet!");
2844 Disp = N.getOperand(1).getOperand(0); // The global address.
2849 Base = N.getOperand(0);
2850 return true; // [&g+r]
2851 }
2852 } else if (N.getOpcode() == ISD::OR) {
2853 int16_t imm = 0;
2854 if (isIntS16Immediate(N.getOperand(1), imm) &&
2855 (!EncodingAlignment || isAligned(*EncodingAlignment, imm))) {
2856 // If this is an or of disjoint bitfields, we can codegen this as an add
2857 // (for better address arithmetic) if the LHS and RHS of the OR are
2858 // provably disjoint.
2859 KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));
2860
2861 if ((LHSKnown.Zero.getZExtValue()|~(uint64_t)imm) == ~0ULL) {
2862 // If all of the bits are known zero on the LHS or RHS, the add won't
2863 // carry.
2864 if (FrameIndexSDNode *FI =
2865 dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
2866 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2867 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
2868 } else {
2869 Base = N.getOperand(0);
2870 }
2871 Disp = DAG.getTargetConstant(imm, dl, N.getValueType());
2872 return true;
2873 }
2874 }
2875 } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) {
2876 // Loading from a constant address.
2877
2878 // If this address fits entirely in a 16-bit sext immediate field, codegen
2879 // this as "d, 0"
2880 int16_t Imm;
2881 if (isIntS16Immediate(CN, Imm) &&
2882 (!EncodingAlignment || isAligned(*EncodingAlignment, Imm))) {
2883 Disp = DAG.getTargetConstant(Imm, dl, CN->getValueType(0));
2884 Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
2885 CN->getValueType(0));
2886 return true;
2887 }
2888
2889 // Handle 32-bit sext immediates with LIS + addr mode.
2890 if ((CN->getValueType(0) == MVT::i32 ||
2891 (int64_t)CN->getZExtValue() == (int)CN->getZExtValue()) &&
2892 (!EncodingAlignment ||
2893 isAligned(*EncodingAlignment, CN->getZExtValue()))) {
2894 int Addr = (int)CN->getZExtValue();
2895
2896 // Otherwise, break this down into an LIS + disp.
2897 Disp = DAG.getTargetConstant((short)Addr, dl, MVT::i32);
2898
2899 Base = DAG.getTargetConstant((Addr - (signed short)Addr) >> 16, dl,
2900 MVT::i32);
2901 unsigned Opc = CN->getValueType(0) == MVT::i32 ? PPC::LIS : PPC::LIS8;
2902 Base = SDValue(DAG.getMachineNode(Opc, dl, CN->getValueType(0), Base), 0);
2903 return true;
2904 }
2905 }
2906
2907 Disp = DAG.getTargetConstant(0, dl, getPointerTy(DAG.getDataLayout()));
2908 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N)) {
2909 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2910 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
2911 } else
2912 Base = N;
2913 return true; // [r+0]
2914}
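// Worked example for the LIS + displacement path above: for the constant address
// 0x12348000, (short)Addr is -32768, so Disp = -32768 and
// Base = (0x12348000 + 32768) >> 16 = 0x1235; "lis 0x1235" materializes
// 0x12350000, and adding the -32768 displacement recovers 0x12348000.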
2915
2916/// Similar to the 16-bit case but for instructions that take a 34-bit
2917/// displacement field (prefixed loads/stores).
2918bool PPCTargetLowering::SelectAddressRegImm34(SDValue N, SDValue &Disp,
2919                                              SDValue &Base,
2920 SelectionDAG &DAG) const {
2921 // Only on 64-bit targets.
2922 if (N.getValueType() != MVT::i64)
2923 return false;
2924
2925 SDLoc dl(N);
2926 int64_t Imm = 0;
2927
2928 if (N.getOpcode() == ISD::ADD) {
2929 if (!isIntS34Immediate(N.getOperand(1), Imm))
2930 return false;
2931 Disp = DAG.getSignedTargetConstant(Imm, dl, N.getValueType());
2932 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0)))
2933 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2934 else
2935 Base = N.getOperand(0);
2936 return true;
2937 }
2938
2939 if (N.getOpcode() == ISD::OR) {
2940 if (!isIntS34Immediate(N.getOperand(1), Imm))
2941 return false;
2942 // If this is an or of disjoint bitfields, we can codegen this as an add
2943 // (for better address arithmetic) if the LHS and RHS of the OR are
2944 // provably disjoint.
2945 KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));
2946 if ((LHSKnown.Zero.getZExtValue() | ~(uint64_t)Imm) != ~0ULL)
2947 return false;
2948 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0)))
2949 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2950 else
2951 Base = N.getOperand(0);
2952 Disp = DAG.getSignedTargetConstant(Imm, dl, N.getValueType());
2953 return true;
2954 }
2955
2956 if (isIntS34Immediate(N, Imm)) { // If the address is a 34-bit const.
2957 Disp = DAG.getSignedTargetConstant(Imm, dl, N.getValueType());
2958 Base = DAG.getRegister(PPC::ZERO8, N.getValueType());
2959 return true;
2960 }
2961
2962 return false;
2963}
2964
2965/// SelectAddressRegRegOnly - Given the specified address, force it to be
2966/// represented as an indexed [r+r] operation.
2967bool PPCTargetLowering::SelectAddressRegRegOnly(SDValue N, SDValue &Base,
2968                                                SDValue &Index,
2969 SelectionDAG &DAG) const {
2970 // Check to see if we can easily represent this as an [r+r] address. This
2971 // will fail if it thinks that the address is more profitably represented as
2972 // reg+imm, e.g. where imm = 0.
2973 if (SelectAddressRegReg(N, Base, Index, DAG))
2974 return true;
2975
2976 // If the address is the result of an add, we will utilize the fact that the
2977 // address calculation includes an implicit add. However, we can reduce
2978 // register pressure if we do not materialize a constant just for use as the
2979 // index register. We only get rid of the add if it is not an add of a
2980 // value and a 16-bit signed constant and both have a single use.
2981 int16_t imm = 0;
2982 if (N.getOpcode() == ISD::ADD &&
2983 (!isIntS16Immediate(N.getOperand(1), imm) ||
2984 !N.getOperand(1).hasOneUse() || !N.getOperand(0).hasOneUse())) {
2985 Base = N.getOperand(0);
2986 Index = N.getOperand(1);
2987 return true;
2988 }
2989
2990 // Otherwise, do it the hard way, using R0 as the base register.
2991 Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
2992 N.getValueType());
2993 Index = N;
2994 return true;
2995}
2996
2997template <typename Ty> static bool isValidPCRelNode(SDValue N) {
2998 Ty *PCRelCand = dyn_cast<Ty>(N);
2999 return PCRelCand && (PPCInstrInfo::hasPCRelFlag(PCRelCand->getTargetFlags()));
3000}
3001
3002/// Returns true if this address is a PC Relative address.
3003/// PC Relative addresses are marked with the flag PPCII::MO_PCREL_FLAG
3004/// or if the node opcode is PPCISD::MAT_PCREL_ADDR.
3005bool PPCTargetLowering::SelectAddressPCRel(SDValue N, SDValue &Base) const {
3006  // This is a materialize PC Relative node. Always select this as PC Relative.
3007 Base = N;
3008 if (N.getOpcode() == PPCISD::MAT_PCREL_ADDR)
3009 return true;
3010 if (isValidPCRelNode<ConstantPoolSDNode>(N) ||
3011 isValidPCRelNode<GlobalAddressSDNode>(N) ||
3012 isValidPCRelNode<JumpTableSDNode>(N) ||
3013 isValidPCRelNode<BlockAddressSDNode>(N))
3014 return true;
3015 return false;
3016}
3017
3018/// Returns true if we should use a direct load into vector instruction
3019/// (such as lxsd or lfd), instead of a load into gpr + direct move sequence.
3020static bool usePartialVectorLoads(SDNode *N, const PPCSubtarget& ST) {
3021
3022 // If there are any other uses other than scalar to vector, then we should
3023 // keep it as a scalar load -> direct move pattern to prevent multiple
3024 // loads.
3025 LoadSDNode *LD = dyn_cast<LoadSDNode>(N);
3026 if (!LD)
3027 return false;
3028
3029 EVT MemVT = LD->getMemoryVT();
3030 if (!MemVT.isSimple())
3031 return false;
3032 switch(MemVT.getSimpleVT().SimpleTy) {
3033 case MVT::i64:
3034 break;
3035 case MVT::i32:
3036 if (!ST.hasP8Vector())
3037 return false;
3038 break;
3039 case MVT::i16:
3040 case MVT::i8:
3041 if (!ST.hasP9Vector())
3042 return false;
3043 break;
3044 default:
3045 return false;
3046 }
3047
3048 SDValue LoadedVal(N, 0);
3049 if (!LoadedVal.hasOneUse())
3050 return false;
3051
3052 for (SDUse &Use : LD->uses())
3053 if (Use.getResNo() == 0 &&
3054 Use.getUser()->getOpcode() != ISD::SCALAR_TO_VECTOR &&
3055        Use.getUser()->getOpcode() != PPCISD::SCALAR_TO_VECTOR_PERMUTED)
3056      return false;
3057
3058 return true;
3059}
3060
3061/// getPreIndexedAddressParts - returns true by value, base pointer and
3062/// offset pointer and addressing mode by reference if the node's address
3063/// can be legally represented as pre-indexed load / store address.
3064bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
3065                                                  SDValue &Offset,
3066                                                  ISD::MemIndexedMode &AM,
3067                                                  SelectionDAG &DAG) const {
3068 if (DisablePPCPreinc) return false;
3069
3070 bool isLoad = true;
3071 SDValue Ptr;
3072 EVT VT;
3073 Align Alignment;
3074 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
3075 Ptr = LD->getBasePtr();
3076 VT = LD->getMemoryVT();
3077 Alignment = LD->getAlign();
3078 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
3079 Ptr = ST->getBasePtr();
3080 VT = ST->getMemoryVT();
3081 Alignment = ST->getAlign();
3082 isLoad = false;
3083 } else
3084 return false;
3085
3086 // Do not generate pre-inc forms for specific loads that feed scalar_to_vector
3087 // instructions because we can fold these into a more efficient instruction
3088  // instead (such as LXSD).
3089 if (isLoad && usePartialVectorLoads(N, Subtarget)) {
3090 return false;
3091 }
3092
3093 // PowerPC doesn't have preinc load/store instructions for vectors
3094 if (VT.isVector())
3095 return false;
3096
3097 if (SelectAddressRegReg(Ptr, Base, Offset, DAG)) {
3098 // Common code will reject creating a pre-inc form if the base pointer
3099 // is a frame index, or if N is a store and the base pointer is either
3100 // the same as or a predecessor of the value being stored. Check for
3101 // those situations here, and try with swapped Base/Offset instead.
3102 bool Swap = false;
3103
3104 if (isa<FrameIndexSDNode>(Base) || isa<RegisterSDNode>(Base))
3105 Swap = true;
3106 else if (!isLoad) {
3107 SDValue Val = cast<StoreSDNode>(N)->getValue();
3108 if (Val == Base || Base.getNode()->isPredecessorOf(Val.getNode()))
3109 Swap = true;
3110 }
3111
3112 if (Swap)
3113      std::swap(Base, Offset);
3114
3115 AM = ISD::PRE_INC;
3116 return true;
3117 }
3118
3119 // LDU/STU can only handle immediates that are a multiple of 4.
3120 if (VT != MVT::i64) {
3121 if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, std::nullopt))
3122 return false;
3123 } else {
3124 // LDU/STU need an address with at least 4-byte alignment.
3125 if (Alignment < Align(4))
3126 return false;
3127
3128 if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, Align(4)))
3129 return false;
3130 }
3131
3132 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
3133 // PPC64 doesn't have lwau, but it does have lwaux. Reject preinc load of
3134 // sext i32 to i64 when addr mode is r+i.
3135 if (LD->getValueType(0) == MVT::i64 && LD->getMemoryVT() == MVT::i32 &&
3136 LD->getExtensionType() == ISD::SEXTLOAD &&
3137 isa<ConstantSDNode>(Offset))
3138 return false;
3139 }
3140
3141 AM = ISD::PRE_INC;
3142 return true;
3143}
3144
3145//===----------------------------------------------------------------------===//
3146// LowerOperation implementation
3147//===----------------------------------------------------------------------===//
3148
3149/// Return true if we should reference labels using a PICBase, set the HiOpFlags
3150/// and LoOpFlags to the target MO flags.
3151static void getLabelAccessInfo(bool IsPIC, const PPCSubtarget &Subtarget,
3152 unsigned &HiOpFlags, unsigned &LoOpFlags,
3153 const GlobalValue *GV = nullptr) {
3154 HiOpFlags = PPCII::MO_HA;
3155 LoOpFlags = PPCII::MO_LO;
3156
3157 // Don't use the pic base if not in PIC relocation model.
3158 if (IsPIC) {
3159 HiOpFlags = PPCII::MO_PIC_HA_FLAG;
3160 LoOpFlags = PPCII::MO_PIC_LO_FLAG;
3161 }
3162}
3163
3164static SDValue LowerLabelRef(SDValue HiPart, SDValue LoPart, bool isPIC,
3165 SelectionDAG &DAG) {
3166 SDLoc DL(HiPart);
3167 EVT PtrVT = HiPart.getValueType();
3168 SDValue Zero = DAG.getConstant(0, DL, PtrVT);
3169
3170 SDValue Hi = DAG.getNode(PPCISD::Hi, DL, PtrVT, HiPart, Zero);
3171 SDValue Lo = DAG.getNode(PPCISD::Lo, DL, PtrVT, LoPart, Zero);
3172
3173 // With PIC, the first instruction is actually "GR+hi(&G)".
3174 if (isPIC)
3175 Hi = DAG.getNode(ISD::ADD, DL, PtrVT,
3176 DAG.getNode(PPCISD::GlobalBaseReg, DL, PtrVT), Hi);
3177
3178 // Generate non-pic code that has direct accesses to the constant pool.
3179 // The address of the global is just (hi(&g)+lo(&g)).
3180 return DAG.getNode(ISD::ADD, DL, PtrVT, Hi, Lo);
3181}
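// For a global g on 32-bit non-PIC ELF this typically becomes the two-instruction
// sequence
//   lis   r3, g@ha
//   addi  r3, r3, g@l
// while the PIC form folds the PIC base register into the high part first.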
3182
3183static void setUsesTOCBasePtr(MachineFunction &MF) {
3184  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
3185 FuncInfo->setUsesTOCBasePtr();
3186}
3187
3188static void setUsesTOCBasePtr(SelectionDAG &DAG) {
3189  setUsesTOCBasePtr(DAG.getMachineFunction());
3190}
3191
3192SDValue PPCTargetLowering::getTOCEntry(SelectionDAG &DAG, const SDLoc &dl,
3193 SDValue GA) const {
3194 EVT VT = Subtarget.getScalarIntVT();
3195 SDValue Reg = Subtarget.isPPC64() ? DAG.getRegister(PPC::X2, VT)
3196 : Subtarget.isAIXABI()
3197 ? DAG.getRegister(PPC::R2, VT)
3198 : DAG.getNode(PPCISD::GlobalBaseReg, dl, VT);
3199 SDValue Ops[] = { GA, Reg };
3200 return DAG.getMemIntrinsicNode(
3201 PPCISD::TOC_ENTRY, dl, DAG.getVTList(VT, MVT::Other), Ops, VT,
3204}
3205
3206SDValue PPCTargetLowering::LowerConstantPool(SDValue Op,
3207 SelectionDAG &DAG) const {
3208 EVT PtrVT = Op.getValueType();
3209 ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
3210 const Constant *C = CP->getConstVal();
3211
3212 // 64-bit SVR4 ABI and AIX ABI code are always position-independent.
3213 // The actual address of the GlobalValue is stored in the TOC.
3214 if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
3215 if (Subtarget.isUsingPCRelativeCalls()) {
3216 SDLoc DL(CP);
3217 EVT Ty = getPointerTy(DAG.getDataLayout());
3218 SDValue ConstPool = DAG.getTargetConstantPool(
3219 C, Ty, CP->getAlign(), CP->getOffset(), PPCII::MO_PCREL_FLAG);
3220 return DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, ConstPool);
3221 }
3222 setUsesTOCBasePtr(DAG);
3223 SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), 0);
3224 return getTOCEntry(DAG, SDLoc(CP), GA);
3225 }
3226
3227 unsigned MOHiFlag, MOLoFlag;
3228 bool IsPIC = isPositionIndependent();
3229 getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);
3230
3231 if (IsPIC && Subtarget.isSVR4ABI()) {
3232 SDValue GA =
3233 DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), PPCII::MO_PIC_FLAG);
3234 return getTOCEntry(DAG, SDLoc(CP), GA);
3235 }
3236
3237 SDValue CPIHi =
3238 DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), 0, MOHiFlag);
3239 SDValue CPILo =
3240 DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), 0, MOLoFlag);
3241 return LowerLabelRef(CPIHi, CPILo, IsPIC, DAG);
3242}
3243
3244// For 64-bit PowerPC, prefer the more compact relative encodings.
3245// This trades 32 bits per jump table entry for one or two instructions
3246// on the jump site.
3247unsigned PPCTargetLowering::getJumpTableEncoding() const {
3248  if (isJumpTableRelative())
3249    return MachineJumpTableInfo::EK_LabelDifference32;
3250
3251  return TargetLowering::getJumpTableEncoding();
3252}
3253
3254bool PPCTargetLowering::isJumpTableRelative() const {
3255  if (UseAbsoluteJumpTables)
3256    return false;
3257  if (Subtarget.isPPC64() || Subtarget.isAIXABI())
3258    return true;
3259  return TargetLowering::isJumpTableRelative();
3260}
3261
3262SDValue PPCTargetLowering::getPICJumpTableRelocBase(SDValue Table,
3263                                                    SelectionDAG &DAG) const {
3264 if (!Subtarget.isPPC64() || Subtarget.isAIXABI())
3265    return TargetLowering::getPICJumpTableRelocBase(Table, DAG);
3266
3267 switch (getTargetMachine().getCodeModel()) {
3268 case CodeModel::Small:
3269 case CodeModel::Medium:
3270    return TargetLowering::getPICJumpTableRelocBase(Table, DAG);
3271  default:
3272 return DAG.getNode(PPCISD::GlobalBaseReg, SDLoc(),
3273                       getPointerTy(DAG.getDataLayout()));
3274  }
3275}
3276
3277const MCExpr *
3278PPCTargetLowering::getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
3279                                                unsigned JTI,
3280 MCContext &Ctx) const {
3281 if (!Subtarget.isPPC64() || Subtarget.isAIXABI())
3282    return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx);
3283
3284 switch (getTargetMachine().getCodeModel()) {
3285 case CodeModel::Small:
3286 case CodeModel::Medium:
3287    return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx);
3288  default:
3289 return MCSymbolRefExpr::create(MF->getPICBaseSymbol(), Ctx);
3290 }
3291}
3292
3293SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
3294 EVT PtrVT = Op.getValueType();
3295 JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
3296
3297 // isUsingPCRelativeCalls() returns true when PCRelative is enabled
3298 if (Subtarget.isUsingPCRelativeCalls()) {
3299 SDLoc DL(JT);
3300 EVT Ty = getPointerTy(DAG.getDataLayout());
3301 SDValue GA =
3302 DAG.getTargetJumpTable(JT->getIndex(), Ty, PPCII::MO_PCREL_FLAG);
3303 SDValue MatAddr = DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
3304 return MatAddr;
3305 }
3306
3307 // 64-bit SVR4 ABI and AIX ABI code are always position-independent.
3308 // The actual address of the GlobalValue is stored in the TOC.
3309 if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
3310 setUsesTOCBasePtr(DAG);
3311 SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
3312 return getTOCEntry(DAG, SDLoc(JT), GA);
3313 }
3314
3315 unsigned MOHiFlag, MOLoFlag;
3316 bool IsPIC = isPositionIndependent();
3317 getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);
3318
3319 if (IsPIC && Subtarget.isSVR4ABI()) {
3320 SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT,
3321                                        PPCII::MO_PIC_FLAG);
3322    return getTOCEntry(DAG, SDLoc(GA), GA);
3323 }
3324
3325 SDValue JTIHi = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOHiFlag);
3326 SDValue JTILo = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOLoFlag);
3327 return LowerLabelRef(JTIHi, JTILo, IsPIC, DAG);
3328}
3329
3330SDValue PPCTargetLowering::LowerBlockAddress(SDValue Op,
3331 SelectionDAG &DAG) const {
3332 EVT PtrVT = Op.getValueType();
3333 BlockAddressSDNode *BASDN = cast<BlockAddressSDNode>(Op);
3334 const BlockAddress *BA = BASDN->getBlockAddress();
3335
3336 // isUsingPCRelativeCalls() returns true when PCRelative is enabled
3337 if (Subtarget.isUsingPCRelativeCalls()) {
3338 SDLoc DL(BASDN);
3339 EVT Ty = getPointerTy(DAG.getDataLayout());
3340 SDValue GA = DAG.getTargetBlockAddress(BA, Ty, BASDN->getOffset(),
3341                                           PPCII::MO_PCREL_FLAG);
3342    SDValue MatAddr = DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
3343 return MatAddr;
3344 }
3345
3346 // 64-bit SVR4 ABI and AIX ABI code are always position-independent.
3347 // The actual BlockAddress is stored in the TOC.
3348 if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
3349 setUsesTOCBasePtr(DAG);
3350 SDValue GA = DAG.getTargetBlockAddress(BA, PtrVT, BASDN->getOffset());
3351 return getTOCEntry(DAG, SDLoc(BASDN), GA);
3352 }
3353
3354 // 32-bit position-independent ELF stores the BlockAddress in the .got.
3355 if (Subtarget.is32BitELFABI() && isPositionIndependent())
3356 return getTOCEntry(
3357 DAG, SDLoc(BASDN),
3358 DAG.getTargetBlockAddress(BA, PtrVT, BASDN->getOffset()));
3359
3360 unsigned MOHiFlag, MOLoFlag;
3361 bool IsPIC = isPositionIndependent();
3362 getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);
3363 SDValue TgtBAHi = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOHiFlag);
3364 SDValue TgtBALo = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOLoFlag);
3365 return LowerLabelRef(TgtBAHi, TgtBALo, IsPIC, DAG);
3366}
3367
3368SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
3369 SelectionDAG &DAG) const {
3370 if (Subtarget.isAIXABI())
3371 return LowerGlobalTLSAddressAIX(Op, DAG);
3372
3373 return LowerGlobalTLSAddressLinux(Op, DAG);
3374}
3375
3376/// updateForAIXShLibTLSModelOpt - Helper to initialize TLS model opt settings,
3377/// and then apply the update.
3378static void updateForAIXShLibTLSModelOpt(TLSModel::Model &Model,
3379                                         SelectionDAG &DAG,
3380 const TargetMachine &TM) {
3381 // Initialize TLS model opt setting lazily:
3382 // (1) Use initial-exec for single TLS var references within current function.
3383 // (2) Use local-dynamic for multiple TLS var references within current
3384 // function.
3385 PPCFunctionInfo *FuncInfo =
3386      DAG.getMachineFunction().getInfo<PPCFunctionInfo>();
3387  if (!FuncInfo->isAIXFuncTLSModelOptInitDone()) {
3388    SmallPtrSet<const GlobalValue *, 8> TLSGV;
3389    // Iterate over all instructions within current function, collect all TLS
3390 // global variables (global variables taken as the first parameter to
3391 // Intrinsic::threadlocal_address).
3392 const Function &Func = DAG.getMachineFunction().getFunction();
3393 for (const BasicBlock &BB : Func)
3394 for (const Instruction &I : BB)
3395 if (I.getOpcode() == Instruction::Call)
3396 if (const CallInst *CI = dyn_cast<const CallInst>(&I))
3397 if (Function *CF = CI->getCalledFunction())
3398 if (CF->isDeclaration() &&
3399 CF->getIntrinsicID() == Intrinsic::threadlocal_address)
3400 if (const GlobalValue *GV =
3401 dyn_cast<GlobalValue>(I.getOperand(0))) {
3402 TLSModel::Model GVModel = TM.getTLSModel(GV);
3403 if (GVModel == TLSModel::LocalDynamic)
3404 TLSGV.insert(GV);
3405 }
3406
3407 unsigned TLSGVCnt = TLSGV.size();
3408 LLVM_DEBUG(dbgs() << format("LocalDynamic TLSGV count:%d\n", TLSGVCnt));
3409 if (TLSGVCnt <= PPCAIXTLSModelOptUseIEForLDLimit)
3410 FuncInfo->setAIXFuncUseTLSIEForLD();
3411    FuncInfo->setAIXFuncTLSModelOptInitDone();
3412  }
3413
3414 if (FuncInfo->isAIXFuncUseTLSIEForLD()) {
3415 LLVM_DEBUG(
3416 dbgs() << DAG.getMachineFunction().getName()
3417 << " function is using the TLS-IE model for TLS-LD access.\n");
3418 Model = TLSModel::InitialExec;
3419 }
3420}
3421
3422SDValue PPCTargetLowering::LowerGlobalTLSAddressAIX(SDValue Op,
3423 SelectionDAG &DAG) const {
3424 GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
3425
3426 if (DAG.getTarget().useEmulatedTLS())
3427 report_fatal_error("Emulated TLS is not yet supported on AIX");
3428
3429 SDLoc dl(GA);
3430 const GlobalValue *GV = GA->getGlobal();
3431 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3432 bool Is64Bit = Subtarget.isPPC64();
3433  TLSModel::Model Model = getTargetMachine().getTLSModel(GV);
3434
3435 // Apply update to the TLS model.
3436 if (Subtarget.hasAIXShLibTLSModelOpt())
3437    updateForAIXShLibTLSModelOpt(Model, DAG, getTargetMachine());
3438
3439 // TLS variables are accessed through TOC entries.
3440 // To support this, set the DAG to use the TOC base pointer.
3441 setUsesTOCBasePtr(DAG);
3442
3443 bool IsTLSLocalExecModel = Model == TLSModel::LocalExec;
3444
3445 if (IsTLSLocalExecModel || Model == TLSModel::InitialExec) {
3446 bool HasAIXSmallLocalExecTLS = Subtarget.hasAIXSmallLocalExecTLS();
3447 bool HasAIXSmallTLSGlobalAttr = false;
3448 SDValue VariableOffsetTGA =
3449 DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, PPCII::MO_TPREL_FLAG);
3450 SDValue VariableOffset = getTOCEntry(DAG, dl, VariableOffsetTGA);
3451 SDValue TLSReg;
3452
3453 if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV))
3454 if (GVar->hasAttribute("aix-small-tls"))
3455 HasAIXSmallTLSGlobalAttr = true;
3456
3457 if (Is64Bit) {
3458 // For local-exec and initial-exec on AIX (64-bit), the sequence generated
3459 // involves a load of the variable offset (from the TOC), followed by an
3460 // add of the loaded variable offset to R13 (the thread pointer).
3461 // This code sequence looks like:
3462 // ld reg1,var[TC](2)
3463 // add reg2, reg1, r13 // r13 contains the thread pointer
3464 TLSReg = DAG.getRegister(PPC::X13, MVT::i64);
3465
3466 // With the -maix-small-local-exec-tls option, or with the "aix-small-tls"
3467 // global variable attribute, produce a faster access sequence for
3468 // local-exec TLS variables where the offset from the TLS base is encoded
3469 // as an immediate operand.
3470 //
3471 // We only utilize the faster local-exec access sequence when the TLS
3472 // variable has a size within the policy limit. We treat types that are
3473 // not sized or are empty as being over the policy size limit.
3474 if ((HasAIXSmallLocalExecTLS || HasAIXSmallTLSGlobalAttr) &&
3475 IsTLSLocalExecModel) {
3476 Type *GVType = GV->getValueType();
3477 if (GVType->isSized() && !GVType->isEmptyTy() &&
3478 GV->getDataLayout().getTypeAllocSize(GVType) <=
3479                AIXSmallTlsPolicySizeLimit)
3480          return DAG.getNode(PPCISD::Lo, dl, PtrVT, VariableOffsetTGA, TLSReg);
3481 }
3482 } else {
3483 // For local-exec and initial-exec on AIX (32-bit), the sequence generated
3484 // involves loading the variable offset from the TOC, generating a call to
3485 // .__get_tpointer to get the thread pointer (which will be in R3), and
3486 // adding the two together:
3487 // lwz reg1,var[TC](2)
3488 // bla .__get_tpointer
3489 // add reg2, reg1, r3
3490 TLSReg = DAG.getNode(PPCISD::GET_TPOINTER, dl, PtrVT);
3491
3492 // We do not implement the 32-bit version of the faster access sequence
3493 // for local-exec that is controlled by the -maix-small-local-exec-tls
3494 // option, or the "aix-small-tls" global variable attribute.
3495 if (HasAIXSmallLocalExecTLS || HasAIXSmallTLSGlobalAttr)
3496 report_fatal_error("The small-local-exec TLS access sequence is "
3497 "currently only supported on AIX (64-bit mode).");
3498 }
3499 return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TLSReg, VariableOffset);
3500 }
3501
3502 if (Model == TLSModel::LocalDynamic) {
3503 bool HasAIXSmallLocalDynamicTLS = Subtarget.hasAIXSmallLocalDynamicTLS();
3504
3505 // We do not implement the 32-bit version of the faster access sequence
3506 // for local-dynamic that is controlled by -maix-small-local-dynamic-tls.
3507 if (!Is64Bit && HasAIXSmallLocalDynamicTLS)
3508 report_fatal_error("The small-local-dynamic TLS access sequence is "
3509 "currently only supported on AIX (64-bit mode).");
3510
3511 // For local-dynamic on AIX, we need to generate one TOC entry for each
3512 // variable offset, and a single module-handle TOC entry for the entire
3513 // file.
3514
3515 SDValue VariableOffsetTGA =
3516 DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, PPCII::MO_TLSLD_FLAG);
3517 SDValue VariableOffset = getTOCEntry(DAG, dl, VariableOffsetTGA);
3518
3520 GlobalVariable *TLSGV =
3521 dyn_cast_or_null<GlobalVariable>(M->getOrInsertGlobal(
3522 StringRef("_$TLSML"), PointerType::getUnqual(*DAG.getContext())));
3524 assert(TLSGV && "Not able to create GV for _$TLSML.");
3525 SDValue ModuleHandleTGA =
3526 DAG.getTargetGlobalAddress(TLSGV, dl, PtrVT, 0, PPCII::MO_TLSLDM_FLAG);
3527 SDValue ModuleHandleTOC = getTOCEntry(DAG, dl, ModuleHandleTGA);
3528 SDValue ModuleHandle =
3529 DAG.getNode(PPCISD::TLSLD_AIX, dl, PtrVT, ModuleHandleTOC);
3530
3531 // With the -maix-small-local-dynamic-tls option, produce a faster access
3532 // sequence for local-dynamic TLS variables where the offset from the
3533 // module-handle is encoded as an immediate operand.
3534 //
3535 // We only utilize the faster local-dynamic access sequence when the TLS
3536 // variable has a size within the policy limit. We treat types that are
3537 // not sized or are empty as being over the policy size limit.
3538 if (HasAIXSmallLocalDynamicTLS) {
3539 Type *GVType = GV->getValueType();
3540 if (GVType->isSized() && !GVType->isEmptyTy() &&
3541 GV->getDataLayout().getTypeAllocSize(GVType) <=
3542              AIXSmallTlsPolicySizeLimit)
3543        return DAG.getNode(PPCISD::Lo, dl, PtrVT, VariableOffsetTGA,
3544 ModuleHandle);
3545 }
3546
3547 return DAG.getNode(ISD::ADD, dl, PtrVT, ModuleHandle, VariableOffset);
3548 }
3549
3550 // If Local- or Initial-exec or Local-dynamic is not possible or specified,
3551 // all GlobalTLSAddress nodes are lowered using the general-dynamic model. We
3552 // need to generate two TOC entries, one for the variable offset, one for the
3553 // region handle. The global address for the TOC entry of the region handle is
3554 // created with the MO_TLSGDM_FLAG flag and the global address for the TOC
3555 // entry of the variable offset is created with MO_TLSGD_FLAG.
3556 SDValue VariableOffsetTGA =
3557 DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, PPCII::MO_TLSGD_FLAG);
3558 SDValue RegionHandleTGA =
3559 DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, PPCII::MO_TLSGDM_FLAG);
3560 SDValue VariableOffset = getTOCEntry(DAG, dl, VariableOffsetTGA);
3561 SDValue RegionHandle = getTOCEntry(DAG, dl, RegionHandleTGA);
3562 return DAG.getNode(PPCISD::TLSGD_AIX, dl, PtrVT, VariableOffset,
3563 RegionHandle);
3564}
3565
3566SDValue PPCTargetLowering::LowerGlobalTLSAddressLinux(SDValue Op,
3567 SelectionDAG &DAG) const {
3568 // FIXME: TLS addresses currently use medium model code sequences,
3569 // which is the most useful form. Eventually support for small and
3570 // large models could be added if users need it, at the cost of
3571 // additional complexity.
3572 GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
3573 if (DAG.getTarget().useEmulatedTLS())
3574 return LowerToTLSEmulatedModel(GA, DAG);
3575
3576 SDLoc dl(GA);
3577 const GlobalValue *GV = GA->getGlobal();
3578 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3579 bool is64bit = Subtarget.isPPC64();
3580 const Module *M = DAG.getMachineFunction().getFunction().getParent();
3581 PICLevel::Level picLevel = M->getPICLevel();
3582
3583  const TargetMachine &TM = getTargetMachine();
3584  TLSModel::Model Model = TM.getTLSModel(GV);
3585
3586 if (Model == TLSModel::LocalExec) {
3587 if (Subtarget.isUsingPCRelativeCalls()) {
3588 SDValue TLSReg = DAG.getRegister(PPC::X13, MVT::i64);
3589 SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3590                                               PPCII::MO_TPREL_PCREL_FLAG);
3591      SDValue MatAddr =
3592 DAG.getNode(PPCISD::TLS_LOCAL_EXEC_MAT_ADDR, dl, PtrVT, TGA);
3593 return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TLSReg, MatAddr);
3594 }
3595
3596 SDValue TGAHi = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3597                                               PPCII::MO_TPREL_HA);
3598    SDValue TGALo = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3599                                               PPCII::MO_TPREL_LO);
3600    SDValue TLSReg = is64bit ? DAG.getRegister(PPC::X13, MVT::i64)
3601 : DAG.getRegister(PPC::R2, MVT::i32);
3602
3603 SDValue Hi = DAG.getNode(PPCISD::Hi, dl, PtrVT, TGAHi, TLSReg);
3604 return DAG.getNode(PPCISD::Lo, dl, PtrVT, TGALo, Hi);
3605 }
3606
3607 if (Model == TLSModel::InitialExec) {
3608 bool IsPCRel = Subtarget.isUsingPCRelativeCalls();
3609    SDValue TGA = DAG.getTargetGlobalAddress(
3610        GV, dl, PtrVT, 0, IsPCRel ? PPCII::MO_GOT_TPREL_PCREL_FLAG : 0);
3611 SDValue TGATLS = DAG.getTargetGlobalAddress(
3612 GV, dl, PtrVT, 0, IsPCRel ? PPCII::MO_TLS_PCREL_FLAG : PPCII::MO_TLS);
3613 SDValue TPOffset;
3614 if (IsPCRel) {
3615 SDValue MatPCRel = DAG.getNode(PPCISD::MAT_PCREL_ADDR, dl, PtrVT, TGA);
3616 TPOffset = DAG.getLoad(MVT::i64, dl, DAG.getEntryNode(), MatPCRel,
3617                             MachinePointerInfo());
3618    } else {
3619 SDValue GOTPtr;
3620 if (is64bit) {
3621 setUsesTOCBasePtr(DAG);
3622 SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
3623 GOTPtr =
3624 DAG.getNode(PPCISD::ADDIS_GOT_TPREL_HA, dl, PtrVT, GOTReg, TGA);
3625 } else {
3626 if (!TM.isPositionIndependent())
3627 GOTPtr = DAG.getNode(PPCISD::PPC32_GOT, dl, PtrVT);
3628 else if (picLevel == PICLevel::SmallPIC)
3629 GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
3630 else
3631 GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
3632 }
3633 TPOffset = DAG.getNode(PPCISD::LD_GOT_TPREL_L, dl, PtrVT, TGA, GOTPtr);
3634 }
3635 return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TPOffset, TGATLS);
3636 }
3637
3638 if (Model == TLSModel::GeneralDynamic) {
3639 if (Subtarget.isUsingPCRelativeCalls()) {
3640 SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3641                                               PPCII::MO_GOT_TLSGD_PCREL_FLAG);
3642      return DAG.getNode(PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR, dl, PtrVT, TGA);
3643 }
3644
3645 SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
3646 SDValue GOTPtr;
3647 if (is64bit) {
3648 setUsesTOCBasePtr(DAG);
3649 SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
3650 GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSGD_HA, dl, PtrVT,
3651 GOTReg, TGA);
3652 } else {
3653 if (picLevel == PICLevel::SmallPIC)
3654 GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
3655 else
3656 GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
3657 }
3658 return DAG.getNode(PPCISD::ADDI_TLSGD_L_ADDR, dl, PtrVT,
3659 GOTPtr, TGA, TGA);
3660 }
3661
3662 if (Model == TLSModel::LocalDynamic) {
3663 if (Subtarget.isUsingPCRelativeCalls()) {
3664 SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3665                                               PPCII::MO_GOT_TLSLD_PCREL_FLAG);
3666      SDValue MatPCRel =
3667 DAG.getNode(PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR, dl, PtrVT, TGA);
3668 return DAG.getNode(PPCISD::PADDI_DTPREL, dl, PtrVT, MatPCRel, TGA);
3669 }
3670
3671 SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
3672 SDValue GOTPtr;
3673 if (is64bit) {
3674 setUsesTOCBasePtr(DAG);
3675 SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
3676 GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSLD_HA, dl, PtrVT,
3677 GOTReg, TGA);
3678 } else {
3679 if (picLevel == PICLevel::SmallPIC)
3680 GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
3681 else
3682 GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
3683 }
3684 SDValue TLSAddr = DAG.getNode(PPCISD::ADDI_TLSLD_L_ADDR, dl,
3685 PtrVT, GOTPtr, TGA, TGA);
3686 SDValue DtvOffsetHi = DAG.getNode(PPCISD::ADDIS_DTPREL_HA, dl,
3687 PtrVT, TLSAddr, TGA);
3688 return DAG.getNode(PPCISD::ADDI_DTPREL_L, dl, PtrVT, DtvOffsetHi, TGA);
3689 }
3690
3691 llvm_unreachable("Unknown TLS model!");
3692}
3693
3694SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op,
3695 SelectionDAG &DAG) const {
3696 EVT PtrVT = Op.getValueType();
3697 GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
3698 SDLoc DL(GSDN);
3699 const GlobalValue *GV = GSDN->getGlobal();
3700
3701 // 64-bit SVR4 ABI & AIX ABI code is always position-independent.
3702 // The actual address of the GlobalValue is stored in the TOC.
3703 if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
3704 if (Subtarget.isUsingPCRelativeCalls()) {
3705 EVT Ty = getPointerTy(DAG.getDataLayout());
3706 if (isAccessedAsGotIndirect(Op)) {
3707 SDValue GA = DAG.getTargetGlobalAddress(GV, DL, Ty, GSDN->getOffset(),
3708 PPCII::MO_GOT_PCREL_FLAG);
3709 SDValue MatPCRel = DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
3710 SDValue Load = DAG.getLoad(MVT::i64, DL, DAG.getEntryNode(), MatPCRel,
3711 MachinePointerInfo());
3712 return Load;
3713 } else {
3714 SDValue GA = DAG.getTargetGlobalAddress(GV, DL, Ty, GSDN->getOffset(),
3715 PPCII::MO_PCREL_FLAG);
3716 return DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
3717 }
3718 }
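// Otherwise, materialize the address from its TOC entry (PPCISD::TOC_ENTRY);
// the code-model-specific load sequence is chosen later during selection.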
3719 setUsesTOCBasePtr(DAG);
3720 SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset());
3721 return getTOCEntry(DAG, DL, GA);
3722 }
3723
3724 unsigned MOHiFlag, MOLoFlag;
3725 bool IsPIC = isPositionIndependent();
3726 getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag, GV);
3727
3728 if (IsPIC && Subtarget.isSVR4ABI()) {
3729 SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT,
3730 GSDN->getOffset(),
3731 PPCII::MO_PIC_FLAG);
3732 return getTOCEntry(DAG, DL, GA);
3733 }
3734
3735 SDValue GAHi =
3736 DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOHiFlag);
3737 SDValue GALo =
3738 DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOLoFlag);
3739
3740 return LowerLabelRef(GAHi, GALo, IsPIC, DAG);
3741}
3742
3743SDValue PPCTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
3744 bool IsStrict = Op->isStrictFPOpcode();
3745 ISD::CondCode CC =
3746 cast<CondCodeSDNode>(Op.getOperand(IsStrict ? 3 : 2))->get();
3747 SDValue LHS = Op.getOperand(IsStrict ? 1 : 0);
3748 SDValue RHS = Op.getOperand(IsStrict ? 2 : 1);
3749 SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
3750 EVT LHSVT = LHS.getValueType();
3751 SDLoc dl(Op);
3752
3753 // Soften the setcc with a libcall if it is fp128.
3754 if (LHSVT == MVT::f128) {
3755 assert(!Subtarget.hasP9Vector() &&
3756 "SETCC for f128 is already legal under Power9!");
3757 softenSetCCOperands(DAG, LHSVT, LHS, RHS, CC, dl, LHS, RHS, Chain,
3758 Op->getOpcode() == ISD::STRICT_FSETCCS);
3759 if (RHS.getNode())
3760 LHS = DAG.getNode(ISD::SETCC, dl, Op.getValueType(), LHS, RHS,
3761 DAG.getCondCode(CC));
3762 if (IsStrict)
3763 return DAG.getMergeValues({LHS, Chain}, dl);
3764 return LHS;
3765 }
3766
3767 assert(!IsStrict && "Don't know how to handle STRICT_FSETCC!");
3768
3769 if (Op.getValueType() == MVT::v2i64) {
3770 // When the operands themselves are v2i64 values, we need to do something
3771 // special because VSX has no underlying comparison operations for these.
3772 if (LHS.getValueType() == MVT::v2i64) {
3773 // Equality can be handled by casting to the legal type for Altivec
3774 // comparisons, everything else needs to be expanded.
3775 if (CC != ISD::SETEQ && CC != ISD::SETNE)
3776 return SDValue();
3777 SDValue SetCC32 = DAG.getSetCC(
3778 dl, MVT::v4i32, DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, LHS),
3779 DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, RHS), CC);
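// Each v2i64 lane was compared as two v4i32 words; swapping the word pairs
// and combining (AND for SETEQ, OR for SETNE) folds the two 32-bit results
// into a full 64-bit result in every lane.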
3780 int ShuffV[] = {1, 0, 3, 2};
3781 SDValue Shuff =
3782 DAG.getVectorShuffle(MVT::v4i32, dl, SetCC32, SetCC32, ShuffV);
3783 return DAG.getBitcast(MVT::v2i64,
3784 DAG.getNode(CC == ISD::SETEQ ? ISD::AND : ISD::OR,
3785 dl, MVT::v4i32, Shuff, SetCC32));
3786 }
3787
3788 // We handle most of these in the usual way.
3789 return Op;
3790 }
3791
3792 // If we're comparing for equality to zero, expose the fact that this is
3793 // implemented as a ctlz/srl pair on ppc, so that the dag combiner can
3794 // fold the new nodes.
3795 if (SDValue V = lowerCmpEqZeroToCtlzSrl(Op, DAG))
3796 return V;
3797
3798 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS)) {
3799 // Leave comparisons against 0 and -1 alone for now, since they're usually
3800 // optimized. FIXME: revisit this when we can custom lower all setcc
3801 // optimizations.
3802 if (C->isAllOnes() || C->isZero())
3803 return SDValue();
3804 }
3805
3806 // If we have an integer seteq/setne, turn it into a compare against zero
3807 // by xor'ing the rhs with the lhs, which is faster than setting a
3808 // condition register, reading it back out, and masking the correct bit. The
3809 // normal approach here uses sub to do this instead of xor. Using xor exposes
3810 // the result to other bit-twiddling opportunities.
3811 if (LHSVT.isInteger() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
3812 EVT VT = Op.getValueType();
3813 SDValue Sub = DAG.getNode(ISD::XOR, dl, LHSVT, LHS, RHS);
3814 return DAG.getSetCC(dl, VT, Sub, DAG.getConstant(0, dl, LHSVT), CC);
3815 }
3816 return SDValue();
3817}
3818
3819SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
3820 SDNode *Node = Op.getNode();
3821 EVT VT = Node->getValueType(0);
3822 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3823 SDValue InChain = Node->getOperand(0);
3824 SDValue VAListPtr = Node->getOperand(1);
3825 const Value *SV = cast<SrcValueSDNode>(Node->getOperand(2))->getValue();
3826 SDLoc dl(Node);
3827
3828 assert(!Subtarget.isPPC64() && "LowerVAARG is PPC32 only");
3829
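// Overview of the PPC32 SVR4 va_arg lowering below: load the gpr/fpr index
// byte from the va_list, compute the address of the next slot in the register
// save area, and fall back to the overflow (stack) area once eight values of
// the relevant class have been consumed; the selected pointer is then loaded.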
3830 // gpr_index
3831 SDValue GprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain,
3832 VAListPtr, MachinePointerInfo(SV), MVT::i8);
3833 InChain = GprIndex.getValue(1);
3834
3835 if (VT == MVT::i64) {
3836 // Check whether GprIndex is odd; an i64 must start at an even gpr index.
3837 SDValue GprAnd = DAG.getNode(ISD::AND, dl, MVT::i32, GprIndex,
3838 DAG.getConstant(1, dl, MVT::i32));
3839 SDValue CC64 = DAG.getSetCC(dl, MVT::i32, GprAnd,
3840 DAG.getConstant(0, dl, MVT::i32), ISD::SETNE);
3841 SDValue GprIndexPlusOne = DAG.getNode(ISD::ADD, dl, MVT::i32, GprIndex,
3842 DAG.getConstant(1, dl, MVT::i32));
3843 // Align GprIndex to be even if it isn't
3844 GprIndex = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC64, GprIndexPlusOne,
3845 GprIndex);
3846 }
3847
3848 // The fpr index is stored one byte after the gpr index.
3849 SDValue FprPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
3850 DAG.getConstant(1, dl, MVT::i32));
3851
3852 // fpr
3853 SDValue FprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain,
3854 FprPtr, MachinePointerInfo(SV), MVT::i8);
3855 InChain = FprIndex.getValue(1);
3856
3857 SDValue RegSaveAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
3858 DAG.getConstant(8, dl, MVT::i32));
3859
3860 SDValue OverflowAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
3861 DAG.getConstant(4, dl, MVT::i32));
3862
3863 // areas
3864 SDValue OverflowArea =
3865 DAG.getLoad(MVT::i32, dl, InChain, OverflowAreaPtr, MachinePointerInfo());
3866 InChain = OverflowArea.getValue(1);
3867
3868 SDValue RegSaveArea =
3869 DAG.getLoad(MVT::i32, dl, InChain, RegSaveAreaPtr, MachinePointerInfo());
3870 InChain = RegSaveArea.getValue(1);
3871
3872 // select overflow_area if index >= 8 (registers exhausted)
3873 SDValue CC = DAG.getSetCC(dl, MVT::i32, VT.isInteger() ? GprIndex : FprIndex,
3874 DAG.getConstant(8, dl, MVT::i32), ISD::SETLT);
3875
3876 // adjustment constant gpr_index * 4/8
3877 SDValue RegConstant = DAG.getNode(ISD::MUL, dl, MVT::i32,
3878 VT.isInteger() ? GprIndex : FprIndex,
3879 DAG.getConstant(VT.isInteger() ? 4 : 8, dl,
3880 MVT::i32));
3881
3882 // OurReg = RegSaveArea + RegConstant
3883 SDValue OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, RegSaveArea,
3884 RegConstant);
3885
3886 // Floating types are 32 bytes into RegSaveArea
3887 if (VT.isFloatingPoint())
3888 OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, OurReg,
3889 DAG.getConstant(32, dl, MVT::i32));
3890
3891 // increase {f,g}pr_index by 1 (or 2 if VT is i64)
3892 SDValue IndexPlus1 = DAG.getNode(ISD::ADD, dl, MVT::i32,
3893 VT.isInteger() ? GprIndex : FprIndex,
3894 DAG.getConstant(VT == MVT::i64 ? 2 : 1, dl,
3895 MVT::i32));
3896
3897 InChain = DAG.getTruncStore(InChain, dl, IndexPlus1,
3898 VT.isInteger() ? VAListPtr : FprPtr,
3899 MachinePointerInfo(SV), MVT::i8);
3900
3901 // determine if we should load from reg_save_area or overflow_area
3902 SDValue Result = DAG.getNode(ISD::SELECT, dl, PtrVT, CC, OurReg, OverflowArea);
3903
3904 // increase overflow_area by 4/8 if the gpr/fpr index is >= 8
3905 SDValue OverflowAreaPlusN = DAG.getNode(ISD::ADD, dl, PtrVT, OverflowArea,
3906 DAG.getConstant(VT.isInteger() ? 4 : 8,
3907 dl, MVT::i32));
3908
3909 OverflowArea = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC, OverflowArea,
3910 OverflowAreaPlusN);
3911
3912 InChain = DAG.getTruncStore(InChain, dl, OverflowArea, OverflowAreaPtr,
3913 MachinePointerInfo(), MVT::i32);
3914
3915 return DAG.getLoad(VT, dl, InChain, Result, MachinePointerInfo());
3916}
3917
3918SDValue PPCTargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG) const {
3919 assert(!Subtarget.isPPC64() && "LowerVACOPY is PPC32 only");
3920
3921 // We have to copy the entire va_list struct:
3922 // 2*sizeof(char) + 2 bytes of padding + 2*sizeof(char*) = 12 bytes
3923 return DAG.getMemcpy(Op.getOperand(0), Op, Op.getOperand(1), Op.getOperand(2),
3924 DAG.getConstant(12, SDLoc(Op), MVT::i32), Align(8),
3925 false, true, /*CI=*/nullptr, std::nullopt,
3926 MachinePointerInfo(), MachinePointerInfo());
3927 }
3928
3929SDValue PPCTargetLowering::LowerADJUST_TRAMPOLINE(SDValue Op,
3930 SelectionDAG &DAG) const {
3931 if (Subtarget.isAIXABI())
3932 report_fatal_error("ADJUST_TRAMPOLINE operation is not supported on AIX.");
3933
3934 return Op.getOperand(0);
3935}
3936
3937SDValue PPCTargetLowering::LowerINLINEASM(SDValue Op, SelectionDAG &DAG) const {
3938 MachineFunction &MF = DAG.getMachineFunction();
3939 PPCFunctionInfo &MFI = *MF.getInfo<PPCFunctionInfo>();
3940
3941 assert((Op.getOpcode() == ISD::INLINEASM ||
3942 Op.getOpcode() == ISD::INLINEASM_BR) &&
3943 "Expecting Inline ASM node.");
3944
3945 // If an LR store is already known to be required then there is no point in
3946 // checking this ASM as well.
3947 if (MFI.isLRStoreRequired())
3948 return Op;
3949
3950 // Inline ASM nodes have an optional last operand that is an incoming Flag of
3951 // type MVT::Glue. We want to ignore this last operand if that is the case.
3952 unsigned NumOps = Op.getNumOperands();
3953 if (Op.getOperand(NumOps - 1).getValueType() == MVT::Glue)
3954 --NumOps;
3955
3956 // Check all operands that may contain the LR.
3957 for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
3958 const InlineAsm::Flag Flags(Op.getConstantOperandVal(i));
3959 unsigned NumVals = Flags.getNumOperandRegisters();
3960 ++i; // Skip the ID value.
3961
3962 switch (Flags.getKind()) {
3963 default:
3964 llvm_unreachable("Bad flags!");
3965 case InlineAsm::Kind::RegUse:
3966 case InlineAsm::Kind::Imm:
3967 case InlineAsm::Kind::Mem:
3968 i += NumVals;
3969 break;
3970 case InlineAsm::Kind::Clobber:
3971 case InlineAsm::Kind::RegDef:
3972 case InlineAsm::Kind::RegDefEarlyClobber: {
3973 for (; NumVals; --NumVals, ++i) {
3974 Register Reg = cast<RegisterSDNode>(Op.getOperand(i))->getReg();
3975 if (Reg != PPC::LR && Reg != PPC::LR8)
3976 continue;
3977 MFI.setLRStoreRequired();
3978 return Op;
3979 }
3980 break;
3981 }
3982 }
3983 }
3984
3985 return Op;
3986}
3987
3988SDValue PPCTargetLowering::LowerINIT_TRAMPOLINE(SDValue Op,
3989 SelectionDAG &DAG) const {
3990 if (Subtarget.isAIXABI())
3991 report_fatal_error("INIT_TRAMPOLINE operation is not supported on AIX.");
3992
3993 SDValue Chain = Op.getOperand(0);
3994 SDValue Trmp = Op.getOperand(1); // trampoline
3995 SDValue FPtr = Op.getOperand(2); // nested function
3996 SDValue Nest = Op.getOperand(3); // 'nest' parameter value
3997 SDLoc dl(Op);
3998
3999 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4000 bool isPPC64 = (PtrVT == MVT::i64);
4001 Type *IntPtrTy = DAG.getDataLayout().getIntPtrType(*DAG.getContext());
4002
4003 TargetLowering::ArgListTy Args;
4004 TargetLowering::ArgListEntry Entry;
4005
4006 Entry.Ty = IntPtrTy;
4007 Entry.Node = Trmp; Args.push_back(Entry);
4008
4009 // TrampSize == (isPPC64 ? 48 : 40);
4010 Entry.Node =
4011 DAG.getConstant(isPPC64 ? 48 : 40, dl, Subtarget.getScalarIntVT());
4012 Args.push_back(Entry);
4013
4014 Entry.Node = FPtr; Args.push_back(Entry);
4015 Entry.Node = Nest; Args.push_back(Entry);
4016
4017 // Lower to a call to __trampoline_setup(Trmp, TrampSize, FPtr, ctx_reg)
4018 TargetLowering::CallLoweringInfo CLI(DAG);
4019 CLI.setDebugLoc(dl).setChain(Chain).setLibCallee(
4020 CallingConv::C, Type::getVoidTy(*DAG.getContext()),
4021 DAG.getExternalSymbol("__trampoline_setup", PtrVT), std::move(Args));
4022
4023 std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
4024 return CallResult.second;
4025}
4026
4027SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
4028 MachineFunction &MF = DAG.getMachineFunction();
4029 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
4030 EVT PtrVT = getPointerTy(MF.getDataLayout());
4031
4032 SDLoc dl(Op);
4033
4034 if (Subtarget.isPPC64() || Subtarget.isAIXABI()) {
4035 // vastart just stores the address of the VarArgsFrameIndex slot into the
4036 // memory location argument.
4037 SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
4038 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
4039 return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1),
4040 MachinePointerInfo(SV));
4041 }
4042
4043 // For the 32-bit SVR4 ABI we follow the layout of the va_list struct.
4044 // We suppose the given va_list is already allocated.
4045 //
4046 // typedef struct {
4047 // char gpr; /* index into the array of 8 GPRs
4048 // * stored in the register save area
4049 // * gpr=0 corresponds to r3,
4050 // * gpr=1 to r4, etc.
4051 // */
4052 // char fpr; /* index into the array of 8 FPRs
4053 // * stored in the register save area
4054 // * fpr=0 corresponds to f1,
4055 // * fpr=1 to f2, etc.
4056 // */
4057 // char *overflow_arg_area;
4058 // /* location on stack that holds
4059 // * the next overflow argument
4060 // */
4061 // char *reg_save_area;
4062 // /* where r3:r10 and f1:f8 (if saved)
4063 // * are stored
4064 // */
4065 // } va_list[1];
4066
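// The four stores below fill in these fields in order: gpr (offset 0),
// fpr (offset 1), overflow_arg_area (offset 4) and reg_save_area (offset 8).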
4067 SDValue ArgGPR = DAG.getConstant(FuncInfo->getVarArgsNumGPR(), dl, MVT::i32);
4068 SDValue ArgFPR = DAG.getConstant(FuncInfo->getVarArgsNumFPR(), dl, MVT::i32);
4069 SDValue StackOffsetFI = DAG.getFrameIndex(FuncInfo->getVarArgsStackOffset(),
4070 PtrVT);
4071 SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
4072 PtrVT);
4073
4074 uint64_t FrameOffset = PtrVT.getSizeInBits()/8;
4075 SDValue ConstFrameOffset = DAG.getConstant(FrameOffset, dl, PtrVT);
4076
4077 uint64_t StackOffset = PtrVT.getSizeInBits()/8 - 1;
4078 SDValue ConstStackOffset = DAG.getConstant(StackOffset, dl, PtrVT);
4079
4080 uint64_t FPROffset = 1;
4081 SDValue ConstFPROffset = DAG.getConstant(FPROffset, dl, PtrVT);
4082
4083 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
4084
4085 // Store first byte : number of int regs
4086 SDValue firstStore =
4087 DAG.getTruncStore(Op.getOperand(0), dl, ArgGPR, Op.getOperand(1),
4088 MachinePointerInfo(SV), MVT::i8);
4089 uint64_t nextOffset = FPROffset;
4090 SDValue nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, Op.getOperand(1),
4091 ConstFPROffset);
4092
4093 // Store second byte : number of float regs
4094 SDValue secondStore =
4095 DAG.getTruncStore(firstStore, dl, ArgFPR, nextPtr,
4096 MachinePointerInfo(SV, nextOffset), MVT::i8);
4097 nextOffset += StackOffset;
4098 nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstStackOffset);
4099
4100 // Store second word : arguments given on stack
4101 SDValue thirdStore = DAG.getStore(secondStore, dl, StackOffsetFI, nextPtr,
4102 MachinePointerInfo(SV, nextOffset));
4103 nextOffset += FrameOffset;
4104 nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstFrameOffset);
4105
4106 // Store third word : arguments given in registers
4107 return DAG.getStore(thirdStore, dl, FR, nextPtr,
4108 MachinePointerInfo(SV, nextOffset));
4109}
4110
4111/// FPR - The set of FP registers that should be allocated for arguments
4112/// on Darwin and AIX.
4113static const MCPhysReg FPR[] = {PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5,
4114 PPC::F6, PPC::F7, PPC::F8, PPC::F9, PPC::F10,
4115 PPC::F11, PPC::F12, PPC::F13};
4116
4117/// CalculateStackSlotSize - Calculates the size reserved for this argument on
4118/// the stack.
4119static unsigned CalculateStackSlotSize(EVT ArgVT, ISD::ArgFlagsTy Flags,
4120 unsigned PtrByteSize) {
4121 unsigned ArgSize = ArgVT.getStoreSize();
4122 if (Flags.isByVal())
4123 ArgSize = Flags.getByValSize();
4124
4125 // Round up to multiples of the pointer size, except for array members,
4126 // which are always packed.
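// For example, a 13-byte byval argument with an 8-byte pointer size is
// rounded up to a 16-byte stack slot.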
4127 if (!Flags.isInConsecutiveRegs())
4128 ArgSize = ((ArgSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
4129
4130 return ArgSize;
4131}
4132
4133/// CalculateStackSlotAlignment - Calculates the alignment of this argument
4134/// on the stack.
4135 static Align CalculateStackSlotAlignment(EVT ArgVT, EVT OrigVT,
4136 ISD::ArgFlagsTy Flags,
4137 unsigned PtrByteSize) {
4138 Align Alignment(PtrByteSize);
4139
4140 // Altivec parameters are padded to a 16 byte boundary.
4141 if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
4142 ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
4143 ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64 ||
4144 ArgVT == MVT::v1i128 || ArgVT == MVT::f128)
4145 Alignment = Align(16);
4146
4147 // ByVal parameters are aligned as requested.
4148 if (Flags.isByVal()) {
4149 auto BVAlign = Flags.getNonZeroByValAlign();
4150 if (BVAlign > PtrByteSize) {
4151 if (BVAlign.value() % PtrByteSize != 0)
4153 "ByVal alignment is not a multiple of the pointer size");
4154
4155 Alignment = BVAlign;
4156 }
4157 }
4158
4159 // Array members are always packed to their original alignment.
4160 if (Flags.isInConsecutiveRegs()) {
4161 // If the array member was split into multiple registers, the first
4162 // needs to be aligned to the size of the full type. (Except for
4163 // ppcf128, which is only aligned as its f64 components.)
4164 if (Flags.isSplit() && OrigVT != MVT::ppcf128)
4165 Alignment = Align(OrigVT.getStoreSize());
4166 else
4167 Alignment = Align(ArgVT.getStoreSize());
4168 }
4169
4170 return Alignment;
4171}
4172
4173/// CalculateStackSlotUsed - Return whether this argument will use its
4174/// stack slot (instead of being passed in registers). ArgOffset,
4175/// AvailableFPRs, and AvailableVRs must hold the current argument
4176/// position, and will be updated to account for this argument.
4177static bool CalculateStackSlotUsed(EVT ArgVT, EVT OrigVT, ISD::ArgFlagsTy Flags,
4178 unsigned PtrByteSize, unsigned LinkageSize,
4179 unsigned ParamAreaSize, unsigned &ArgOffset,
4180 unsigned &AvailableFPRs,
4181 unsigned &AvailableVRs) {
4182 bool UseMemory = false;
4183
4184 // Respect alignment of argument on the stack.
4185 Align Alignment =
4186 CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
4187 ArgOffset = alignTo(ArgOffset, Alignment);
4188 // If there's no space left in the argument save area, we must
4189 // use memory (this check also catches zero-sized arguments).
4190 if (ArgOffset >= LinkageSize + ParamAreaSize)
4191 UseMemory = true;
4192
4193 // Allocate argument on the stack.
4194 ArgOffset += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
4195 if (Flags.isInConsecutiveRegsLast())
4196 ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
4197 // If we overran the argument save area, we must use memory
4198 // (this check catches arguments passed partially in memory)
4199 if (ArgOffset > LinkageSize + ParamAreaSize)
4200 UseMemory = true;
4201
4202 // However, if the argument is actually passed in an FPR or a VR,
4203 // we don't use memory after all.
4204 if (!Flags.isByVal()) {
4205 if (ArgVT == MVT::f32 || ArgVT == MVT::f64)
4206 if (AvailableFPRs > 0) {
4207 --AvailableFPRs;
4208 return false;
4209 }
4210 if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
4211 ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
4212 ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64 ||
4213 ArgVT == MVT::v1i128 || ArgVT == MVT::f128)
4214 if (AvailableVRs > 0) {
4215 --AvailableVRs;
4216 return false;
4217 }
4218 }
4219
4220 return UseMemory;
4221}
4222
4223/// EnsureStackAlignment - Round stack frame size up from NumBytes to
4224/// ensure minimum alignment required for target.
4225 static unsigned EnsureStackAlignment(const PPCFrameLowering *Lowering,
4226 unsigned NumBytes) {
4227 return alignTo(NumBytes, Lowering->getStackAlign());
4228}
4229
4230SDValue PPCTargetLowering::LowerFormalArguments(
4231 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
4232 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
4233 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
4234 if (Subtarget.isAIXABI())
4235 return LowerFormalArguments_AIX(Chain, CallConv, isVarArg, Ins, dl, DAG,
4236 InVals);
4237 if (Subtarget.is64BitELFABI())
4238 return LowerFormalArguments_64SVR4(Chain, CallConv, isVarArg, Ins, dl, DAG,
4239 InVals);
4240 assert(Subtarget.is32BitELFABI());
4241 return LowerFormalArguments_32SVR4(Chain, CallConv, isVarArg, Ins, dl, DAG,
4242 InVals);
4243}
4244
4245SDValue PPCTargetLowering::LowerFormalArguments_32SVR4(
4246 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
4247 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
4248 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
4249
4250 // 32-bit SVR4 ABI Stack Frame Layout:
4251 // +-----------------------------------+
4252 // +--> | Back chain |
4253 // | +-----------------------------------+
4254 // | | Floating-point register save area |
4255 // | +-----------------------------------+
4256 // | | General register save area |
4257 // | +-----------------------------------+
4258 // | | CR save word |
4259 // | +-----------------------------------+
4260 // | | VRSAVE save word |
4261 // | +-----------------------------------+
4262 // | | Alignment padding |
4263 // | +-----------------------------------+
4264 // | | Vector register save area |
4265 // | +-----------------------------------+
4266 // | | Local variable space |
4267 // | +-----------------------------------+
4268 // | | Parameter list area |
4269 // | +-----------------------------------+
4270 // | | LR save word |
4271 // | +-----------------------------------+
4272 // SP--> +--- | Back chain |
4273 // +-----------------------------------+
4274 //
4275 // Specifications:
4276 // System V Application Binary Interface PowerPC Processor Supplement
4277 // AltiVec Technology Programming Interface Manual
4278
4279 MachineFunction &MF = DAG.getMachineFunction();
4280 MachineFrameInfo &MFI = MF.getFrameInfo();
4281 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
4282
4283 EVT PtrVT = getPointerTy(MF.getDataLayout());
4284 // Potential tail calls could cause overwriting of argument stack slots.
4285 bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
4286 (CallConv == CallingConv::Fast));
4287 const Align PtrAlign(4);
4288
4289 // Assign locations to all of the incoming arguments.
4290 SmallVector<CCValAssign, 16> ArgLocs;
4291 PPCCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
4292 *DAG.getContext());
4293
4294 // Reserve space for the linkage area on the stack.
4295 unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
4296 CCInfo.AllocateStack(LinkageSize, PtrAlign);
4297 if (useSoftFloat())
4298 CCInfo.PreAnalyzeFormalArguments(Ins);
4299
4300 CCInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4);
4301 CCInfo.clearWasPPCF128();
4302
4303 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
4304 CCValAssign &VA = ArgLocs[i];
4305
4306 // Arguments stored in registers.
4307 if (VA.isRegLoc()) {
4308 const TargetRegisterClass *RC;
4309 EVT ValVT = VA.getValVT();
4310
4311 switch (ValVT.getSimpleVT().SimpleTy) {
4312 default:
4313 llvm_unreachable("ValVT not supported by formal arguments Lowering");
4314 case MVT::i1:
4315 case MVT::i32:
4316 RC = &PPC::GPRCRegClass;
4317 break;
4318 case MVT::f32:
4319 if (Subtarget.hasP8Vector())
4320 RC = &PPC::VSSRCRegClass;
4321 else if (Subtarget.hasSPE())
4322 RC = &PPC::GPRCRegClass;
4323 else
4324 RC = &PPC::F4RCRegClass;
4325 break;
4326 case MVT::f64:
4327 if (Subtarget.hasVSX())
4328 RC = &PPC::VSFRCRegClass;
4329 else if (Subtarget.hasSPE())
4330 // SPE passes doubles in GPR pairs.
4331 RC = &PPC::GPRCRegClass;
4332 else
4333 RC = &PPC::F8RCRegClass;
4334 break;
4335 case MVT::v16i8:
4336 case MVT::v8i16:
4337 case MVT::v4i32:
4338 RC = &PPC::VRRCRegClass;
4339 break;
4340 case MVT::v4f32:
4341 RC = &PPC::VRRCRegClass;
4342 break;
4343 case MVT::v2f64:
4344 case MVT::v2i64:
4345 RC = &PPC::VRRCRegClass;
4346 break;
4347 }
4348
4349 SDValue ArgValue;
4350 // Transform the arguments stored in physical registers into
4351 // virtual ones.
4352 if (VA.getLocVT() == MVT::f64 && Subtarget.hasSPE()) {
4353 assert(i + 1 < e && "No second half of double precision argument");
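// SPE f64 arguments arrive as two 32-bit GPR halves; reassemble them with
// BUILD_SPE64, swapping the halves on big-endian targets.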
4354 Register RegLo = MF.addLiveIn(VA.getLocReg(), RC);
4355 Register RegHi = MF.addLiveIn(ArgLocs[++i].getLocReg(), RC);
4356 SDValue ArgValueLo = DAG.getCopyFromReg(Chain, dl, RegLo, MVT::i32);
4357 SDValue ArgValueHi = DAG.getCopyFromReg(Chain, dl, RegHi, MVT::i32);
4358 if (!Subtarget.isLittleEndian())
4359 std::swap (ArgValueLo, ArgValueHi);
4360 ArgValue = DAG.getNode(PPCISD::BUILD_SPE64, dl, MVT::f64, ArgValueLo,
4361 ArgValueHi);
4362 } else {
4363 Register Reg = MF.addLiveIn(VA.getLocReg(), RC);
4364 ArgValue = DAG.getCopyFromReg(Chain, dl, Reg,
4365 ValVT == MVT::i1 ? MVT::i32 : ValVT);
4366 if (ValVT == MVT::i1)
4367 ArgValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, ArgValue);
4368 }
4369
4370 InVals.push_back(ArgValue);
4371 } else {
4372 // Argument stored in memory.
4373 assert(VA.isMemLoc());
4374
4375 // Get the extended size of the argument type on the stack
4376 unsigned ArgSize = VA.getLocVT().getStoreSize();
4377 // Get the actual size of the argument type
4378 unsigned ObjSize = VA.getValVT().getStoreSize();
4379 unsigned ArgOffset = VA.getLocMemOffset();
4380 // Stack objects in PPC32 are right justified.
4381 ArgOffset += ArgSize - ObjSize;
4382 int FI = MFI.CreateFixedObject(ArgSize, ArgOffset, isImmutable);
4383
4384 // Create load nodes to retrieve arguments from the stack.
4385 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4386 InVals.push_back(
4387 DAG.getLoad(VA.getValVT(), dl, Chain, FIN, MachinePointerInfo()));
4388 }
4389 }
4390
4391 // Assign locations to all of the incoming aggregate by value arguments.
4392 // Aggregates passed by value are stored in the local variable space of the
4393 // caller's stack frame, right above the parameter list area.
4394 SmallVector<CCValAssign, 16> ByValArgLocs;
4395 CCState CCByValInfo(CallConv, isVarArg, DAG.getMachineFunction(),
4396 ByValArgLocs, *DAG.getContext());
4397
4398 // Reserve stack space for the allocations in CCInfo.
4399 CCByValInfo.AllocateStack(CCInfo.getStackSize(), PtrAlign);
4400
4401 CCByValInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4_ByVal);
4402
4403 // Area that is at least reserved in the caller of this function.
4404 unsigned MinReservedArea = CCByValInfo.getStackSize();
4405 MinReservedArea = std::max(MinReservedArea, LinkageSize);
4406
4407 // Set the size that is at least reserved in the caller of this function. Tail
4408 // call optimized functions' reserved stack space needs to be aligned so that
4409 // taking the difference between two stack areas will result in an aligned
4410 // stack.
4411 MinReservedArea =
4412 EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
4413 FuncInfo->setMinReservedArea(MinReservedArea);
4414
4415 SmallVector<SDValue, 8> MemOps;
4416
4417 // If the function takes a variable number of arguments, make a frame index for
4418 // the start of the first vararg value... for expansion of llvm.va_start.
4419 if (isVarArg) {
4420 static const MCPhysReg GPArgRegs[] = {
4421 PPC::R3, PPC::R4, PPC::R5, PPC::R6,
4422 PPC::R7, PPC::R8, PPC::R9, PPC::R10,
4423 };
4424 const unsigned NumGPArgRegs = std::size(GPArgRegs);
4425
4426 static const MCPhysReg FPArgRegs[] = {
4427 PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
4428 PPC::F8
4429 };
4430 unsigned NumFPArgRegs = std::size(FPArgRegs);
4431
4432 if (useSoftFloat() || hasSPE())
4433 NumFPArgRegs = 0;
4434
4435 FuncInfo->setVarArgsNumGPR(CCInfo.getFirstUnallocated(GPArgRegs));
4436 FuncInfo->setVarArgsNumFPR(CCInfo.getFirstUnallocated(FPArgRegs));
4437
4438 // Make room for NumGPArgRegs and NumFPArgRegs.
4439 int Depth = NumGPArgRegs * PtrVT.getSizeInBits()/8 +
4440 NumFPArgRegs * MVT(MVT::f64).getSizeInBits()/8;
4441
4442 FuncInfo->setVarArgsStackOffset(MFI.CreateFixedObject(
4443 PtrVT.getSizeInBits() / 8, CCInfo.getStackSize(), true));
4444
4445 FuncInfo->setVarArgsFrameIndex(
4446 MFI.CreateStackObject(Depth, Align(8), false));
4447 SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
4448
4449 // The fixed integer arguments of a variadic function are stored to the
4450 // VarArgsFrameIndex on the stack so that they may be loaded by
4451 // dereferencing the result of va_next.
4452 for (unsigned GPRIndex = 0; GPRIndex != NumGPArgRegs; ++GPRIndex) {
4453 // Get an existing live-in vreg, or add a new one.
4454 Register VReg = MF.getRegInfo().getLiveInVirtReg(GPArgRegs[GPRIndex]);
4455 if (!VReg)
4456 VReg = MF.addLiveIn(GPArgRegs[GPRIndex], &PPC::GPRCRegClass);
4457
4458 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4459 SDValue Store =
4460 DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
4461 MemOps.push_back(Store);
4462 // Increment the address by four for the next argument to store
4463 SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, dl, PtrVT);
4464 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
4465 }
4466
4467 // FIXME 32-bit SVR4: We only need to save FP argument registers if CR bit 6
4468 // is set.
4469 // The double arguments are stored to the VarArgsFrameIndex
4470 // on the stack.
4471 for (unsigned FPRIndex = 0; FPRIndex != NumFPArgRegs; ++FPRIndex) {
4472 // Get an existing live-in vreg, or add a new one.
4473 Register VReg = MF.getRegInfo().getLiveInVirtReg(FPArgRegs[FPRIndex]);
4474 if (!VReg)
4475 VReg = MF.addLiveIn(FPArgRegs[FPRIndex], &PPC::F8RCRegClass);
4476
4477 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::f64);
4478 SDValue Store =
4479 DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
4480 MemOps.push_back(Store);
4481 // Increment the address by eight for the next argument to store
4482 SDValue PtrOff = DAG.getConstant(MVT(MVT::f64).getSizeInBits()/8, dl,
4483 PtrVT);
4484 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
4485 }
4486 }
4487
4488 if (!MemOps.empty())
4489 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
4490
4491 return Chain;
4492}
4493
4494// PPC64 passes i8, i16, and i32 values in i64 registers. Promote
4495// value to MVT::i64 and then truncate to the correct register size.
4496SDValue PPCTargetLowering::extendArgForPPC64(ISD::ArgFlagsTy Flags,
4497 EVT ObjectVT, SelectionDAG &DAG,
4498 SDValue ArgVal,
4499 const SDLoc &dl) const {
4500 if (Flags.isSExt())
4501 ArgVal = DAG.getNode(ISD::AssertSext, dl, MVT::i64, ArgVal,
4502 DAG.getValueType(ObjectVT));
4503 else if (Flags.isZExt())
4504 ArgVal = DAG.getNode(ISD::AssertZext, dl, MVT::i64, ArgVal,
4505 DAG.getValueType(ObjectVT));
4506
4507 return DAG.getNode(ISD::TRUNCATE, dl, ObjectVT, ArgVal);
4508}
4509
4510SDValue PPCTargetLowering::LowerFormalArguments_64SVR4(
4511 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
4512 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
4513 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
4514 // TODO: add description of PPC stack frame format, or at least some docs.
4515 //
4516 bool isELFv2ABI = Subtarget.isELFv2ABI();
4517 bool isLittleEndian = Subtarget.isLittleEndian();
4518 MachineFunction &MF = DAG.getMachineFunction();
4519 MachineFrameInfo &MFI = MF.getFrameInfo();
4520 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
4521
4522 assert(!(CallConv == CallingConv::Fast && isVarArg) &&
4523 "fastcc not supported on varargs functions");
4524
4525 EVT PtrVT = getPointerTy(MF.getDataLayout());
4526 // Potential tail calls could cause overwriting of argument stack slots.
4527 bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
4528 (CallConv == CallingConv::Fast));
4529 unsigned PtrByteSize = 8;
4530 unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
4531
4532 static const MCPhysReg GPR[] = {
4533 PPC::X3, PPC::X4, PPC::X5, PPC::X6,
4534 PPC::X7, PPC::X8, PPC::X9, PPC::X10,
4535 };
4536 static const MCPhysReg VR[] = {
4537 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
4538 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
4539 };
4540
4541 const unsigned Num_GPR_Regs = std::size(GPR);
4542 const unsigned Num_FPR_Regs = useSoftFloat() ? 0 : 13;
4543 const unsigned Num_VR_Regs = std::size(VR);
4544
4545 // Do a first pass over the arguments to determine whether the ABI
4546 // guarantees that our caller has allocated the parameter save area
4547 // on its stack frame. In the ELFv1 ABI, this is always the case;
4548 // in the ELFv2 ABI, it is true if this is a vararg function or if
4549 // any parameter is located in a stack slot.
4550
4551 bool HasParameterArea = !isELFv2ABI || isVarArg;
4552 unsigned ParamAreaSize = Num_GPR_Regs * PtrByteSize;
4553 unsigned NumBytes = LinkageSize;
4554 unsigned AvailableFPRs = Num_FPR_Regs;
4555 unsigned AvailableVRs = Num_VR_Regs;
4556 for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
4557 if (Ins[i].Flags.isNest())
4558 continue;
4559
4560 if (CalculateStackSlotUsed(Ins[i].VT, Ins[i].ArgVT, Ins[i].Flags,
4561 PtrByteSize, LinkageSize, ParamAreaSize,
4562 NumBytes, AvailableFPRs, AvailableVRs))
4563 HasParameterArea = true;
4564 }
4565
4566 // Add DAG nodes to load the arguments or copy them out of registers. On
4567 // entry to a function on PPC, the arguments start after the linkage area,
4568 // although the first ones are often in registers.
4569
4570 unsigned ArgOffset = LinkageSize;
4571 unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
4572 SmallVector<SDValue, 8> MemOps;
4573 Function::const_arg_iterator FuncArg = MF.getFunction().arg_begin();
4574 unsigned CurArgIdx = 0;
4575 for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
4576 SDValue ArgVal;
4577 bool needsLoad = false;
4578 EVT ObjectVT = Ins[ArgNo].VT;
4579 EVT OrigVT = Ins[ArgNo].ArgVT;
4580 unsigned ObjSize = ObjectVT.getStoreSize();
4581 unsigned ArgSize = ObjSize;
4582 ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
4583 if (Ins[ArgNo].isOrigArg()) {
4584 std::advance(FuncArg, Ins[ArgNo].getOrigArgIndex() - CurArgIdx);
4585 CurArgIdx = Ins[ArgNo].getOrigArgIndex();
4586 }
4587 // We re-align the argument offset for each argument, except under the fast
4588 // calling convention, where we only do so once we know the argument will
4589 // actually use a stack slot.
4590 unsigned CurArgOffset;
4591 Align Alignment;
4592 auto ComputeArgOffset = [&]() {
4593 /* Respect alignment of argument on the stack. */
4594 Alignment =
4595 CalculateStackSlotAlignment(ObjectVT, OrigVT, Flags, PtrByteSize);
4596 ArgOffset = alignTo(ArgOffset, Alignment);
4597 CurArgOffset = ArgOffset;
4598 };
4599
4600 if (CallConv != CallingConv::Fast) {
4601 ComputeArgOffset();
4602
4603 /* Compute GPR index associated with argument offset. */
4604 GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
4605 GPR_idx = std::min(GPR_idx, Num_GPR_Regs);
4606 }
4607
4608 // FIXME the codegen can be much improved in some cases.
4609 // We do not have to keep everything in memory.
4610 if (Flags.isByVal()) {
4611 assert(Ins[ArgNo].isOrigArg() && "Byval arguments cannot be implicit");
4612
4613 if (CallConv == CallingConv::Fast)
4614 ComputeArgOffset();
4615
4616 // ObjSize is the true size; ArgSize is ObjSize rounded up to a multiple of the register size.
4617 ObjSize = Flags.getByValSize();
4618 ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
4619 // Empty aggregate parameters do not take up registers. Examples:
4620 // struct { } a;
4621 // union { } b;
4622 // int c[0];
4623 // etc. However, we have to provide a place-holder in InVals, so
4624 // pretend we have an 8-byte item at the current address for that
4625 // purpose.
4626 if (!ObjSize) {
4627 int FI = MFI.CreateFixedObject(PtrByteSize, ArgOffset, true);
4628 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4629 InVals.push_back(FIN);
4630 continue;
4631 }
4632
4633 // Create a stack object covering all stack doublewords occupied
4634 // by the argument. If the argument is (fully or partially) on
4635 // the stack, or if the argument is fully in registers but the
4636 // caller has allocated the parameter save area anyway, we can refer
4637 // directly to the caller's stack frame. Otherwise, create a
4638 // local copy in our own frame.
4639 int FI;
4640 if (HasParameterArea ||
4641 ArgSize + ArgOffset > LinkageSize + Num_GPR_Regs * PtrByteSize)
4642 FI = MFI.CreateFixedObject(ArgSize, ArgOffset, false, true);
4643 else
4644 FI = MFI.CreateStackObject(ArgSize, Alignment, false);
4645 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4646
4647 // Handle aggregates smaller than 8 bytes.
4648 if (ObjSize < PtrByteSize) {
4649 // The value of the object is its address, which differs from the
4650 // address of the enclosing doubleword on big-endian systems.
4651 SDValue Arg = FIN;
4652 if (!isLittleEndian) {
4653 SDValue ArgOff = DAG.getConstant(PtrByteSize - ObjSize, dl, PtrVT);
4654 Arg = DAG.getNode(ISD::ADD, dl, ArgOff.getValueType(), Arg, ArgOff);
4655 }
4656 InVals.push_back(Arg);
4657
4658 if (GPR_idx != Num_GPR_Regs) {
4659 Register VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
4660 FuncInfo->addLiveInAttr(VReg, Flags);
4661 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4662 EVT ObjType = EVT::getIntegerVT(*DAG.getContext(), ObjSize * 8);
4663 SDValue Store =
4664 DAG.getTruncStore(Val.getValue(1), dl, Val, Arg,
4665 MachinePointerInfo(&*FuncArg), ObjType);
4666 MemOps.push_back(Store);
4667 }
4668 // Whether we copied from a register or not, advance the offset
4669 // into the parameter save area by a full doubleword.
4670 ArgOffset += PtrByteSize;
4671 continue;
4672 }
4673
4674 // The value of the object is its address, which is the address of
4675 // its first stack doubleword.
4676 InVals.push_back(FIN);
4677
4678 // Store whatever pieces of the object are in registers to memory.
4679 for (unsigned j = 0; j < ArgSize; j += PtrByteSize) {
4680 if (GPR_idx == Num_GPR_Regs)
4681 break;
4682
4683 Register VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
4684 FuncInfo->addLiveInAttr(VReg, Flags);
4685 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4686 SDValue Addr = FIN;
4687 if (j) {
4688 SDValue Off = DAG.getConstant(j, dl, PtrVT);
4689 Addr = DAG.getNode(ISD::ADD, dl, Off.getValueType(), Addr, Off);
4690 }
4691 unsigned StoreSizeInBits = std::min(PtrByteSize, (ObjSize - j)) * 8;
4692 EVT ObjType = EVT::getIntegerVT(*DAG.getContext(), StoreSizeInBits);
4693 SDValue Store =
4694 DAG.getTruncStore(Val.getValue(1), dl, Val, Addr,
4695 MachinePointerInfo(&*FuncArg, j), ObjType);
4696 MemOps.push_back(Store);
4697 ++GPR_idx;
4698 }
4699 ArgOffset += ArgSize;
4700 continue;
4701 }
4702
4703 switch (ObjectVT.getSimpleVT().SimpleTy) {
4704 default: llvm_unreachable("Unhandled argument type!");
4705 case MVT::i1:
4706 case MVT::i32:
4707 case MVT::i64:
4708 if (Flags.isNest()) {
4709 // The 'nest' parameter, if any, is passed in R11.
4710 Register VReg = MF.addLiveIn(PPC::X11, &PPC::G8RCRegClass);
4711 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
4712
4713 if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
4714 ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
4715
4716 break;
4717 }
4718
4719 // These can be scalar arguments or elements of an integer array type
4720 // passed directly. Clang may use those instead of "byval" aggregate
4721 // types to avoid forcing arguments to memory unnecessarily.
4722 if (GPR_idx != Num_GPR_Regs) {
4723 Register VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
4724 FuncInfo->addLiveInAttr(VReg, Flags);
4725 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
4726
4727 if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
4728 // PPC64 passes i8, i16, and i32 values in i64 registers. Promote
4729 // value to MVT::i64 and then truncate to the correct register size.
4730 ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
4731 } else {
4732 if (CallConv == CallingConv::Fast)
4733 ComputeArgOffset();
4734
4735 needsLoad = true;
4736 ArgSize = PtrByteSize;
4737 }
4738 if (CallConv != CallingConv::Fast || needsLoad)
4739 ArgOffset += 8;
4740 break;
4741
4742 case MVT::f32:
4743 case MVT::f64:
4744 // These can be scalar arguments or elements of a float array type
4745 // passed directly. The latter are used to implement ELFv2 homogeneous
4746 // float aggregates.
4747 if (FPR_idx != Num_FPR_Regs) {
4748 unsigned VReg;
4749
4750 if (ObjectVT == MVT::f32)
4751 VReg = MF.addLiveIn(FPR[FPR_idx],
4752 Subtarget.hasP8Vector()
4753 ? &PPC::VSSRCRegClass
4754 : &PPC::F4RCRegClass);
4755 else
4756 VReg = MF.addLiveIn(FPR[FPR_idx], Subtarget.hasVSX()
4757 ? &PPC::VSFRCRegClass
4758 : &PPC::F8RCRegClass);
4759
4760 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
4761 ++FPR_idx;
4762 } else if (GPR_idx != Num_GPR_Regs && CallConv != CallingConv::Fast) {
4763 // FIXME: We may want to re-enable this for CallingConv::Fast on the P8
4764 // once we support fp <-> gpr moves.
4765
4766 // This can only ever happen in the presence of f32 array types,
4767 // since otherwise we never run out of FPRs before running out
4768 // of GPRs.
4769 Register VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
4770 FuncInfo->addLiveInAttr(VReg, Flags);
4771 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
4772
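// An f32 passed in a GPR occupies one 32-bit half of the doubleword; which
// half depends on the argument offset and endianness, so shift it down if
// needed before truncating and bitcasting to f32.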
4773 if (ObjectVT == MVT::f32) {
4774 if ((ArgOffset % PtrByteSize) == (isLittleEndian ? 4 : 0))
4775 ArgVal = DAG.getNode(ISD::SRL, dl, MVT::i64, ArgVal,
4776 DAG.getConstant(32, dl, MVT::i32));
4777 ArgVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, ArgVal);
4778 }
4779
4780 ArgVal = DAG.getNode(ISD::BITCAST, dl, ObjectVT, ArgVal);
4781 } else {
4782 if (CallConv == CallingConv::Fast)
4783 ComputeArgOffset();
4784
4785 needsLoad = true;
4786 }
4787
4788 // When passing an array of floats, the array occupies consecutive
4789 // space in the argument area; only round up to the next doubleword
4790 // at the end of the array. Otherwise, each float takes 8 bytes.
4791 if (CallConv != CallingConv::Fast || needsLoad) {
4792 ArgSize = Flags.isInConsecutiveRegs() ? ObjSize : PtrByteSize;
4793 ArgOffset += ArgSize;
4794 if (Flags.isInConsecutiveRegsLast())
4795 ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
4796 }
4797 break;
4798 case MVT::v4f32:
4799 case MVT::v4i32:
4800 case MVT::v8i16:
4801 case MVT::v16i8:
4802 case MVT::v2f64:
4803 case MVT::v2i64:
4804 case MVT::v1i128:
4805 case MVT::f128:
4806 // These can be scalar arguments or elements of a vector array type
4807 // passed directly. The latter are used to implement ELFv2 homogeneous
4808 // vector aggregates.
4809 if (VR_idx != Num_VR_Regs) {
4810 Register VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass);
4811 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
4812 ++VR_idx;
4813 } else {
4814 if (CallConv == CallingConv::Fast)
4815 ComputeArgOffset();
4816 needsLoad = true;
4817 }
4818 if (CallConv != CallingConv::Fast || needsLoad)
4819 ArgOffset += 16;
4820 break;
4821 }
4822
4823 // We need to load the argument to a virtual register if we determined
4824 // above that we ran out of physical registers of the appropriate type.
4825 if (needsLoad) {
4826 if (ObjSize < ArgSize && !isLittleEndian)
4827 CurArgOffset += ArgSize - ObjSize;
4828 int FI = MFI.CreateFixedObject(ObjSize, CurArgOffset, isImmutable);
4829 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4830 ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo());
4831 }
4832
4833 InVals.push_back(ArgVal);
4834 }
4835
4836 // Area that is at least reserved in the caller of this function.
4837 unsigned MinReservedArea;
4838 if (HasParameterArea)
4839 MinReservedArea = std::max(ArgOffset, LinkageSize + 8 * PtrByteSize);
4840 else
4841 MinReservedArea = LinkageSize;
4842
4843 // Set the size that is at least reserved in the caller of this function. Tail
4844 // call optimized functions' reserved stack space needs to be aligned so that
4845 // taking the difference between two stack areas will result in an aligned
4846 // stack.
4847 MinReservedArea =
4848 EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
4849 FuncInfo->setMinReservedArea(MinReservedArea);
4850
4851 // If the function takes a variable number of arguments, make a frame index for
4852 // the start of the first vararg value... for expansion of llvm.va_start.
4853 // The ELFv2 ABI spec states:
4854 // C programs that are intended to be *portable* across different compilers
4855 // and architectures must use the header file <stdarg.h> to deal with variable
4856 // argument lists.
4857 if (isVarArg && MFI.hasVAStart()) {
4858 int Depth = ArgOffset;
4859
4860 FuncInfo->setVarArgsFrameIndex(
4861 MFI.CreateFixedObject(PtrByteSize, Depth, true));
4862 SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
4863
4864 // If this function is vararg, store any remaining integer argument regs
4865 // to their spots on the stack so that they may be loaded by dereferencing
4866 // the result of va_next.
4867 for (GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
4868 GPR_idx < Num_GPR_Regs; ++GPR_idx) {
4869 Register VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
4870 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4871 SDValue Store =
4872 DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
4873 MemOps.push_back(Store);
4874 // Increment the address by PtrByteSize (8) for the next argument to store
4875 SDValue PtrOff = DAG.getConstant(PtrByteSize, dl, PtrVT);
4876 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
4877 }
4878 }
4879
4880 if (!MemOps.empty())
4881 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
4882
4883 return Chain;
4884}
4885
4886/// CalculateTailCallSPDiff - Get the amount the stack pointer has to be
4887/// adjusted to accommodate the arguments for the tailcall.
4888static int CalculateTailCallSPDiff(SelectionDAG& DAG, bool isTailCall,
4889 unsigned ParamSize) {
4890
4891 if (!isTailCall) return 0;
4892
4893 PPCFunctionInfo *FI = DAG.getMachineFunction().getInfo<PPCFunctionInfo>();
4894 unsigned CallerMinReservedArea = FI->getMinReservedArea();
4895 int SPDiff = (int)CallerMinReservedArea - (int)ParamSize;
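// A negative SPDiff means the callee needs more parameter save space than the
// caller reserved; the most negative value is remembered so enough room can be
// made for any tail call in this function.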
4896 // Remember only if the new adjustment is bigger.
4897 if (SPDiff < FI->getTailCallSPDelta())
4898 FI->setTailCallSPDelta(SPDiff);
4899
4900 return SPDiff;
4901}
4902
4903static bool isFunctionGlobalAddress(const GlobalValue *CalleeGV);
4904
4905static bool callsShareTOCBase(const Function *Caller,
4906 const GlobalValue *CalleeGV,
4907 const TargetMachine &TM) {
4908 // It does not make sense to call callsShareTOCBase() with a caller that
4909 // is PC Relative since PC Relative callers do not have a TOC.
4910#ifndef NDEBUG
4911 const PPCSubtarget *STICaller = &TM.getSubtarget<PPCSubtarget>(*Caller);
4912 assert(!STICaller->isUsingPCRelativeCalls() &&
4913 "PC Relative callers do not have a TOC and cannot share a TOC Base");
4914#endif
4915
4916 // Callee is either a GlobalAddress or an ExternalSymbol. ExternalSymbols
4917 // don't have enough information to determine if the caller and callee share
4918 // the same TOC base, so we have to pessimistically assume they don't for
4919 // correctness.
4920 if (!CalleeGV)
4921 return false;
4922
4923 // If the callee is preemptable, then the static linker will use a plt-stub
4924 // which saves the toc to the stack, and needs a nop after the call
4925 // instruction to convert to a toc-restore.
4926 if (!TM.shouldAssumeDSOLocal(CalleeGV))
4927 return false;
4928
4929 // Functions with PC Relative enabled may clobber the TOC in the same DSO.
4930 // We may need a TOC restore in the situation where the caller requires a
4931 // valid TOC but the callee is PC Relative and does not.
4932 const Function *F = dyn_cast<Function>(CalleeGV);
4933 const GlobalAlias *Alias = dyn_cast<GlobalAlias>(CalleeGV);
4934
4935 // If we have an Alias we can try to get the function from there.
4936 if (Alias) {
4937 const GlobalObject *GlobalObj = Alias->getAliaseeObject();
4938 F = dyn_cast<Function>(GlobalObj);
4939 }
4940
4941 // If we still have no valid function pointer we do not have enough
4942 // information to determine if the callee uses PC Relative calls so we must
4943 // assume that it does.
4944 if (!F)
4945 return false;
4946
4947 // If the callee uses PC Relative we cannot guarantee that the callee won't
4948 // clobber the TOC of the caller and so we must assume that the two
4949 // functions do not share a TOC base.
4950 const PPCSubtarget *STICallee = &TM.getSubtarget<PPCSubtarget>(*F);
4951 if (STICallee->isUsingPCRelativeCalls())
4952 return false;
4953
4954 // If the GV is not a strong definition then we need to assume it can be
4955 // replaced by another function at link time. The function that replaces
4956 // it may not share the same TOC as the caller since the callee may be
4957 // replaced by a PC Relative version of the same function.
4958 if (!CalleeGV->isStrongDefinitionForLinker())
4959 return false;
4960
4961 // The medium and large code models are expected to provide a sufficiently
4962 // large TOC to satisfy all data addressing needs of a module with a
4963 // single TOC.
4964 if (CodeModel::Medium == TM.getCodeModel() ||
4965 CodeModel::Large == TM.getCodeModel())
4966 return true;
4967
4968 // Any explicitly-specified sections and section prefixes must also match.
4969 // Also, if we're using -ffunction-sections, then each function is always in
4970 // a different section (the same is true for COMDAT functions).
4971 if (TM.getFunctionSections() || CalleeGV->hasComdat() ||
4972 Caller->hasComdat() || CalleeGV->getSection() != Caller->getSection())
4973 return false;
4974 if (const auto *F = dyn_cast<Function>(CalleeGV)) {
4975 if (F->getSectionPrefix() != Caller->getSectionPrefix())
4976 return false;
4977 }
4978
4979 return true;
4980}
4981
4982static bool
4983 needStackSlotPassParameters(const PPCSubtarget &Subtarget,
4984 const SmallVectorImpl<ISD::OutputArg> &Outs) {
4985 assert(Subtarget.is64BitELFABI());
4986
4987 const unsigned PtrByteSize = 8;
4988 const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
4989
4990 static const MCPhysReg GPR[] = {
4991 PPC::X3, PPC::X4, PPC::X5, PPC::X6,
4992 PPC::X7, PPC::X8, PPC::X9, PPC::X10,
4993 };
4994 static const MCPhysReg VR[] = {
4995 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
4996 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
4997 };
4998
4999 const unsigned NumGPRs = std::size(GPR);
5000 const unsigned NumFPRs = 13;
5001 const unsigned NumVRs = std::size(VR);
5002 const unsigned ParamAreaSize = NumGPRs * PtrByteSize;
5003
5004 unsigned NumBytes = LinkageSize;
5005 unsigned AvailableFPRs = NumFPRs;
5006 unsigned AvailableVRs = NumVRs;
5007
5008 for (const ISD::OutputArg& Param : Outs) {
5009 if (Param.Flags.isNest()) continue;
5010
5011 if (CalculateStackSlotUsed(Param.VT, Param.ArgVT, Param.Flags, PtrByteSize,
5012 LinkageSize, ParamAreaSize, NumBytes,
5013 AvailableFPRs, AvailableVRs))
5014 return true;
5015 }
5016 return false;
5017}
5018
5019static bool hasSameArgumentList(const Function *CallerFn, const CallBase &CB) {
5020 if (CB.arg_size() != CallerFn->arg_size())
5021 return false;
5022
5023 auto CalleeArgIter = CB.arg_begin();
5024 auto CalleeArgEnd = CB.arg_end();
5025 Function::const_arg_iterator CallerArgIter = CallerFn->arg_begin();
5026
5027 for (; CalleeArgIter != CalleeArgEnd; ++CalleeArgIter, ++CallerArgIter) {
5028 const Value* CalleeArg = *CalleeArgIter;
5029 const Value* CallerArg = &(*CallerArgIter);
5030 if (CalleeArg == CallerArg)
5031 continue;
5032
5033 // e.g. @caller([4 x i64] %a, [4 x i64] %b) {
5034 // tail call @callee([4 x i64] undef, [4 x i64] %b)
5035 // }
5036 // The first argument of the callee is undef and has the same type as the caller's.
5037 if (CalleeArg->getType() == CallerArg->getType() &&
5038 isa<UndefValue>(CalleeArg))
5039 continue;
5040
5041 return false;
5042 }
5043
5044 return true;
5045}
5046
5047 // Returns true if TCO is possible between the caller's and callee's
5048 // calling conventions.
5049static bool
5050 areCallingConvEligibleForTCO_64SVR4(CallingConv::ID CallerCC,
5051 CallingConv::ID CalleeCC) {
5052 // Tail calls are possible with fastcc and ccc.
5053 auto isTailCallableCC = [] (CallingConv::ID CC){
5054 return CC == CallingConv::C || CC == CallingConv::Fast;
5055 };
5056 if (!isTailCallableCC(CallerCC) || !isTailCallableCC(CalleeCC))
5057 return false;
5058
5059 // We can safely tail call both fastcc and ccc callees from a c calling
5060 // convention caller. If the caller is fastcc, we may have less stack space
5061 // than a non-fastcc caller with the same signature so disable tail-calls in
5062 // that case.
5063 return CallerCC == CallingConv::C || CallerCC == CalleeCC;
5064}
5065
5066bool PPCTargetLowering::IsEligibleForTailCallOptimization_64SVR4(
5067 const GlobalValue *CalleeGV, CallingConv::ID CalleeCC,
5068 CallingConv::ID CallerCC, const CallBase *CB, bool isVarArg,
5069 const SmallVectorImpl<ISD::OutputArg> &Outs,
5070 const SmallVectorImpl<ISD::InputArg> &Ins, const Function *CallerFunc,
5071 bool isCalleeExternalSymbol) const {
5072 bool TailCallOpt = getTargetMachine().Options.GuaranteedTailCallOpt;
5073
5074 if (DisableSCO && !TailCallOpt) return false;
5075
5076 // Variadic argument functions are not supported.
5077 if (isVarArg) return false;
5078
5079 // Check that the calling conventions are compatible for tco.
5080 if (!areCallingConvEligibleForTCO_64SVR4(CallerCC, CalleeCC))
5081 return false;
5082
5083 // A caller containing any byval parameter is not supported.
5084 if (any_of(Ins, [](const ISD::InputArg &IA) { return IA.Flags.isByVal(); }))
5085 return false;
5086
5087 // A callee containing any byval parameter is not supported either.
5088 // Note: This is a quick workaround, because in some cases, e.g.
5089 // caller's stack size > callee's stack size, we are still able to apply
5090 // sibling call optimization. For example, gcc is able to do SCO for caller1
5091 // in the following example, but not for caller2.
5092 // struct test {
5093 // long int a;
5094 // char ary[56];
5095 // } gTest;
5096 // __attribute__((noinline)) int callee(struct test v, struct test *b) {
5097 // b->a = v.a;
5098 // return 0;
5099 // }
5100 // void caller1(struct test a, struct test c, struct test *b) {
5101 // callee(gTest, b); }
5102 // void caller2(struct test *b) { callee(gTest, b); }
5103 if (any_of(Outs, [](const ISD::OutputArg& OA) { return OA.Flags.isByVal(); }))
5104 return false;
5105
5106 // If callee and caller use different calling conventions, we cannot pass
5107 // parameters on stack since offsets for the parameter area may be different.
5108 if (CallerCC != CalleeCC && needStackSlotPassParameters(Subtarget, Outs))
5109 return false;
5110
5111 // All variants of 64-bit ELF ABIs without PC-Relative addressing require that
5112 // the caller and callee share the same TOC for TCO/SCO. If the caller and
5113 // callee potentially have different TOC bases then we cannot tail call since
5114 // we need to restore the TOC pointer after the call.
5115 // ref: https://bugzilla.mozilla.org/show_bug.cgi?id=973977
5116 // We cannot guarantee this for indirect calls or calls to external functions.
5117 // When PC-Relative addressing is used, the concept of the TOC is no longer
5118 // applicable so this check is not required.
5119 // Check first for indirect calls.
5120 if (!Subtarget.isUsingPCRelativeCalls() &&
5121 !isFunctionGlobalAddress(CalleeGV) && !isCalleeExternalSymbol)
5122 return false;
5123
5124 // Check if we share the TOC base.
5125 if (!Subtarget.isUsingPCRelativeCalls() &&
5126 !callsShareTOCBase(CallerFunc, CalleeGV, getTargetMachine()))
5127 return false;
5128
5129 // TCO allows altering callee ABI, so we don't have to check further.
5130 if (CalleeCC == CallingConv::Fast && TailCallOpt)
5131 return true;
5132
5133 if (DisableSCO) return false;
5134
5135 // If the callee uses the same argument list as the caller, then we can
5136 // apply SCO in this case. If not, we need to check whether the callee needs
5137 // stack space for passing arguments.
5138 // PC Relative tail calls may not have a CallBase.
5139 // If there is no CallBase we cannot verify if we have the same argument
5140 // list so assume that we don't have the same argument list.
5141 if (CB && !hasSameArgumentList(CallerFunc, *CB) &&
5142 needStackSlotPassParameters(Subtarget, Outs))
5143 return false;
5144 else if (!CB && needStackSlotPassParameters(Subtarget, Outs))
5145 return false;
5146
5147 return true;
5148}
5149
5150/// IsEligibleForTailCallOptimization - Check whether the call is eligible
5151/// for tail call optimization. Targets which want to do tail call
5152/// optimization should implement this function.
5153bool PPCTargetLowering::IsEligibleForTailCallOptimization(
5154 const GlobalValue *CalleeGV, CallingConv::ID CalleeCC,
5155 CallingConv::ID CallerCC, bool isVarArg,
5156 const SmallVectorImpl<ISD::InputArg> &Ins) const {
5157 if (!getTargetMachine().Options.GuaranteedTailCallOpt)
5158 return false;
5159
5160 // Variable argument functions are not supported.
5161 if (isVarArg)
5162 return false;
5163
5164 if (CalleeCC == CallingConv::Fast && CallerCC == CalleeCC) {
5165 // Functions containing by val parameters are not supported.
5166 if (any_of(Ins, [](const ISD::InputArg &IA) { return IA.Flags.isByVal(); }))
5167 return false;
5168
5169 // Non-PIC/GOT tail calls are supported.
5170 if (getTargetMachine().getRelocationModel() != Reloc::PIC_)
5171 return true;
5172
5173 // At the moment we can only do local tail calls (in same module, hidden
5174 // or protected) if we are generating PIC.
5175 if (CalleeGV)
5176 return CalleeGV->hasHiddenVisibility() ||
5177 CalleeGV->hasProtectedVisibility();
5178 }
5179
5180 return false;
5181}
5182
5183 /// isBLACompatibleAddress - Return the immediate to use if the specified
5184/// 32-bit value is representable in the immediate field of a BxA instruction.
5185 static SDNode *isBLACompatibleAddress(SDValue Op, SelectionDAG &DAG) {
5186   ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
5187 if (!C) return nullptr;
5188
5189 int Addr = C->getZExtValue();
5190 if ((Addr & 3) != 0 || // Low 2 bits are implicitly zero.
5191 SignExtend32<26>(Addr) != Addr)
5192 return nullptr; // Top 6 bits have to be sext of immediate.
5193
5194   return DAG
5195       .getConstant(
5196           (int)C->getZExtValue() >> 2, SDLoc(Op),
5197           DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()))
5198       .getNode();
5199}
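// Informal example of the check above: an absolute target such as 0x1000 is
// word-aligned and survives SignExtend32<26>, so it is accepted and returned
// as the shifted immediate 0x1000 >> 2; a target such as 0x2000000 sets bit 25
// and is rejected, since the upper bits would no longer be a sign extension.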
5200
5201namespace {
5202
5203struct TailCallArgumentInfo {
5204 SDValue Arg;
5205 SDValue FrameIdxOp;
5206 int FrameIdx = 0;
5207
5208 TailCallArgumentInfo() = default;
5209};
5210
5211} // end anonymous namespace
5212
5213/// StoreTailCallArgumentsToStackSlot - Stores arguments to their stack slot.
5214 static void StoreTailCallArgumentsToStackSlot(
5215     SelectionDAG &DAG, SDValue Chain,
5216 const SmallVectorImpl<TailCallArgumentInfo> &TailCallArgs,
5217 SmallVectorImpl<SDValue> &MemOpChains, const SDLoc &dl) {
5218 for (unsigned i = 0, e = TailCallArgs.size(); i != e; ++i) {
5219 SDValue Arg = TailCallArgs[i].Arg;
5220 SDValue FIN = TailCallArgs[i].FrameIdxOp;
5221 int FI = TailCallArgs[i].FrameIdx;
5222 // Store relative to framepointer.
5223 MemOpChains.push_back(DAG.getStore(
5224 Chain, dl, Arg, FIN,
5225         MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI)));
5226   }
5227}
5228
5229/// EmitTailCallStoreFPAndRetAddr - Move the frame pointer and return address to
5230/// the appropriate stack slot for the tail call optimized function call.
5231 static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG, SDValue Chain,
5232                                              SDValue OldRetAddr, SDValue OldFP,
5233 int SPDiff, const SDLoc &dl) {
5234 if (SPDiff) {
5235 // Calculate the new stack slot for the return address.
5236     MachineFunction &MF = DAG.getMachineFunction();
5237     const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
5238 const PPCFrameLowering *FL = Subtarget.getFrameLowering();
5239 int SlotSize = Subtarget.isPPC64() ? 8 : 4;
5240 int NewRetAddrLoc = SPDiff + FL->getReturnSaveOffset();
5241 int NewRetAddr = MF.getFrameInfo().CreateFixedObject(SlotSize,
5242 NewRetAddrLoc, true);
5243 SDValue NewRetAddrFrIdx =
5244 DAG.getFrameIndex(NewRetAddr, Subtarget.getScalarIntVT());
5245 Chain = DAG.getStore(Chain, dl, OldRetAddr, NewRetAddrFrIdx,
5246 MachinePointerInfo::getFixedStack(MF, NewRetAddr));
5247 }
5248 return Chain;
5249}
5250
5251/// CalculateTailCallArgDest - Remember Argument for later processing. Calculate
5252/// the position of the argument.
5253 static void CalculateTailCallArgDest(
5254     SelectionDAG &DAG, MachineFunction &MF, bool IsPPC64, SDValue Arg,
5255 int SPDiff, unsigned ArgOffset,
5256 SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments) {
5257 int Offset = ArgOffset + SPDiff;
5258 uint32_t OpSize = (Arg.getValueSizeInBits() + 7) / 8;
5259 int FI = MF.getFrameInfo().CreateFixedObject(OpSize, Offset, true);
5260 EVT VT = IsPPC64 ? MVT::i64 : MVT::i32;
5261 SDValue FIN = DAG.getFrameIndex(FI, VT);
5262 TailCallArgumentInfo Info;
5263 Info.Arg = Arg;
5264 Info.FrameIdxOp = FIN;
5265 Info.FrameIdx = FI;
5266 TailCallArguments.push_back(Info);
5267}
5268
5269/// EmitTCFPAndRetAddrLoad - Emit load from frame pointer and return address
5270/// stack slot. Returns the chain as result and the loaded frame pointers in
5271/// LROpOut/FPOpout. Used when tail calling.
5272SDValue PPCTargetLowering::EmitTailCallLoadFPAndRetAddr(
5273 SelectionDAG &DAG, int SPDiff, SDValue Chain, SDValue &LROpOut,
5274 SDValue &FPOpOut, const SDLoc &dl) const {
5275 if (SPDiff) {
5276 // Load the LR and FP stack slot for later adjusting.
5277 LROpOut = getReturnAddrFrameIndex(DAG);
5278 LROpOut = DAG.getLoad(Subtarget.getScalarIntVT(), dl, Chain, LROpOut,
5279                           MachinePointerInfo());
5280     Chain = SDValue(LROpOut.getNode(), 1);
5281 }
5282 return Chain;
5283}
5284
5285/// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified
5286/// by "Src" to address "Dst" of size "Size". Alignment information is
5287/// specified by the specific parameter attribute. The copy will be passed as
5288/// a byval function parameter.
5289/// Sometimes what we are copying is the end of a larger object, the part that
5290/// does not fit in registers.
5291 static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst,
5292                                          SDValue Chain, ISD::ArgFlagsTy Flags,
5293 SelectionDAG &DAG, const SDLoc &dl) {
5294 SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), dl, MVT::i32);
5295 return DAG.getMemcpy(
5296 Chain, dl, Dst, Src, SizeNode, Flags.getNonZeroByValAlign(), false, false,
5297 /*CI=*/nullptr, std::nullopt, MachinePointerInfo(), MachinePointerInfo());
5298}
5299
5300/// LowerMemOpCallTo - Store the argument to the stack or remember it in case of
5301/// tail calls.
5302 static void LowerMemOpCallTo(
5303     SelectionDAG &DAG, MachineFunction &MF, SDValue Chain, SDValue Arg,
5304 SDValue PtrOff, int SPDiff, unsigned ArgOffset, bool isPPC64,
5305 bool isTailCall, bool isVector, SmallVectorImpl<SDValue> &MemOpChains,
5306 SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments, const SDLoc &dl) {
5307   EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
5308   if (!isTailCall) {
5309 if (isVector) {
5310 SDValue StackPtr;
5311 if (isPPC64)
5312 StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
5313 else
5314 StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
5315 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr,
5316 DAG.getConstant(ArgOffset, dl, PtrVT));
5317 }
5318 MemOpChains.push_back(
5319 DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));
5320 // Calculate and remember argument location.
5321 } else
5322 CalculateTailCallArgDest(DAG, MF, isPPC64, Arg, SPDiff, ArgOffset,
5323 TailCallArguments);
5324}
5325
5326static void
5327 PrepareTailCall(SelectionDAG &DAG, SDValue &InGlue, SDValue &Chain,
5328                 const SDLoc &dl, int SPDiff, unsigned NumBytes, SDValue LROp,
5329 SDValue FPOp,
5330 SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments) {
5331 // Emit a sequence of copyto/copyfrom virtual registers for arguments that
5332 // might overwrite each other in case of tail call optimization.
5333 SmallVector<SDValue, 8> MemOpChains2;
5334 // Do not flag preceding copytoreg stuff together with the following stuff.
5335 InGlue = SDValue();
5336 StoreTailCallArgumentsToStackSlot(DAG, Chain, TailCallArguments,
5337 MemOpChains2, dl);
5338 if (!MemOpChains2.empty())
5339 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains2);
5340
5341 // Store the return address to the appropriate stack slot.
5342 Chain = EmitTailCallStoreFPAndRetAddr(DAG, Chain, LROp, FPOp, SPDiff, dl);
5343
5344 // Emit callseq_end just before tailcall node.
5345 Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, InGlue, dl);
5346 InGlue = Chain.getValue(1);
5347}
5348
5349// Is this global address that of a function that can be called by name? (as
5350// opposed to something that must hold a descriptor for an indirect call).
5351static bool isFunctionGlobalAddress(const GlobalValue *GV) {
5352 if (GV) {
5353 if (GV->isThreadLocal())
5354 return false;
5355
5356 return GV->getValueType()->isFunctionTy();
5357 }
5358
5359 return false;
5360}
5361
5362SDValue PPCTargetLowering::LowerCallResult(
5363 SDValue Chain, SDValue InGlue, CallingConv::ID CallConv, bool isVarArg,
5364 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
5365 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
5366   SmallVector<CCValAssign, 16> RVLocs;
5367   CCState CCRetInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
5368 *DAG.getContext());
5369
5370 CCRetInfo.AnalyzeCallResult(
5371 Ins, (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold)
5372                ? RetCC_PPC_Cold
5373                : RetCC_PPC);
5374
5375 // Copy all of the result registers out of their specified physreg.
5376 for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
5377 CCValAssign &VA = RVLocs[i];
5378 assert(VA.isRegLoc() && "Can only return in registers!");
5379
5380 SDValue Val;
5381
5382 if (Subtarget.hasSPE() && VA.getLocVT() == MVT::f64) {
5383 SDValue Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
5384 InGlue);
5385 Chain = Lo.getValue(1);
5386 InGlue = Lo.getValue(2);
5387 VA = RVLocs[++i]; // skip ahead to next loc
5388 SDValue Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
5389 InGlue);
5390 Chain = Hi.getValue(1);
5391 InGlue = Hi.getValue(2);
5392 if (!Subtarget.isLittleEndian())
5393 std::swap (Lo, Hi);
5394 Val = DAG.getNode(PPCISD::BUILD_SPE64, dl, MVT::f64, Lo, Hi);
5395 } else {
5396 Val = DAG.getCopyFromReg(Chain, dl,
5397 VA.getLocReg(), VA.getLocVT(), InGlue);
5398 Chain = Val.getValue(1);
5399 InGlue = Val.getValue(2);
5400 }
5401
5402 switch (VA.getLocInfo()) {
5403 default: llvm_unreachable("Unknown loc info!");
5404 case CCValAssign::Full: break;
5405 case CCValAssign::AExt:
5406 Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
5407 break;
5408 case CCValAssign::ZExt:
5409 Val = DAG.getNode(ISD::AssertZext, dl, VA.getLocVT(), Val,
5410 DAG.getValueType(VA.getValVT()));
5411 Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
5412 break;
5413 case CCValAssign::SExt:
5414 Val = DAG.getNode(ISD::AssertSext, dl, VA.getLocVT(), Val,
5415 DAG.getValueType(VA.getValVT()));
5416 Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
5417 break;
5418 }
5419
5420 InVals.push_back(Val);
5421 }
5422
5423 return Chain;
5424}
5425
5426static bool isIndirectCall(const SDValue &Callee, SelectionDAG &DAG,
5427 const PPCSubtarget &Subtarget, bool isPatchPoint) {
5428 auto *G = dyn_cast<GlobalAddressSDNode>(Callee);
5429 const GlobalValue *GV = G ? G->getGlobal() : nullptr;
5430
5431 // PatchPoint calls are not indirect.
5432 if (isPatchPoint)
5433 return false;
5434
5435 if (isFunctionGlobalAddress(GV) || isa<ExternalSymbolSDNode>(Callee))
5436 return false;
5437
5438   // Darwin and 32-bit ELF can use a BLA. The descriptor-based ABIs cannot
5439   // because the immediate function pointer points to a descriptor instead of
5440 // a function entry point. The ELFv2 ABI cannot use a BLA because the function
5441 // pointer immediate points to the global entry point, while the BLA would
5442 // need to jump to the local entry point (see rL211174).
5443 if (!Subtarget.usesFunctionDescriptors() && !Subtarget.isELFv2ABI() &&
5444 isBLACompatibleAddress(Callee, DAG))
5445 return false;
5446
5447 return true;
5448}
5449
5450// AIX and 64-bit ELF ABIs w/o PCRel require a TOC save/restore around calls.
5451static inline bool isTOCSaveRestoreRequired(const PPCSubtarget &Subtarget) {
5452 return Subtarget.isAIXABI() ||
5453 (Subtarget.is64BitELFABI() && !Subtarget.isUsingPCRelativeCalls());
5454}
5455
5456 static unsigned getCallOpcode(PPCTargetLowering::CallFlags CFlags,
5457                               const Function &Caller, const SDValue &Callee,
5458 const PPCSubtarget &Subtarget,
5459 const TargetMachine &TM,
5460 bool IsStrictFPCall = false) {
5461 if (CFlags.IsTailCall)
5462 return PPCISD::TC_RETURN;
5463
5464 unsigned RetOpc = 0;
5465 // This is a call through a function pointer.
5466 if (CFlags.IsIndirect) {
5467     // AIX and the 64-bit ELF ABIs need to maintain the TOC pointer across
5468 // indirect calls. The save of the caller's TOC pointer to the stack will be
5469 // inserted into the DAG as part of call lowering. The restore of the TOC
5470 // pointer is modeled by using a pseudo instruction for the call opcode that
5471 // represents the 2 instruction sequence of an indirect branch and link,
5472 // immediately followed by a load of the TOC pointer from the stack save
5473 // slot into gpr2. For 64-bit ELFv2 ABI with PCRel, do not restore the TOC
5474 // as it is not saved or used.
5475     RetOpc = isTOCSaveRestoreRequired(Subtarget) ? PPCISD::BCTRL_LOAD_TOC
5476                                                  : PPCISD::BCTRL;
5477 } else if (Subtarget.isUsingPCRelativeCalls()) {
5478 assert(Subtarget.is64BitELFABI() && "PC Relative is only on ELF ABI.");
5479 RetOpc = PPCISD::CALL_NOTOC;
5480 } else if (Subtarget.isAIXABI() || Subtarget.is64BitELFABI()) {
5481     // The ABIs that maintain a TOC pointer across calls need to have a nop
5482 // immediately following the call instruction if the caller and callee may
5483 // have different TOC bases. At link time if the linker determines the calls
5484 // may not share a TOC base, the call is redirected to a trampoline inserted
5485     // by the linker. The trampoline will (among other things) save the caller's
5486     // TOC pointer at an ABI-designated offset in the linkage area, and the
5487 // linker will rewrite the nop to be a load of the TOC pointer from the
5488 // linkage area into gpr2.
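    // As an informal illustration, a cross-TOC call on 64-bit ELF is typically
    // emitted as
    //   bl callee
    //   nop
    // and the linker may rewrite the nop into a TOC restore such as
    //   ld r2, 24(r1)    (ELFv2; the ELFv1 TOC save slot is at 40(r1))
    // when it routes the call through a stub that changes the TOC base.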
5489 auto *G = dyn_cast<GlobalAddressSDNode>(Callee);
5490 const GlobalValue *GV = G ? G->getGlobal() : nullptr;
5491 RetOpc =
5492 callsShareTOCBase(&Caller, GV, TM) ? PPCISD::CALL : PPCISD::CALL_NOP;
5493 } else
5494 RetOpc = PPCISD::CALL;
5495 if (IsStrictFPCall) {
5496 switch (RetOpc) {
5497 default:
5498 llvm_unreachable("Unknown call opcode");
5499     case PPCISD::BCTRL_LOAD_TOC:
5500       RetOpc = PPCISD::BCTRL_LOAD_TOC_RM;
5501       break;
5502 case PPCISD::BCTRL:
5503 RetOpc = PPCISD::BCTRL_RM;
5504 break;
5505 case PPCISD::CALL_NOTOC:
5506 RetOpc = PPCISD::CALL_NOTOC_RM;
5507 break;
5508 case PPCISD::CALL:
5509 RetOpc = PPCISD::CALL_RM;
5510 break;
5511 case PPCISD::CALL_NOP:
5512 RetOpc = PPCISD::CALL_NOP_RM;
5513 break;
5514 }
5515 }
5516 return RetOpc;
5517}
5518
5519static SDValue transformCallee(const SDValue &Callee, SelectionDAG &DAG,
5520 const SDLoc &dl, const PPCSubtarget &Subtarget) {
5521 if (!Subtarget.usesFunctionDescriptors() && !Subtarget.isELFv2ABI())
5522 if (SDNode *Dest = isBLACompatibleAddress(Callee, DAG))
5523 return SDValue(Dest, 0);
5524
5525 // Returns true if the callee is local, and false otherwise.
5526 auto isLocalCallee = [&]() {
5527 const GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
5528 const GlobalValue *GV = G ? G->getGlobal() : nullptr;
5529
5530 return DAG.getTarget().shouldAssumeDSOLocal(GV) &&
5531 !isa_and_nonnull<GlobalIFunc>(GV);
5532 };
5533
5534 // The PLT is only used in 32-bit ELF PIC mode. Attempting to use the PLT in
5535 // a static relocation model causes some versions of GNU LD (2.17.50, at
5536 // least) to force BSS-PLT, instead of secure-PLT, even if all objects are
5537 // built with secure-PLT.
5538 bool UsePlt =
5539 Subtarget.is32BitELFABI() && !isLocalCallee() &&
5540       Subtarget.getTargetMachine().getRelocationModel() == Reloc::PIC_;
5541 
5542 const auto getAIXFuncEntryPointSymbolSDNode = [&](const GlobalValue *GV) {
5543 const TargetMachine &TM = Subtarget.getTargetMachine();
5544 const TargetLoweringObjectFile *TLOF = TM.getObjFileLowering();
5545 MCSymbolXCOFF *S =
5546 cast<MCSymbolXCOFF>(TLOF->getFunctionEntryPointSymbol(GV, TM));
5547
5548     MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
5549     return DAG.getMCSymbol(S, PtrVT);
5550 };
5551
5552 auto *G = dyn_cast<GlobalAddressSDNode>(Callee);
5553 const GlobalValue *GV = G ? G->getGlobal() : nullptr;
5554 if (isFunctionGlobalAddress(GV)) {
5555 const GlobalValue *GV = cast<GlobalAddressSDNode>(Callee)->getGlobal();
5556
5557 if (Subtarget.isAIXABI()) {
5558 assert(!isa<GlobalIFunc>(GV) && "IFunc is not supported on AIX.");
5559 return getAIXFuncEntryPointSymbolSDNode(GV);
5560 }
5561 return DAG.getTargetGlobalAddress(GV, dl, Callee.getValueType(), 0,
5562 UsePlt ? PPCII::MO_PLT : 0);
5563 }
5564
5565 if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
5566 const char *SymName = S->getSymbol();
5567 if (Subtarget.isAIXABI()) {
5568 // If there exists a user-declared function whose name is the same as the
5569 // ExternalSymbol's, then we pick up the user-declared version.
5570       const Module *Mod = DAG.getMachineFunction().getFunction().getParent();
5571       if (const Function *F =
5572 dyn_cast_or_null<Function>(Mod->getNamedValue(SymName)))
5573 return getAIXFuncEntryPointSymbolSDNode(F);
5574
5575 // On AIX, direct function calls reference the symbol for the function's
5576 // entry point, which is named by prepending a "." before the function's
5577 // C-linkage name. A Qualname is returned here because an external
5578 // function entry point is a csect with XTY_ER property.
5579 const auto getExternalFunctionEntryPointSymbol = [&](StringRef SymName) {
5580 auto &Context = DAG.getMachineFunction().getContext();
5581 MCSectionXCOFF *Sec = Context.getXCOFFSection(
5582 (Twine(".") + Twine(SymName)).str(), SectionKind::getMetadata(),
5583             XCOFF::CsectProperties(XCOFF::XMC_PR, XCOFF::XTY_ER));
5584         return Sec->getQualNameSymbol();
5585 };
5586
5587 SymName = getExternalFunctionEntryPointSymbol(SymName)->getName().data();
5588 }
5589 return DAG.getTargetExternalSymbol(SymName, Callee.getValueType(),
5590 UsePlt ? PPCII::MO_PLT : 0);
5591 }
5592
5593 // No transformation needed.
5594 assert(Callee.getNode() && "What no callee?");
5595 return Callee;
5596}
5597
5598 static SDValue getOutputChainFromCallSeq(SDValue CallSeqStart) {
5599   assert(CallSeqStart.getOpcode() == ISD::CALLSEQ_START &&
5600 "Expected a CALLSEQ_STARTSDNode.");
5601
5602 // The last operand is the chain, except when the node has glue. If the node
5603 // has glue, then the last operand is the glue, and the chain is the second
5604 // last operand.
5605 SDValue LastValue = CallSeqStart.getValue(CallSeqStart->getNumValues() - 1);
5606 if (LastValue.getValueType() != MVT::Glue)
5607 return LastValue;
5608
5609 return CallSeqStart.getValue(CallSeqStart->getNumValues() - 2);
5610}
5611
5612 // Creates the node that moves a function's address into the count register
5613// to prepare for an indirect call instruction.
5614static void prepareIndirectCall(SelectionDAG &DAG, SDValue &Callee,
5615 SDValue &Glue, SDValue &Chain,
5616 const SDLoc &dl) {
5617 SDValue MTCTROps[] = {Chain, Callee, Glue};
5618 EVT ReturnTypes[] = {MVT::Other, MVT::Glue};
5619 Chain = DAG.getNode(PPCISD::MTCTR, dl, ReturnTypes,
5620 ArrayRef(MTCTROps, Glue.getNode() ? 3 : 2));
5621 // The glue is the second value produced.
5622 Glue = Chain.getValue(1);
5623}
5624
5625 static void prepareDescriptorIndirectCall(SelectionDAG &DAG, SDValue &Callee,
5626                                           SDValue &Glue, SDValue &Chain,
5627 SDValue CallSeqStart,
5628 const CallBase *CB, const SDLoc &dl,
5629 bool hasNest,
5630 const PPCSubtarget &Subtarget) {
5631 // Function pointers in the 64-bit SVR4 ABI do not point to the function
5632 // entry point, but to the function descriptor (the function entry point
5633 // address is part of the function descriptor though).
5634 // The function descriptor is a three doubleword structure with the
5635 // following fields: function entry point, TOC base address and
5636 // environment pointer.
5637 // Thus for a call through a function pointer, the following actions need
5638 // to be performed:
5639 // 1. Save the TOC of the caller in the TOC save area of its stack
5640 // frame (this is done in LowerCall_Darwin() or LowerCall_64SVR4()).
5641 // 2. Load the address of the function entry point from the function
5642 // descriptor.
5643 // 3. Load the TOC of the callee from the function descriptor into r2.
5644 // 4. Load the environment pointer from the function descriptor into
5645 // r11.
5646 // 5. Branch to the function entry point address.
5647 // 6. On return of the callee, the TOC of the caller needs to be
5648 // restored (this is done in FinishCall()).
5649 //
5650 // The loads are scheduled at the beginning of the call sequence, and the
5651 // register copies are flagged together to ensure that no other
5652 // operations can be scheduled in between. E.g. without flagging the
5653 // copies together, a TOC access in the caller could be scheduled between
5654 // the assignment of the callee TOC and the branch to the callee, which leads
5655 // to incorrect code.
5656
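  // Rough sketch of the 64-bit ELFv1 descriptor layout assumed below:
  //   [Callee + 0]   function entry point
  //   [Callee + 8]   TOC base            (descriptorTOCAnchorOffset())
  //   [Callee + 16]  environment pointer (descriptorEnvironmentPointerOffset())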
5657 // Start by loading the function address from the descriptor.
5658 SDValue LDChain = getOutputChainFromCallSeq(CallSeqStart);
5659 auto MMOFlags = Subtarget.hasInvariantFunctionDescriptors()
5660                       ? (MachineMemOperand::MODereferenceable |
5661                          MachineMemOperand::MOInvariant)
5662                       : MachineMemOperand::MONone;
5663 
5664 MachinePointerInfo MPI(CB ? CB->getCalledOperand() : nullptr);
5665
5666 // Registers used in building the DAG.
5667 const MCRegister EnvPtrReg = Subtarget.getEnvironmentPointerRegister();
5668 const MCRegister TOCReg = Subtarget.getTOCPointerRegister();
5669
5670 // Offsets of descriptor members.
5671 const unsigned TOCAnchorOffset = Subtarget.descriptorTOCAnchorOffset();
5672 const unsigned EnvPtrOffset = Subtarget.descriptorEnvironmentPointerOffset();
5673
5674 const MVT RegVT = Subtarget.getScalarIntVT();
5675 const Align Alignment = Subtarget.isPPC64() ? Align(8) : Align(4);
5676
5677   // One load for the function's entry point address.
5678 SDValue LoadFuncPtr = DAG.getLoad(RegVT, dl, LDChain, Callee, MPI,
5679 Alignment, MMOFlags);
5680
5681 // One for loading the TOC anchor for the module that contains the called
5682 // function.
5683 SDValue TOCOff = DAG.getIntPtrConstant(TOCAnchorOffset, dl);
5684 SDValue AddTOC = DAG.getNode(ISD::ADD, dl, RegVT, Callee, TOCOff);
5685 SDValue TOCPtr =
5686 DAG.getLoad(RegVT, dl, LDChain, AddTOC,
5687 MPI.getWithOffset(TOCAnchorOffset), Alignment, MMOFlags);
5688
5689 // One for loading the environment pointer.
5690 SDValue PtrOff = DAG.getIntPtrConstant(EnvPtrOffset, dl);
5691 SDValue AddPtr = DAG.getNode(ISD::ADD, dl, RegVT, Callee, PtrOff);
5692 SDValue LoadEnvPtr =
5693 DAG.getLoad(RegVT, dl, LDChain, AddPtr,
5694 MPI.getWithOffset(EnvPtrOffset), Alignment, MMOFlags);
5695
5696
5697 // Then copy the newly loaded TOC anchor to the TOC pointer.
5698 SDValue TOCVal = DAG.getCopyToReg(Chain, dl, TOCReg, TOCPtr, Glue);
5699 Chain = TOCVal.getValue(0);
5700 Glue = TOCVal.getValue(1);
5701
5702 // If the function call has an explicit 'nest' parameter, it takes the
5703 // place of the environment pointer.
5704 assert((!hasNest || !Subtarget.isAIXABI()) &&
5705 "Nest parameter is not supported on AIX.");
5706 if (!hasNest) {
5707 SDValue EnvVal = DAG.getCopyToReg(Chain, dl, EnvPtrReg, LoadEnvPtr, Glue);
5708 Chain = EnvVal.getValue(0);
5709 Glue = EnvVal.getValue(1);
5710 }
5711
5712 // The rest of the indirect call sequence is the same as the non-descriptor
5713 // DAG.
5714 prepareIndirectCall(DAG, LoadFuncPtr, Glue, Chain, dl);
5715}
5716
5717static void
5719 PPCTargetLowering::CallFlags CFlags, const SDLoc &dl,
5720 SelectionDAG &DAG,
5721 SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass,
5722 SDValue Glue, SDValue Chain, SDValue &Callee, int SPDiff,
5723 const PPCSubtarget &Subtarget) {
5724 const bool IsPPC64 = Subtarget.isPPC64();
5725 // MVT for a general purpose register.
5726 const MVT RegVT = Subtarget.getScalarIntVT();
5727
5728 // First operand is always the chain.
5729 Ops.push_back(Chain);
5730
5731 // If it's a direct call pass the callee as the second operand.
5732 if (!CFlags.IsIndirect)
5733 Ops.push_back(Callee);
5734 else {
5735 assert(!CFlags.IsPatchPoint && "Patch point calls are not indirect.");
5736
5737 // For the TOC based ABIs, we have saved the TOC pointer to the linkage area
5738 // on the stack (this would have been done in `LowerCall_64SVR4` or
5739 // `LowerCall_AIX`). The call instruction is a pseudo instruction that
5740 // represents both the indirect branch and a load that restores the TOC
5741 // pointer from the linkage area. The operand for the TOC restore is an add
5742 // of the TOC save offset to the stack pointer. This must be the second
5743 // operand: after the chain input but before any other variadic arguments.
5744 // For 64-bit ELFv2 ABI with PCRel, do not restore the TOC as it is not
5745 // saved or used.
5746 if (isTOCSaveRestoreRequired(Subtarget)) {
5747 const MCRegister StackPtrReg = Subtarget.getStackPointerRegister();
5748
5749 SDValue StackPtr = DAG.getRegister(StackPtrReg, RegVT);
5750 unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
5751 SDValue TOCOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
5752 SDValue AddTOC = DAG.getNode(ISD::ADD, dl, RegVT, StackPtr, TOCOff);
5753 Ops.push_back(AddTOC);
5754 }
5755
5756 // Add the register used for the environment pointer.
5757 if (Subtarget.usesFunctionDescriptors() && !CFlags.HasNest)
5758       Ops.push_back(DAG.getRegister(Subtarget.getEnvironmentPointerRegister(),
5759                                     RegVT));
5760
5761
5762 // Add CTR register as callee so a bctr can be emitted later.
5763 if (CFlags.IsTailCall)
5764 Ops.push_back(DAG.getRegister(IsPPC64 ? PPC::CTR8 : PPC::CTR, RegVT));
5765 }
5766
5767 // If this is a tail call add stack pointer delta.
5768 if (CFlags.IsTailCall)
5769 Ops.push_back(DAG.getConstant(SPDiff, dl, MVT::i32));
5770
5771 // Add argument registers to the end of the list so that they are known live
5772 // into the call.
5773 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
5774 Ops.push_back(DAG.getRegister(RegsToPass[i].first,
5775 RegsToPass[i].second.getValueType()));
5776
5777 // We cannot add R2/X2 as an operand here for PATCHPOINT, because there is
5778 // no way to mark dependencies as implicit here.
5779 // We will add the R2/X2 dependency in EmitInstrWithCustomInserter.
5780 if ((Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) &&
5781 !CFlags.IsPatchPoint && !Subtarget.isUsingPCRelativeCalls())
5782 Ops.push_back(DAG.getRegister(Subtarget.getTOCPointerRegister(), RegVT));
5783
5784 // Add implicit use of CR bit 6 for 32-bit SVR4 vararg calls
5785 if (CFlags.IsVarArg && Subtarget.is32BitELFABI())
5786 Ops.push_back(DAG.getRegister(PPC::CR1EQ, MVT::i32));
5787
5788 // Add a register mask operand representing the call-preserved registers.
5789 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
5790 const uint32_t *Mask =
5791 TRI->getCallPreservedMask(DAG.getMachineFunction(), CFlags.CallConv);
5792 assert(Mask && "Missing call preserved mask for calling convention");
5793 Ops.push_back(DAG.getRegisterMask(Mask));
5794
5795 // If the glue is valid, it is the last operand.
5796 if (Glue.getNode())
5797 Ops.push_back(Glue);
5798}
5799
5800SDValue PPCTargetLowering::FinishCall(
5801 CallFlags CFlags, const SDLoc &dl, SelectionDAG &DAG,
5802 SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass, SDValue Glue,
5803 SDValue Chain, SDValue CallSeqStart, SDValue &Callee, int SPDiff,
5804 unsigned NumBytes, const SmallVectorImpl<ISD::InputArg> &Ins,
5805 SmallVectorImpl<SDValue> &InVals, const CallBase *CB) const {
5806
5807 if ((Subtarget.is64BitELFABI() && !Subtarget.isUsingPCRelativeCalls()) ||
5808 Subtarget.isAIXABI())
5809 setUsesTOCBasePtr(DAG);
5810
5811 unsigned CallOpc =
5812 getCallOpcode(CFlags, DAG.getMachineFunction().getFunction(), Callee,
5813 Subtarget, DAG.getTarget(), CB ? CB->isStrictFP() : false);
5814
5815 if (!CFlags.IsIndirect)
5816 Callee = transformCallee(Callee, DAG, dl, Subtarget);
5817 else if (Subtarget.usesFunctionDescriptors())
5818 prepareDescriptorIndirectCall(DAG, Callee, Glue, Chain, CallSeqStart, CB,
5819 dl, CFlags.HasNest, Subtarget);
5820 else
5821 prepareIndirectCall(DAG, Callee, Glue, Chain, dl);
5822
5823 // Build the operand list for the call instruction.
5824   SmallVector<SDValue, 8> Ops;
5825   buildCallOperands(Ops, CFlags, dl, DAG, RegsToPass, Glue, Chain, Callee,
5826 SPDiff, Subtarget);
5827
5828 // Emit tail call.
5829 if (CFlags.IsTailCall) {
5830 // Indirect tail call when using PC Relative calls do not have the same
5831 // constraints.
5832 assert(((Callee.getOpcode() == ISD::Register &&
5833 cast<RegisterSDNode>(Callee)->getReg() == PPC::CTR) ||
5834 Callee.getOpcode() == ISD::TargetExternalSymbol ||
5835 Callee.getOpcode() == ISD::TargetGlobalAddress ||
5836 isa<ConstantSDNode>(Callee) ||
5837 (CFlags.IsIndirect && Subtarget.isUsingPCRelativeCalls())) &&
5838 "Expecting a global address, external symbol, absolute value, "
5839 "register or an indirect tail call when PC Relative calls are "
5840 "used.");
5841 // PC Relative calls also use TC_RETURN as the way to mark tail calls.
5842 assert(CallOpc == PPCISD::TC_RETURN &&
5843 "Unexpected call opcode for a tail call.");
5844     DAG.getMachineFunction().getFrameInfo().setHasTailCall();
5845     SDValue Ret = DAG.getNode(CallOpc, dl, MVT::Other, Ops);
5846 DAG.addNoMergeSiteInfo(Ret.getNode(), CFlags.NoMerge);
5847 return Ret;
5848 }
5849
5850 std::array<EVT, 2> ReturnTypes = {{MVT::Other, MVT::Glue}};
5851 Chain = DAG.getNode(CallOpc, dl, ReturnTypes, Ops);
5852 DAG.addNoMergeSiteInfo(Chain.getNode(), CFlags.NoMerge);
5853 Glue = Chain.getValue(1);
5854
5855 // When performing tail call optimization the callee pops its arguments off
5856 // the stack. Account for this here so these bytes can be pushed back on in
5857 // PPCFrameLowering::eliminateCallFramePseudoInstr.
5858 int BytesCalleePops = (CFlags.CallConv == CallingConv::Fast &&
5859                          getTargetMachine().Options.GuaranteedTailCallOpt)
5860                             ? NumBytes
5861 : 0;
5862
5863 Chain = DAG.getCALLSEQ_END(Chain, NumBytes, BytesCalleePops, Glue, dl);
5864 Glue = Chain.getValue(1);
5865
5866 return LowerCallResult(Chain, Glue, CFlags.CallConv, CFlags.IsVarArg, Ins, dl,
5867 DAG, InVals);
5868}
5869
5870 bool PPCTargetLowering::supportsTailCallFor(const CallBase *CB) const {
5871   CallingConv::ID CalleeCC = CB->getCallingConv();
5872 const Function *CallerFunc = CB->getCaller();
5873 CallingConv::ID CallerCC = CallerFunc->getCallingConv();
5874 const Function *CalleeFunc = CB->getCalledFunction();
5875 if (!CalleeFunc)
5876 return false;
5877 const GlobalValue *CalleeGV = dyn_cast<GlobalValue>(CalleeFunc);
5878
5879   SmallVector<ISD::OutputArg, 2> Outs;
5880   SmallVector<ISD::InputArg, 2> Ins;
5881 
5882 GetReturnInfo(CalleeCC, CalleeFunc->getReturnType(),
5883 CalleeFunc->getAttributes(), Outs, *this,
5884 CalleeFunc->getDataLayout());
5885
5886 return isEligibleForTCO(CalleeGV, CalleeCC, CallerCC, CB,
5887 CalleeFunc->isVarArg(), Outs, Ins, CallerFunc,
5888 false /*isCalleeExternalSymbol*/);
5889}
5890
5891bool PPCTargetLowering::isEligibleForTCO(
5892 const GlobalValue *CalleeGV, CallingConv::ID CalleeCC,
5893 CallingConv::ID CallerCC, const CallBase *CB, bool isVarArg,
5894     const SmallVectorImpl<ISD::OutputArg> &Outs,
5895     const SmallVectorImpl<ISD::InputArg> &Ins, const Function *CallerFunc,
5896 bool isCalleeExternalSymbol) const {
5897 if (Subtarget.useLongCalls() && !(CB && CB->isMustTailCall()))
5898 return false;
5899
5900 if (Subtarget.isSVR4ABI() && Subtarget.isPPC64())
5901 return IsEligibleForTailCallOptimization_64SVR4(
5902 CalleeGV, CalleeCC, CallerCC, CB, isVarArg, Outs, Ins, CallerFunc,
5903 isCalleeExternalSymbol);
5904 else
5905 return IsEligibleForTailCallOptimization(CalleeGV, CalleeCC, CallerCC,
5906 isVarArg, Ins);
5907}
5908
5909SDValue
5910PPCTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
5911 SmallVectorImpl<SDValue> &InVals) const {
5912 SelectionDAG &DAG = CLI.DAG;
5913 SDLoc &dl = CLI.DL;
5914   SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
5915   SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
5916   SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
5917   SDValue Chain = CLI.Chain;
5918 SDValue Callee = CLI.Callee;
5919 bool &isTailCall = CLI.IsTailCall;
5920 CallingConv::ID CallConv = CLI.CallConv;
5921 bool isVarArg = CLI.IsVarArg;
5922 bool isPatchPoint = CLI.IsPatchPoint;
5923 const CallBase *CB = CLI.CB;
5924
5925 if (isTailCall) {
5926     MachineFunction &MF = DAG.getMachineFunction();
5927     CallingConv::ID CallerCC = MF.getFunction().getCallingConv();
5928 auto *G = dyn_cast<GlobalAddressSDNode>(Callee);
5929 const GlobalValue *GV = G ? G->getGlobal() : nullptr;
5930 bool IsCalleeExternalSymbol = isa<ExternalSymbolSDNode>(Callee);
5931
5932 isTailCall =
5933 isEligibleForTCO(GV, CallConv, CallerCC, CB, isVarArg, Outs, Ins,
5934 &(MF.getFunction()), IsCalleeExternalSymbol);
5935 if (isTailCall) {
5936 ++NumTailCalls;
5937 if (!getTargetMachine().Options.GuaranteedTailCallOpt)
5938 ++NumSiblingCalls;
5939
5940 // PC Relative calls no longer guarantee that the callee is a Global
5941 // Address Node. The callee could be an indirect tail call in which
5942 // case the SDValue for the callee could be a load (to load the address
5943 // of a function pointer) or it may be a register copy (to move the
5944 // address of the callee from a function parameter into a virtual
5945       // register). It may also be an ExternalSymbolSDNode (e.g. memcpy).
5946 assert((Subtarget.isUsingPCRelativeCalls() ||
5947 isa<GlobalAddressSDNode>(Callee)) &&
5948 "Callee should be an llvm::Function object.");
5949
5950 LLVM_DEBUG(dbgs() << "TCO caller: " << DAG.getMachineFunction().getName()
5951 << "\nTCO callee: ");
5952 LLVM_DEBUG(Callee.dump());
5953 }
5954 }
5955
5956 if (!isTailCall && CB && CB->isMustTailCall())
5957 report_fatal_error("failed to perform tail call elimination on a call "
5958 "site marked musttail");
5959
5960 // When long calls (i.e. indirect calls) are always used, calls are always
5961 // made via function pointer. If we have a function name, first translate it
5962 // into a pointer.
5963 if (Subtarget.useLongCalls() && isa<GlobalAddressSDNode>(Callee) &&
5964 !isTailCall)
5965 Callee = LowerGlobalAddress(Callee, DAG);
5966
5967 CallFlags CFlags(
5968 CallConv, isTailCall, isVarArg, isPatchPoint,
5969 isIndirectCall(Callee, DAG, Subtarget, isPatchPoint),
5970 // hasNest
5971 Subtarget.is64BitELFABI() &&
5972 any_of(Outs, [](ISD::OutputArg Arg) { return Arg.Flags.isNest(); }),
5973 CLI.NoMerge);
5974
5975 if (Subtarget.isAIXABI())
5976 return LowerCall_AIX(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
5977 InVals, CB);
5978
5979 assert(Subtarget.isSVR4ABI());
5980 if (Subtarget.isPPC64())
5981 return LowerCall_64SVR4(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
5982 InVals, CB);
5983 return LowerCall_32SVR4(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
5984 InVals, CB);
5985}
5986
5987SDValue PPCTargetLowering::LowerCall_32SVR4(
5988 SDValue Chain, SDValue Callee, CallFlags CFlags,
5989     const SmallVectorImpl<ISD::OutputArg> &Outs,
5990     const SmallVectorImpl<SDValue> &OutVals,
5991     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
5992     SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
5993     const CallBase *CB) const {
5994 // See PPCTargetLowering::LowerFormalArguments_32SVR4() for a description
5995 // of the 32-bit SVR4 ABI stack frame layout.
5996
5997 const CallingConv::ID CallConv = CFlags.CallConv;
5998 const bool IsVarArg = CFlags.IsVarArg;
5999 const bool IsTailCall = CFlags.IsTailCall;
6000
6001 assert((CallConv == CallingConv::C ||
6002 CallConv == CallingConv::Cold ||
6003 CallConv == CallingConv::Fast) && "Unknown calling convention!");
6004
6005 const Align PtrAlign(4);
6006
6007   MachineFunction &MF = DAG.getMachineFunction();
6008 
6009   // Mark this function as potentially containing a tail call. As a consequence,
6010   // the frame pointer will be used for dynamic stack allocation and for
6011   // restoring the caller's stack pointer in this function's epilogue. This is
6012   // done because a tail-called function might overwrite the value in this
6013   // function's (MF) stack pointer stack slot 0(SP).
6014 if (getTargetMachine().Options.GuaranteedTailCallOpt &&
6015 CallConv == CallingConv::Fast)
6016 MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
6017
6018 // Count how many bytes are to be pushed on the stack, including the linkage
6019 // area, parameter list area and the part of the local variable space which
6020 // contains copies of aggregates which are passed by value.
6021
6022 // Assign locations to all of the outgoing arguments.
6023   SmallVector<CCValAssign, 16> ArgLocs;
6024   PPCCCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
6025
6026 // Reserve space for the linkage area on the stack.
6027 CCInfo.AllocateStack(Subtarget.getFrameLowering()->getLinkageSize(),
6028 PtrAlign);
6029 if (useSoftFloat())
6030 CCInfo.PreAnalyzeCallOperands(Outs);
6031
6032 if (IsVarArg) {
6033 // Handle fixed and variable vector arguments differently.
6034 // Fixed vector arguments go into registers as long as registers are
6035 // available. Variable vector arguments always go into memory.
6036 unsigned NumArgs = Outs.size();
6037
6038 for (unsigned i = 0; i != NumArgs; ++i) {
6039 MVT ArgVT = Outs[i].VT;
6040 ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
6041 bool Result;
6042
6043 if (Outs[i].IsFixed) {
6044 Result = CC_PPC32_SVR4(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags,
6045 CCInfo);
6046 } else {
6047         Result = CC_PPC32_SVR4_VarArg(i, ArgVT, ArgVT, CCValAssign::Full,
6048                                       ArgFlags, CCInfo);
6049 }
6050
6051 if (Result) {
6052#ifndef NDEBUG
6053 errs() << "Call operand #" << i << " has unhandled type "
6054 << ArgVT << "\n";
6055#endif
6056 llvm_unreachable(nullptr);
6057 }
6058 }
6059 } else {
6060 // All arguments are treated the same.
6061 CCInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4);
6062 }
6063 CCInfo.clearWasPPCF128();
6064
6065 // Assign locations to all of the outgoing aggregate by value arguments.
6066 SmallVector<CCValAssign, 16> ByValArgLocs;
6067 CCState CCByValInfo(CallConv, IsVarArg, MF, ByValArgLocs, *DAG.getContext());
6068
6069 // Reserve stack space for the allocations in CCInfo.
6070 CCByValInfo.AllocateStack(CCInfo.getStackSize(), PtrAlign);
6071
6072 CCByValInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4_ByVal);
6073
6074 // Size of the linkage area, parameter list area and the part of the local
6075   // variable space where copies of aggregates which are passed by value are
6076 // stored.
6077 unsigned NumBytes = CCByValInfo.getStackSize();
6078
6079 // Calculate by how many bytes the stack has to be adjusted in case of tail
6080 // call optimization.
6081 int SPDiff = CalculateTailCallSPDiff(DAG, IsTailCall, NumBytes);
6082
6083 // Adjust the stack pointer for the new arguments...
6084 // These operations are automatically eliminated by the prolog/epilog pass
6085 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
6086 SDValue CallSeqStart = Chain;
6087
6088   // Load the return address and frame pointer so they can be moved somewhere else
6089 // later.
6090 SDValue LROp, FPOp;
6091 Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl);
6092
6093 // Set up a copy of the stack pointer for use loading and storing any
6094 // arguments that may not fit in the registers available for argument
6095 // passing.
6096 SDValue StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
6097
6098   SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
6099   SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
6100 SmallVector<SDValue, 8> MemOpChains;
6101
6102 bool seenFloatArg = false;
6103 // Walk the register/memloc assignments, inserting copies/loads.
6104 // i - Tracks the index into the list of registers allocated for the call
6105 // RealArgIdx - Tracks the index into the list of actual function arguments
6106 // j - Tracks the index into the list of byval arguments
6107 for (unsigned i = 0, RealArgIdx = 0, j = 0, e = ArgLocs.size();
6108 i != e;
6109 ++i, ++RealArgIdx) {
6110 CCValAssign &VA = ArgLocs[i];
6111 SDValue Arg = OutVals[RealArgIdx];
6112 ISD::ArgFlagsTy Flags = Outs[RealArgIdx].Flags;
6113
6114 if (Flags.isByVal()) {
6115 // Argument is an aggregate which is passed by value, thus we need to
6116 // create a copy of it in the local variable space of the current stack
6117 // frame (which is the stack frame of the caller) and pass the address of
6118 // this copy to the callee.
6119 assert((j < ByValArgLocs.size()) && "Index out of bounds!");
6120 CCValAssign &ByValVA = ByValArgLocs[j++];
6121 assert((VA.getValNo() == ByValVA.getValNo()) && "ValNo mismatch!");
6122
6123       // Memory reserved in the local variable space of the caller's stack frame.
6124 unsigned LocMemOffset = ByValVA.getLocMemOffset();
6125
6126 SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
6127 PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(MF.getDataLayout()),
6128 StackPtr, PtrOff);
6129
6130 // Create a copy of the argument in the local area of the current
6131 // stack frame.
6132 SDValue MemcpyCall =
6133 CreateCopyOfByValArgument(Arg, PtrOff,
6134 CallSeqStart.getNode()->getOperand(0),
6135 Flags, DAG, dl);
6136
6137 // This must go outside the CALLSEQ_START..END.
6138 SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall, NumBytes, 0,
6139 SDLoc(MemcpyCall));
6140 DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
6141 NewCallSeqStart.getNode());
6142 Chain = CallSeqStart = NewCallSeqStart;
6143
6144 // Pass the address of the aggregate copy on the stack either in a
6145 // physical register or in the parameter list area of the current stack
6146 // frame to the callee.
6147 Arg = PtrOff;
6148 }
6149
6150 // When useCRBits() is true, there can be i1 arguments.
6151 // It is because getRegisterType(MVT::i1) => MVT::i1,
6152 // and for other integer types getRegisterType() => MVT::i32.
6153 // Extend i1 and ensure callee will get i32.
6154 if (Arg.getValueType() == MVT::i1)
6155 Arg = DAG.getNode(Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND,
6156 dl, MVT::i32, Arg);
6157
6158 if (VA.isRegLoc()) {
6159 seenFloatArg |= VA.getLocVT().isFloatingPoint();
6160 // Put argument in a physical register.
6161 if (Subtarget.hasSPE() && Arg.getValueType() == MVT::f64) {
6162 bool IsLE = Subtarget.isLittleEndian();
6163 SDValue SVal = DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
6164 DAG.getIntPtrConstant(IsLE ? 0 : 1, dl));
6165 RegsToPass.push_back(std::make_pair(VA.getLocReg(), SVal.getValue(0)));
6166 SVal = DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
6167 DAG.getIntPtrConstant(IsLE ? 1 : 0, dl));
6168 RegsToPass.push_back(std::make_pair(ArgLocs[++i].getLocReg(),
6169 SVal.getValue(0)));
6170 } else
6171 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
6172 } else {
6173 // Put argument in the parameter list area of the current stack frame.
6174 assert(VA.isMemLoc());
6175 unsigned LocMemOffset = VA.getLocMemOffset();
6176
6177 if (!IsTailCall) {
6178 SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
6179 PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(MF.getDataLayout()),
6180 StackPtr, PtrOff);
6181
6182 MemOpChains.push_back(
6183 DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));
6184 } else {
6185 // Calculate and remember argument location.
6186 CalculateTailCallArgDest(DAG, MF, false, Arg, SPDiff, LocMemOffset,
6187 TailCallArguments);
6188 }
6189 }
6190 }
6191
6192 if (!MemOpChains.empty())
6193 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
6194
6195 // Build a sequence of copy-to-reg nodes chained together with token chain
6196 // and flag operands which copy the outgoing args into the appropriate regs.
6197 SDValue InGlue;
6198 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
6199 Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
6200 RegsToPass[i].second, InGlue);
6201 InGlue = Chain.getValue(1);
6202 }
6203
6204 // Set CR bit 6 to true if this is a vararg call with floating args passed in
6205 // registers.
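  // Informally: an SVR4 vararg callee (printf, for example) tests CR bit 6 in
  // its prologue to decide whether the FP argument registers must be spilled
  // for va_arg, so the caller has to set or clear the bit before the call.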
6206 if (IsVarArg) {
6207 SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
6208 SDValue Ops[] = { Chain, InGlue };
6209
6210 Chain = DAG.getNode(seenFloatArg ? PPCISD::CR6SET : PPCISD::CR6UNSET, dl,
6211 VTs, ArrayRef(Ops, InGlue.getNode() ? 2 : 1));
6212
6213 InGlue = Chain.getValue(1);
6214 }
6215
6216 if (IsTailCall)
6217 PrepareTailCall(DAG, InGlue, Chain, dl, SPDiff, NumBytes, LROp, FPOp,
6218 TailCallArguments);
6219
6220 return FinishCall(CFlags, dl, DAG, RegsToPass, InGlue, Chain, CallSeqStart,
6221 Callee, SPDiff, NumBytes, Ins, InVals, CB);
6222}
6223
6224// Copy an argument into memory, being careful to do this outside the
6225// call sequence for the call to which the argument belongs.
6226SDValue PPCTargetLowering::createMemcpyOutsideCallSeq(
6227 SDValue Arg, SDValue PtrOff, SDValue CallSeqStart, ISD::ArgFlagsTy Flags,
6228 SelectionDAG &DAG, const SDLoc &dl) const {
6229 SDValue MemcpyCall = CreateCopyOfByValArgument(Arg, PtrOff,
6230 CallSeqStart.getNode()->getOperand(0),
6231 Flags, DAG, dl);
6232 // The MEMCPY must go outside the CALLSEQ_START..END.
6233 int64_t FrameSize = CallSeqStart.getConstantOperandVal(1);
6234 SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall, FrameSize, 0,
6235 SDLoc(MemcpyCall));
6236 DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
6237 NewCallSeqStart.getNode());
6238 return NewCallSeqStart;
6239}
6240
6241SDValue PPCTargetLowering::LowerCall_64SVR4(
6242 SDValue Chain, SDValue Callee, CallFlags CFlags,
6243     const SmallVectorImpl<ISD::OutputArg> &Outs,
6244     const SmallVectorImpl<SDValue> &OutVals,
6245     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
6246     SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
6247     const CallBase *CB) const {
6248 bool isELFv2ABI = Subtarget.isELFv2ABI();
6249 bool isLittleEndian = Subtarget.isLittleEndian();
6250 unsigned NumOps = Outs.size();
6251 bool IsSibCall = false;
6252 bool IsFastCall = CFlags.CallConv == CallingConv::Fast;
6253
6254 EVT PtrVT = getPointerTy(DAG.getDataLayout());
6255 unsigned PtrByteSize = 8;
6256
6257   MachineFunction &MF = DAG.getMachineFunction();
6258 
6259 if (CFlags.IsTailCall && !getTargetMachine().Options.GuaranteedTailCallOpt)
6260 IsSibCall = true;
6261
6262   // Mark this function as potentially containing a tail call. As a consequence,
6263   // the frame pointer will be used for dynamic stack allocation and for
6264   // restoring the caller's stack pointer in this function's epilogue. This is
6265   // done because a tail-called function might overwrite the value in this
6266   // function's (MF) stack pointer stack slot 0(SP).
6267 if (getTargetMachine().Options.GuaranteedTailCallOpt && IsFastCall)
6268 MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
6269
6270 assert(!(IsFastCall && CFlags.IsVarArg) &&
6271 "fastcc not supported on varargs functions");
6272
6273 // Count how many bytes are to be pushed on the stack, including the linkage
6274 // area, and parameter passing area. On ELFv1, the linkage area is 48 bytes
6275 // reserved space for [SP][CR][LR][2 x unused][TOC]; on ELFv2, the linkage
6276 // area is 32 bytes reserved space for [SP][CR][LR][TOC].
6277 unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
6278 unsigned NumBytes = LinkageSize;
6279 unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
6280
6281 static const MCPhysReg GPR[] = {
6282 PPC::X3, PPC::X4, PPC::X5, PPC::X6,
6283 PPC::X7, PPC::X8, PPC::X9, PPC::X10,
6284 };
6285 static const MCPhysReg VR[] = {
6286 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
6287 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
6288 };
6289
6290 const unsigned NumGPRs = std::size(GPR);
6291 const unsigned NumFPRs = useSoftFloat() ? 0 : 13;
6292 const unsigned NumVRs = std::size(VR);
6293
6294 // On ELFv2, we can avoid allocating the parameter area if all the arguments
6295 // can be passed to the callee in registers.
6296 // For the fast calling convention, there is another check below.
6297 // Note: We should keep consistent with LowerFormalArguments_64SVR4()
6298 bool HasParameterArea = !isELFv2ABI || CFlags.IsVarArg || IsFastCall;
6299 if (!HasParameterArea) {
6300 unsigned ParamAreaSize = NumGPRs * PtrByteSize;
6301 unsigned AvailableFPRs = NumFPRs;
6302 unsigned AvailableVRs = NumVRs;
6303 unsigned NumBytesTmp = NumBytes;
6304 for (unsigned i = 0; i != NumOps; ++i) {
6305 if (Outs[i].Flags.isNest()) continue;
6306 if (CalculateStackSlotUsed(Outs[i].VT, Outs[i].ArgVT, Outs[i].Flags,
6307 PtrByteSize, LinkageSize, ParamAreaSize,
6308 NumBytesTmp, AvailableFPRs, AvailableVRs))
6309 HasParameterArea = true;
6310 }
6311 }
6312
6313 // When using the fast calling convention, we don't provide backing for
6314 // arguments that will be in registers.
6315 unsigned NumGPRsUsed = 0, NumFPRsUsed = 0, NumVRsUsed = 0;
6316
6317 // Avoid allocating parameter area for fastcc functions if all the arguments
6318 // can be passed in the registers.
6319 if (IsFastCall)
6320 HasParameterArea = false;
6321
6322 // Add up all the space actually used.
6323 for (unsigned i = 0; i != NumOps; ++i) {
6324 ISD::ArgFlagsTy Flags = Outs[i].Flags;
6325 EVT ArgVT = Outs[i].VT;
6326 EVT OrigVT = Outs[i].ArgVT;
6327
6328 if (Flags.isNest())
6329 continue;
6330
6331 if (IsFastCall) {
6332 if (Flags.isByVal()) {
6333 NumGPRsUsed += (Flags.getByValSize()+7)/8;
6334 if (NumGPRsUsed > NumGPRs)
6335 HasParameterArea = true;
6336 } else {
6337 switch (ArgVT.getSimpleVT().SimpleTy) {
6338 default: llvm_unreachable("Unexpected ValueType for argument!");
6339 case MVT::i1:
6340 case MVT::i32:
6341 case MVT::i64:
6342 if (++NumGPRsUsed <= NumGPRs)
6343 continue;
6344 break;
6345 case MVT::v4i32:
6346 case MVT::v8i16:
6347 case MVT::v16i8:
6348 case MVT::v2f64:
6349 case MVT::v2i64:
6350 case MVT::v1i128:
6351 case MVT::f128:
6352 if (++NumVRsUsed <= NumVRs)
6353 continue;
6354 break;
6355 case MVT::v4f32:
6356 if (++NumVRsUsed <= NumVRs)
6357 continue;
6358 break;
6359 case MVT::f32:
6360 case MVT::f64:
6361 if (++NumFPRsUsed <= NumFPRs)
6362 continue;
6363 break;
6364 }
6365 HasParameterArea = true;
6366 }
6367 }
6368
6369 /* Respect alignment of argument on the stack. */
6370     auto Alignment =
6371         CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
6372     NumBytes = alignTo(NumBytes, Alignment);
6373
6374 NumBytes += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
6375 if (Flags.isInConsecutiveRegsLast())
6376 NumBytes = ((NumBytes + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
6377 }
6378
6379 unsigned NumBytesActuallyUsed = NumBytes;
6380
6381 // In the old ELFv1 ABI,
6382 // the prolog code of the callee may store up to 8 GPR argument registers to
6383   // the stack, allowing va_start to index over them in memory if it is varargs.
6384 // Because we cannot tell if this is needed on the caller side, we have to
6385 // conservatively assume that it is needed. As such, make sure we have at
6386 // least enough stack space for the caller to store the 8 GPRs.
6387 // In the ELFv2 ABI, we allocate the parameter area iff a callee
6388 // really requires memory operands, e.g. a vararg function.
6389 if (HasParameterArea)
6390 NumBytes = std::max(NumBytes, LinkageSize + 8 * PtrByteSize);
6391 else
6392 NumBytes = LinkageSize;
6393
6394 // Tail call needs the stack to be aligned.
6395 if (getTargetMachine().Options.GuaranteedTailCallOpt && IsFastCall)
6396 NumBytes = EnsureStackAlignment(Subtarget.getFrameLowering(), NumBytes);
6397
6398 int SPDiff = 0;
6399
6400 // Calculate by how many bytes the stack has to be adjusted in case of tail
6401 // call optimization.
6402 if (!IsSibCall)
6403 SPDiff = CalculateTailCallSPDiff(DAG, CFlags.IsTailCall, NumBytes);
6404
6405 // To protect arguments on the stack from being clobbered in a tail call,
6406 // force all the loads to happen before doing any other lowering.
6407 if (CFlags.IsTailCall)
6408 Chain = DAG.getStackArgumentTokenFactor(Chain);
6409
6410 // Adjust the stack pointer for the new arguments...
6411 // These operations are automatically eliminated by the prolog/epilog pass
6412 if (!IsSibCall)
6413 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
6414 SDValue CallSeqStart = Chain;
6415
6416   // Load the return address and frame pointer so they can be moved somewhere else
6417 // later.
6418 SDValue LROp, FPOp;
6419 Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl);
6420
6421 // Set up a copy of the stack pointer for use loading and storing any
6422 // arguments that may not fit in the registers available for argument
6423 // passing.
6424 SDValue StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
6425
6426 // Figure out which arguments are going to go in registers, and which in
6427 // memory. Also, if this is a vararg function, floating point operations
6428 // must be stored to our stack, and loaded into integer regs as well, if
6429 // any integer regs are available for argument passing.
6430 unsigned ArgOffset = LinkageSize;
6431
6433 SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
6434
6435 SmallVector<SDValue, 8> MemOpChains;
6436 for (unsigned i = 0; i != NumOps; ++i) {
6437 SDValue Arg = OutVals[i];
6438 ISD::ArgFlagsTy Flags = Outs[i].Flags;
6439 EVT ArgVT = Outs[i].VT;
6440 EVT OrigVT = Outs[i].ArgVT;
6441
6442 // PtrOff will be used to store the current argument to the stack if a
6443 // register cannot be found for it.
6444 SDValue PtrOff;
6445
6446 // We re-align the argument offset for each argument, except when using the
6447 // fast calling convention, when we need to make sure we do that only when
6448 // we'll actually use a stack slot.
6449 auto ComputePtrOff = [&]() {
6450 /* Respect alignment of argument on the stack. */
6451 auto Alignment =
6452 CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
6453 ArgOffset = alignTo(ArgOffset, Alignment);
6454
6455 PtrOff = DAG.getConstant(ArgOffset, dl, StackPtr.getValueType());
6456
6457 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
6458 };
6459
6460 if (!IsFastCall) {
6461 ComputePtrOff();
6462
6463 /* Compute GPR index associated with argument offset. */
6464 GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
6465 GPR_idx = std::min(GPR_idx, NumGPRs);
6466 }
6467
6468 // Promote integers to 64-bit values.
6469 if (Arg.getValueType() == MVT::i32 || Arg.getValueType() == MVT::i1) {
6470 // FIXME: Should this use ANY_EXTEND if neither sext nor zext?
6471 unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
6472 Arg = DAG.getNode(ExtOp, dl, MVT::i64, Arg);
6473 }
6474
6475 // FIXME memcpy is used way more than necessary. Correctness first.
6476 // Note: "by value" is code for passing a structure by value, not
6477 // basic types.
6478 if (Flags.isByVal()) {
6479 // Note: Size includes alignment padding, so
6480 // struct x { short a; char b; }
6481 // will have Size = 4. With #pragma pack(1), it will have Size = 3.
6482 // These are the proper values we need for right-justifying the
6483 // aggregate in a parameter register.
6484 unsigned Size = Flags.getByValSize();
6485
6486 // An empty aggregate parameter takes up no storage and no
6487 // registers.
6488 if (Size == 0)
6489 continue;
6490
6491 if (IsFastCall)
6492 ComputePtrOff();
6493
6494 // All aggregates smaller than 8 bytes must be passed right-justified.
6495 if (Size==1 || Size==2 || Size==4) {
6496 EVT VT = (Size==1) ? MVT::i8 : ((Size==2) ? MVT::i16 : MVT::i32);
6497 if (GPR_idx != NumGPRs) {
6498 SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg,
6499 MachinePointerInfo(), VT);
6500 MemOpChains.push_back(Load.getValue(1));
6501 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6502
6503 ArgOffset += PtrByteSize;
6504 continue;
6505 }
6506 }
6507
6508 if (GPR_idx == NumGPRs && Size < 8) {
6509 SDValue AddPtr = PtrOff;
6510 if (!isLittleEndian) {
6511 SDValue Const = DAG.getConstant(PtrByteSize - Size, dl,
6512 PtrOff.getValueType());
6513 AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
6514 }
6515 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
6516 CallSeqStart,
6517 Flags, DAG, dl);
6518 ArgOffset += PtrByteSize;
6519 continue;
6520 }
6521     // Copy the object to the parameter save area if it cannot be entirely passed
6522 // by registers.
6523 // FIXME: we only need to copy the parts which need to be passed in
6524 // parameter save area. For the parts passed by registers, we don't need
6525 // to copy them to the stack although we need to allocate space for them
6526 // in parameter save area.
6527 if ((NumGPRs - GPR_idx) * PtrByteSize < Size)
6528 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, PtrOff,
6529 CallSeqStart,
6530 Flags, DAG, dl);
6531
6532 // When a register is available, pass a small aggregate right-justified.
6533 if (Size < 8 && GPR_idx != NumGPRs) {
6534 // The easiest way to get this right-justified in a register
6535 // is to copy the structure into the rightmost portion of a
6536 // local variable slot, then load the whole slot into the
6537 // register.
6538 // FIXME: The memcpy seems to produce pretty awful code for
6539 // small aggregates, particularly for packed ones.
6540 // FIXME: It would be preferable to use the slot in the
6541 // parameter save area instead of a new local variable.
6542 SDValue AddPtr = PtrOff;
6543 if (!isLittleEndian) {
6544 SDValue Const = DAG.getConstant(8 - Size, dl, PtrOff.getValueType());
6545 AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
6546 }
6547 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
6548 CallSeqStart,
6549 Flags, DAG, dl);
6550
6551 // Load the slot into the register.
6552 SDValue Load =
6553 DAG.getLoad(PtrVT, dl, Chain, PtrOff, MachinePointerInfo());
6554 MemOpChains.push_back(Load.getValue(1));
6555 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6556
6557 // Done with this argument.
6558 ArgOffset += PtrByteSize;
6559 continue;
6560 }
6561
6562 // For aggregates larger than PtrByteSize, copy the pieces of the
6563 // object that fit into registers from the parameter save area.
6564 for (unsigned j=0; j<Size; j+=PtrByteSize) {
6565 SDValue Const = DAG.getConstant(j, dl, PtrOff.getValueType());
6566 SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
6567 if (GPR_idx != NumGPRs) {
6568 unsigned LoadSizeInBits = std::min(PtrByteSize, (Size - j)) * 8;
6569 EVT ObjType = EVT::getIntegerVT(*DAG.getContext(), LoadSizeInBits);
6570 SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, AddArg,
6571 MachinePointerInfo(), ObjType);
6572
6573 MemOpChains.push_back(Load.getValue(1));
6574 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6575 ArgOffset += PtrByteSize;
6576 } else {
6577 ArgOffset += ((Size - j + PtrByteSize-1)/PtrByteSize)*PtrByteSize;
6578 break;
6579 }
6580 }
6581 continue;
6582 }
6583
6584 switch (Arg.getSimpleValueType().SimpleTy) {
6585 default: llvm_unreachable("Unexpected ValueType for argument!");
6586 case MVT::i1:
6587 case MVT::i32:
6588 case MVT::i64:
6589 if (Flags.isNest()) {
6590 // The 'nest' parameter, if any, is passed in R11.
6591 RegsToPass.push_back(std::make_pair(PPC::X11, Arg));
6592 break;
6593 }
6594
6595 // These can be scalar arguments or elements of an integer array type
6596 // passed directly. Clang may use those instead of "byval" aggregate
6597 // types to avoid forcing arguments to memory unnecessarily.
6598 if (GPR_idx != NumGPRs) {
6599 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg));
6600 } else {
6601 if (IsFastCall)
6602 ComputePtrOff();
6603
6604 assert(HasParameterArea &&
6605 "Parameter area must exist to pass an argument in memory.");
6606 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6607 true, CFlags.IsTailCall, false, MemOpChains,
6608 TailCallArguments, dl);
6609 if (IsFastCall)
6610 ArgOffset += PtrByteSize;
6611 }
6612 if (!IsFastCall)
6613 ArgOffset += PtrByteSize;
6614 break;
6615 case MVT::f32:
6616 case MVT::f64: {
6617 // These can be scalar arguments or elements of a float array type
6618 // passed directly. The latter are used to implement ELFv2 homogeneous
6619 // float aggregates.
6620
6621 // Named arguments go into FPRs first, and once they overflow, the
6622 // remaining arguments go into GPRs and then the parameter save area.
6623 // Unnamed arguments for vararg functions always go to GPRs and
6624 // then the parameter save area. For now, put all arguments to vararg
6625 // routines always in both locations (FPR *and* GPR or stack slot).
6626 bool NeedGPROrStack = CFlags.IsVarArg || FPR_idx == NumFPRs;
6627 bool NeededLoad = false;
6628
6629 // First load the argument into the next available FPR.
6630 if (FPR_idx != NumFPRs)
6631 RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg));
6632
6633 // Next, load the argument into GPR or stack slot if needed.
6634 if (!NeedGPROrStack)
6635 ;
6636 else if (GPR_idx != NumGPRs && !IsFastCall) {
6637 // FIXME: We may want to re-enable this for CallingConv::Fast on the P8
6638 // once we support fp <-> gpr moves.
6639
6640 // In the non-vararg case, this can only ever happen in the
6641 // presence of f32 array types, since otherwise we never run
6642 // out of FPRs before running out of GPRs.
6643 SDValue ArgVal;
6644
6645 // Double values are always passed in a single GPR.
6646 if (Arg.getValueType() != MVT::f32) {
6647 ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i64, Arg);
6648
6649 // Non-array float values are extended and passed in a GPR.
6650 } else if (!Flags.isInConsecutiveRegs()) {
6651 ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
6652 ArgVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, ArgVal);
6653
6654 // If we have an array of floats, we collect every odd element
6655 // together with its predecessor into one GPR.
6656 } else if (ArgOffset % PtrByteSize != 0) {
6657 SDValue Lo, Hi;
6658 Lo = DAG.getNode(ISD::BITCAST, dl, MVT::i32, OutVals[i - 1]);
6659 Hi = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
6660 if (!isLittleEndian)
6661 std::swap(Lo, Hi);
6662 ArgVal = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
6663
6664 // The final element, if even, goes into the first half of a GPR.
6665 } else if (Flags.isInConsecutiveRegsLast()) {
6666 ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
6667 ArgVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, ArgVal);
6668 if (!isLittleEndian)
6669 ArgVal = DAG.getNode(ISD::SHL, dl, MVT::i64, ArgVal,
6670 DAG.getConstant(32, dl, MVT::i32));
6671
6672 // Non-final even elements are skipped; they will be handled
6673 // together with the subsequent argument on the next go-around.
6674 } else
6675 ArgVal = SDValue();
6676
6677 if (ArgVal.getNode())
6678 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], ArgVal));
6679 } else {
6680 if (IsFastCall)
6681 ComputePtrOff();
6682
6683 // Single-precision floating-point values are mapped to the
6684 // second (rightmost) word of the stack doubleword.
6685 if (Arg.getValueType() == MVT::f32 &&
6686 !isLittleEndian && !Flags.isInConsecutiveRegs()) {
6687 SDValue ConstFour = DAG.getConstant(4, dl, PtrOff.getValueType());
6688 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour);
6689 }
6690
6691 assert(HasParameterArea &&
6692 "Parameter area must exist to pass an argument in memory.");
6693 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6694 true, CFlags.IsTailCall, false, MemOpChains,
6695 TailCallArguments, dl);
6696
6697 NeededLoad = true;
6698 }
6699 // When passing an array of floats, the array occupies consecutive
6700 // space in the argument area; only round up to the next doubleword
6701 // at the end of the array. Otherwise, each float takes 8 bytes.
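// For example, three consecutive f32 array elements advance ArgOffset by
// 4 + 4 + 4 bytes and the final element rounds it up to the next multiple
// of PtrByteSize, whereas a lone f32 or an f64 advances it by a full 8 bytes.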
6702 if (!IsFastCall || NeededLoad) {
6703 ArgOffset += (Arg.getValueType() == MVT::f32 &&
6704 Flags.isInConsecutiveRegs()) ? 4 : 8;
6705 if (Flags.isInConsecutiveRegsLast())
6706 ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
6707 }
6708 break;
6709 }
6710 case MVT::v4f32:
6711 case MVT::v4i32:
6712 case MVT::v8i16:
6713 case MVT::v16i8:
6714 case MVT::v2f64:
6715 case MVT::v2i64:
6716 case MVT::v1i128:
6717 case MVT::f128:
6718 // These can be scalar arguments or elements of a vector array type
6719 // passed directly. The latter are used to implement ELFv2 homogeneous
6720 // vector aggregates.
6721
6722 // For a varargs call, named arguments go into VRs or on the stack as
6723 // usual; unnamed arguments always go to the stack or the corresponding
6724 // GPRs when within range. For now, we always put the value in both
6725 // locations (or even all three).
6726 if (CFlags.IsVarArg) {
6727 assert(HasParameterArea &&
6728 "Parameter area must exist if we have a varargs call.");
6729 // We could elide this store in the case where the object fits
6730 // entirely in R registers. Maybe later.
6731 SDValue Store =
6732 DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo());
6733 MemOpChains.push_back(Store);
6734 if (VR_idx != NumVRs) {
6735 SDValue Load =
6736 DAG.getLoad(MVT::v4f32, dl, Store, PtrOff, MachinePointerInfo());
6737 MemOpChains.push_back(Load.getValue(1));
6738 RegsToPass.push_back(std::make_pair(VR[VR_idx++], Load));
6739 }
6740 ArgOffset += 16;
6741 for (unsigned i=0; i<16; i+=PtrByteSize) {
6742 if (GPR_idx == NumGPRs)
6743 break;
6744 SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
6745 DAG.getConstant(i, dl, PtrVT));
6746 SDValue Load =
6747 DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo());
6748 MemOpChains.push_back(Load.getValue(1));
6749 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6750 }
6751 break;
6752 }
6753
6754 // Non-varargs Altivec params go into VRs or on the stack.
6755 if (VR_idx != NumVRs) {
6756 RegsToPass.push_back(std::make_pair(VR[VR_idx++], Arg));
6757 } else {
6758 if (IsFastCall)
6759 ComputePtrOff();
6760
6761 assert(HasParameterArea &&
6762 "Parameter area must exist to pass an argument in memory.");
6763 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6764 true, CFlags.IsTailCall, true, MemOpChains,
6765 TailCallArguments, dl);
6766 if (IsFastCall)
6767 ArgOffset += 16;
6768 }
6769
6770 if (!IsFastCall)
6771 ArgOffset += 16;
6772 break;
6773 }
6774 }
6775
6776 assert((!HasParameterArea || NumBytesActuallyUsed == ArgOffset) &&
6777 "mismatch in size of parameter area");
6778 (void)NumBytesActuallyUsed;
6779
6780 if (!MemOpChains.empty())
6781 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
6782
6783 // Check if this is an indirect call (MTCTR/BCTRL).
6784 // See prepareDescriptorIndirectCall and buildCallOperands for more
6785 // information about calls through function pointers in the 64-bit SVR4 ABI.
6786 if (CFlags.IsIndirect) {
6787 // For 64-bit ELFv2 ABI with PCRel, do not save the TOC of the
6788 // caller in the TOC save area.
6789 if (isTOCSaveRestoreRequired(Subtarget)) {
6790 assert(!CFlags.IsTailCall && "Indirect tails calls not supported");
6791 // Load r2 into a virtual register and store it to the TOC save area.
6792 setUsesTOCBasePtr(DAG);
6793 SDValue Val = DAG.getCopyFromReg(Chain, dl, PPC::X2, MVT::i64);
6794 // TOC save area offset.
6795 unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
6796 SDValue PtrOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
6797 SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
6798 Chain = DAG.getStore(Val.getValue(1), dl, Val, AddPtr,
6799 MachinePointerInfo::getStack(
6800 DAG.getMachineFunction(), TOCSaveOffset));
6801 }
6802 // In the ELFv2 ABI, R12 must contain the address of an indirect callee.
6803 // This does not mean the MTCTR instruction must use R12; it's easier
6804 // to model this as an extra parameter, so do that.
6805 if (isELFv2ABI && !CFlags.IsPatchPoint)
6806 RegsToPass.push_back(std::make_pair((unsigned)PPC::X12, Callee));
6807 }
6808
6809 // Build a sequence of copy-to-reg nodes chained together with token chain
6810 // and flag operands which copy the outgoing args into the appropriate regs.
6811 SDValue InGlue;
6812 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
6813 Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
6814 RegsToPass[i].second, InGlue);
6815 InGlue = Chain.getValue(1);
6816 }
6817
6818 if (CFlags.IsTailCall && !IsSibCall)
6819 PrepareTailCall(DAG, InGlue, Chain, dl, SPDiff, NumBytes, LROp, FPOp,
6820 TailCallArguments);
6821
6822 return FinishCall(CFlags, dl, DAG, RegsToPass, InGlue, Chain, CallSeqStart,
6823 Callee, SPDiff, NumBytes, Ins, InVals, CB);
6824}
6825
6826// Returns true when the shadow of a general purpose argument register
6827// in the parameter save area is aligned to at least 'RequiredAlign'.
6828static bool isGPRShadowAligned(MCPhysReg Reg, Align RequiredAlign) {
6829 assert(RequiredAlign.value() <= 16 &&
6830 "Required alignment greater than stack alignment.");
6831 switch (Reg) {
6832 default:
6833 report_fatal_error("called on invalid register.");
6834 case PPC::R5:
6835 case PPC::R9:
6836 case PPC::X3:
6837 case PPC::X5:
6838 case PPC::X7:
6839 case PPC::X9:
6840 // These registers are 16-byte aligned, which is the strictest alignment
6841 // we can support.
6842 return true;
6843 case PPC::R3:
6844 case PPC::R7:
6845 case PPC::X4:
6846 case PPC::X6:
6847 case PPC::X8:
6848 case PPC::X10:
6849 // The shadow of these registers in the PSA is 8 byte aligned.
6850 return RequiredAlign <= 8;
6851 case PPC::R4:
6852 case PPC::R6:
6853 case PPC::R8:
6854 case PPC::R10:
6855 return RequiredAlign <= 4;
6856 }
6857}
6858
6859static bool CC_AIX(unsigned ValNo, MVT ValVT, MVT LocVT,
6860 CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
6861 CCState &S) {
6862 AIXCCState &State = static_cast<AIXCCState &>(S);
6863 const PPCSubtarget &Subtarget = static_cast<const PPCSubtarget &>(
6864 State.getMachineFunction().getSubtarget());
6865 const bool IsPPC64 = Subtarget.isPPC64();
6866 const unsigned PtrSize = IsPPC64 ? 8 : 4;
6867 const Align PtrAlign(PtrSize);
6868 const Align StackAlign(16);
6869 const MVT RegVT = Subtarget.getScalarIntVT();
6870
6871 if (ValVT == MVT::f128)
6872 report_fatal_error("f128 is unimplemented on AIX.");
6873
6874 if (ArgFlags.isNest())
6875 report_fatal_error("Nest arguments are unimplemented.");
6876
6877 static const MCPhysReg GPR_32[] = {// 32-bit registers.
6878 PPC::R3, PPC::R4, PPC::R5, PPC::R6,
6879 PPC::R7, PPC::R8, PPC::R9, PPC::R10};
6880 static const MCPhysReg GPR_64[] = {// 64-bit registers.
6881 PPC::X3, PPC::X4, PPC::X5, PPC::X6,
6882 PPC::X7, PPC::X8, PPC::X9, PPC::X10};
6883
6884 static const MCPhysReg VR[] = {// Vector registers.
6885 PPC::V2, PPC::V3, PPC::V4, PPC::V5,
6886 PPC::V6, PPC::V7, PPC::V8, PPC::V9,
6887 PPC::V10, PPC::V11, PPC::V12, PPC::V13};
6888
6889 const ArrayRef<MCPhysReg> GPRs = IsPPC64 ? GPR_64 : GPR_32;
6890
6891 if (ArgFlags.isByVal()) {
6892 const Align ByValAlign(ArgFlags.getNonZeroByValAlign());
6893 if (ByValAlign > StackAlign)
6894 report_fatal_error("Pass-by-value arguments with alignment greater than "
6895 "16 are not supported.");
6896
6897 const unsigned ByValSize = ArgFlags.getByValSize();
6898 const Align ObjAlign = ByValAlign > PtrAlign ? ByValAlign : PtrAlign;
6899
6900 // An empty aggregate parameter takes up no storage and no registers,
6901 // but needs a MemLoc for a stack slot for the formal arguments side.
6902 if (ByValSize == 0) {
6903 State.addLoc(CCValAssign::getMem(ValNo, MVT::INVALID_SIMPLE_VALUE_TYPE,
6904 State.getStackSize(), RegVT, LocInfo));
6905 return false;
6906 }
6907
6908 // Shadow allocate any registers that are not properly aligned.
6909 unsigned NextReg = State.getFirstUnallocated(GPRs);
6910 while (NextReg != GPRs.size() &&
6911 !isGPRShadowAligned(GPRs[NextReg], ObjAlign)) {
6912 // Shadow allocate the next register since its alignment is not strict enough.
6913 MCRegister Reg = State.AllocateReg(GPRs);
6914 // Allocate the stack space shadowed by said register.
6915 State.AllocateStack(PtrSize, PtrAlign);
6916 assert(Reg && "Alocating register unexpectedly failed.");
6917 (void)Reg;
6918 NextReg = State.getFirstUnallocated(GPRs);
6919 }
6920
6921 const unsigned StackSize = alignTo(ByValSize, ObjAlign);
6922 unsigned Offset = State.AllocateStack(StackSize, ObjAlign);
6923 for (const unsigned E = Offset + StackSize; Offset < E; Offset += PtrSize) {
6924 if (MCRegister Reg = State.AllocateReg(GPRs))
6925 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, RegVT, LocInfo));
6926 else {
6927 State.addLoc(CCValAssign::getMem(ValNo, MVT::INVALID_SIMPLE_VALUE_TYPE,
6928 Offset, MVT::INVALID_SIMPLE_VALUE_TYPE,
6929 LocInfo));
6930 break;
6931 }
6932 }
6933 return false;
6934 }
6935
6936 // Arguments always reserve parameter save area.
6937 switch (ValVT.SimpleTy) {
6938 default:
6939 report_fatal_error("Unhandled value type for argument.");
6940 case MVT::i64:
6941 // i64 arguments should have been split to i32 for PPC32.
6942 assert(IsPPC64 && "PPC32 should have split i64 values.");
6943 [[fallthrough]];
6944 case MVT::i1:
6945 case MVT::i32: {
6946 const unsigned Offset = State.AllocateStack(PtrSize, PtrAlign);
6947 // AIX integer arguments are always passed in register width.
6948 if (ValVT.getFixedSizeInBits() < RegVT.getFixedSizeInBits())
6949 LocInfo = ArgFlags.isSExt() ? CCValAssign::LocInfo::SExt
6950 : CCValAssign::LocInfo::ZExt;
6951 if (MCRegister Reg = State.AllocateReg(GPRs))
6952 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, RegVT, LocInfo));
6953 else
6954 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, RegVT, LocInfo));
6955
6956 return false;
6957 }
6958 case MVT::f32:
6959 case MVT::f64: {
6960 // Parameter save area (PSA) is reserved even if the float passes in fpr.
6961 const unsigned StoreSize = LocVT.getStoreSize();
6962 // Floats are always 4-byte aligned in the PSA on AIX.
6963 // This includes f64 in 64-bit mode for ABI compatibility.
6964 const unsigned Offset =
6965 State.AllocateStack(IsPPC64 ? 8 : StoreSize, Align(4));
6966 MCRegister FReg = State.AllocateReg(FPR);
6967 if (FReg)
6968 State.addLoc(CCValAssign::getReg(ValNo, ValVT, FReg, LocVT, LocInfo));
6969
6970 // Reserve and initialize GPRs or initialize the PSA as required.
6971 for (unsigned I = 0; I < StoreSize; I += PtrSize) {
6972 if (MCRegister Reg = State.AllocateReg(GPRs)) {
6973 assert(FReg && "An FPR should be available when a GPR is reserved.");
6974 if (State.isVarArg()) {
6975 // Successfully reserved GPRs are only initialized for vararg calls.
6976 // Custom handling is required for:
6977 // f64 in PPC32 needs to be split into 2 GPRs.
6978 // f32 in PPC64 needs to occupy only lower 32 bits of 64-bit GPR.
6979 State.addLoc(
6980 CCValAssign::getCustomReg(ValNo, ValVT, Reg, RegVT, LocInfo));
6981 }
6982 } else {
6983 // If there are insufficient GPRs, the PSA needs to be initialized.
6984 // Initialization occurs even if an FPR was initialized for
6985 // compatibility with the AIX XL compiler. The full memory for the
6986 // argument will be initialized even if a prior word is saved in GPR.
6987 // A custom memLoc is used when the argument also passes in FPR so
6988 // that the callee handling can skip over it easily.
6989 State.addLoc(
6990 FReg ? CCValAssign::getCustomMem(ValNo, ValVT, Offset, LocVT,
6991 LocInfo)
6992 : CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
6993 break;
6994 }
6995 }
6996
6997 return false;
6998 }
6999 case MVT::v4f32:
7000 case MVT::v4i32:
7001 case MVT::v8i16:
7002 case MVT::v16i8:
7003 case MVT::v2i64:
7004 case MVT::v2f64:
7005 case MVT::v1i128: {
7006 const unsigned VecSize = 16;
7007 const Align VecAlign(VecSize);
7008
7009 if (!State.isVarArg()) {
7010 // If there are vector registers remaining we don't consume any stack
7011 // space.
7012 if (MCRegister VReg = State.AllocateReg(VR)) {
7013 State.addLoc(CCValAssign::getReg(ValNo, ValVT, VReg, LocVT, LocInfo));
7014 return false;
7015 }
7016 // Vectors passed on the stack do not shadow GPRs or FPRs even though they
7017 // might be allocated in the portion of the PSA that is shadowed by the
7018 // GPRs.
7019 const unsigned Offset = State.AllocateStack(VecSize, VecAlign);
7020 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
7021 return false;
7022 }
7023
7024 unsigned NextRegIndex = State.getFirstUnallocated(GPRs);
7025 // Burn any underaligned registers and their shadowed stack space until
7026 // we reach the required alignment.
7027 while (NextRegIndex != GPRs.size() &&
7028 !isGPRShadowAligned(GPRs[NextRegIndex], VecAlign)) {
7029 // Shadow allocate register and its stack shadow.
7030 MCRegister Reg = State.AllocateReg(GPRs);
7031 State.AllocateStack(PtrSize, PtrAlign);
7032 assert(Reg && "Allocating register unexpectedly failed.");
7033 (void)Reg;
7034 NextRegIndex = State.getFirstUnallocated(GPRs);
7035 }
7036
7037 // Vectors that are passed as fixed arguments are handled differently.
7038 // They are passed in VRs if any are available (unlike arguments passed
7039 // through an ellipsis) and shadow GPRs (unlike arguments to non-vararg
7040 // functions).
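// For example, on 64-bit a named (fixed) vector argument that lands in a VR
// still shadow-allocates two GPRs and 16 bytes of parameter save area,
// whereas the same vector passed through the ellipsis never consumes a VR.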
7041 if (State.isFixed(ValNo)) {
7042 if (MCRegister VReg = State.AllocateReg(VR)) {
7043 State.addLoc(CCValAssign::getReg(ValNo, ValVT, VReg, LocVT, LocInfo));
7044 // Shadow allocate GPRs and stack space even though we pass in a VR.
7045 for (unsigned I = 0; I != VecSize; I += PtrSize)
7046 State.AllocateReg(GPRs);
7047 State.AllocateStack(VecSize, VecAlign);
7048 return false;
7049 }
7050 // No vector registers remain so pass on the stack.
7051 const unsigned Offset = State.AllocateStack(VecSize, VecAlign);
7052 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
7053 return false;
7054 }
7055
7056 // If all GPRs are consumed then we pass the argument fully on the stack.
7057 if (NextRegIndex == GPRs.size()) {
7058 const unsigned Offset = State.AllocateStack(VecSize, VecAlign);
7059 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
7060 return false;
7061 }
7062
7063 // Corner case for 32-bit codegen. We have 2 registers to pass the first
7064 // half of the argument, and then need to pass the remaining half on the
7065 // stack.
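// For example, with R3-R8 already assigned, the first 8 bytes of the 16-byte
// vector travel in R9 and R10 while the full value also receives a 16-byte
// stack slot, from which the callee reloads whatever did not fit in registers.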
7066 if (GPRs[NextRegIndex] == PPC::R9) {
7067 const unsigned Offset = State.AllocateStack(VecSize, VecAlign);
7068 State.addLoc(
7069 CCValAssign::getCustomMem(ValNo, ValVT, Offset, LocVT, LocInfo));
7070
7071 const MCRegister FirstReg = State.AllocateReg(PPC::R9);
7072 const MCRegister SecondReg = State.AllocateReg(PPC::R10);
7073 assert(FirstReg && SecondReg &&
7074 "Allocating R9 or R10 unexpectedly failed.");
7075 State.addLoc(
7076 CCValAssign::getCustomReg(ValNo, ValVT, FirstReg, RegVT, LocInfo));
7077 State.addLoc(
7078 CCValAssign::getCustomReg(ValNo, ValVT, SecondReg, RegVT, LocInfo));
7079 return false;
7080 }
7081
7082 // We have enough GPRs to fully pass the vector argument, and we have
7083 // already consumed any underaligned registers. Start with the custom
7084 // MemLoc and then the custom RegLocs.
7085 const unsigned Offset = State.AllocateStack(VecSize, VecAlign);
7086 State.addLoc(
7087 CCValAssign::getCustomMem(ValNo, ValVT, Offset, LocVT, LocInfo));
7088 for (unsigned I = 0; I != VecSize; I += PtrSize) {
7089 const MCRegister Reg = State.AllocateReg(GPRs);
7090 assert(Reg && "Failed to allocated register for vararg vector argument");
7091 State.addLoc(
7092 CCValAssign::getCustomReg(ValNo, ValVT, Reg, RegVT, LocInfo));
7093 }
7094 return false;
7095 }
7096 }
7097 return true;
7098}
7099
7100// So far, this function is only used by LowerFormalArguments_AIX()
7101static const TargetRegisterClass *getRegClassForSVT(MVT::SimpleValueType SVT,
7102 bool IsPPC64,
7103 bool HasP8Vector,
7104 bool HasVSX) {
7105 assert((IsPPC64 || SVT != MVT::i64) &&
7106 "i64 should have been split for 32-bit codegen.");
7107
7108 switch (SVT) {
7109 default:
7110 report_fatal_error("Unexpected value type for formal argument");
7111 case MVT::i1:
7112 case MVT::i32:
7113 case MVT::i64:
7114 return IsPPC64 ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
7115 case MVT::f32:
7116 return HasP8Vector ? &PPC::VSSRCRegClass : &PPC::F4RCRegClass;
7117 case MVT::f64:
7118 return HasVSX ? &PPC::VSFRCRegClass : &PPC::F8RCRegClass;
7119 case MVT::v4f32:
7120 case MVT::v4i32:
7121 case MVT::v8i16:
7122 case MVT::v16i8:
7123 case MVT::v2i64:
7124 case MVT::v2f64:
7125 case MVT::v1i128:
7126 return &PPC::VRRCRegClass;
7127 }
7128}
7129
7130static SDValue truncateScalarIntegerArg(ISD::ArgFlagsTy Flags, EVT ValVT,
7131 SelectionDAG &DAG, SDValue ArgValue,
7132 MVT LocVT, const SDLoc &dl) {
7133 assert(ValVT.isScalarInteger() && LocVT.isScalarInteger());
7134 assert(ValVT.getFixedSizeInBits() < LocVT.getFixedSizeInBits());
7135
7136 if (Flags.isSExt())
7137 ArgValue = DAG.getNode(ISD::AssertSext, dl, LocVT, ArgValue,
7138 DAG.getValueType(ValVT));
7139 else if (Flags.isZExt())
7140 ArgValue = DAG.getNode(ISD::AssertZext, dl, LocVT, ArgValue,
7141 DAG.getValueType(ValVT));
7142
7143 return DAG.getNode(ISD::TRUNCATE, dl, ValVT, ArgValue);
7144}
7145
7146static unsigned mapArgRegToOffsetAIX(unsigned Reg, const PPCFrameLowering *FL) {
7147 const unsigned LASize = FL->getLinkageSize();
7148
7149 if (PPC::GPRCRegClass.contains(Reg)) {
7150 assert(Reg >= PPC::R3 && Reg <= PPC::R10 &&
7151 "Reg must be a valid argument register!");
7152 return LASize + 4 * (Reg - PPC::R3);
7153 }
7154
7155 if (PPC::G8RCRegClass.contains(Reg)) {
7156 assert(Reg >= PPC::X3 && Reg <= PPC::X10 &&
7157 "Reg must be a valid argument register!");
7158 return LASize + 8 * (Reg - PPC::X3);
7159 }
7160
7161 llvm_unreachable("Only general purpose registers expected.");
7162}
7163
7164// AIX ABI Stack Frame Layout:
7165//
7166// Low Memory +--------------------------------------------+
7167// SP +---> | Back chain | ---+
7168// | +--------------------------------------------+ |
7169// | | Saved Condition Register | |
7170// | +--------------------------------------------+ |
7171// | | Saved Linkage Register | |
7172// | +--------------------------------------------+ | Linkage Area
7173// | | Reserved for compilers | |
7174// | +--------------------------------------------+ |
7175// | | Reserved for binders | |
7176// | +--------------------------------------------+ |
7177// | | Saved TOC pointer | ---+
7178// | +--------------------------------------------+
7179// | | Parameter save area |
7180// | +--------------------------------------------+
7181// | | Alloca space |
7182// | +--------------------------------------------+
7183// | | Local variable space |
7184// | +--------------------------------------------+
7185// | | Float/int conversion temporary |
7186// | +--------------------------------------------+
7187// | | Save area for AltiVec registers |
7188// | +--------------------------------------------+
7189// | | AltiVec alignment padding |
7190// | +--------------------------------------------+
7191// | | Save area for VRSAVE register |
7192// | +--------------------------------------------+
7193// | | Save area for General Purpose registers |
7194// | +--------------------------------------------+
7195// | | Save area for Floating Point registers |
7196// | +--------------------------------------------+
7197// +---- | Back chain |
7198// High Memory +--------------------------------------------+
7199//
7200// Specifications:
7201// AIX 7.2 Assembler Language Reference
7202// Subroutine linkage convention
7203
7204SDValue PPCTargetLowering::LowerFormalArguments_AIX(
7205 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
7206 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
7207 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
7208
7209 assert((CallConv == CallingConv::C || CallConv == CallingConv::Cold ||
7210 CallConv == CallingConv::Fast) &&
7211 "Unexpected calling convention!");
7212
7213 if (getTargetMachine().Options.GuaranteedTailCallOpt)
7214 report_fatal_error("Tail call support is unimplemented on AIX.");
7215
7216 if (useSoftFloat())
7217 report_fatal_error("Soft float support is unimplemented on AIX.");
7218
7219 const PPCSubtarget &Subtarget = DAG.getSubtarget<PPCSubtarget>();
7220
7221 const bool IsPPC64 = Subtarget.isPPC64();
7222 const unsigned PtrByteSize = IsPPC64 ? 8 : 4;
7223
7224 // Assign locations to all of the incoming arguments.
7225 SmallVector<CCValAssign, 16> ArgLocs;
7226 MachineFunction &MF = DAG.getMachineFunction();
7227 MachineFrameInfo &MFI = MF.getFrameInfo();
7228 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
7229 AIXCCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());
7230
7231 const EVT PtrVT = getPointerTy(MF.getDataLayout());
7232 // Reserve space for the linkage area on the stack.
7233 const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
7234 CCInfo.AllocateStack(LinkageSize, Align(PtrByteSize));
7235 uint64_t SaveStackPos = CCInfo.getStackSize();
7236 bool SaveParams = MF.getFunction().hasFnAttribute("save-reg-params");
7237 CCInfo.AnalyzeFormalArguments(Ins, CC_AIX);
7238
7239 SmallVector<SDValue, 8> MemOps;
7240
7241 for (size_t I = 0, End = ArgLocs.size(); I != End; /* No increment here */) {
7242 CCValAssign &VA = ArgLocs[I++];
7243 MVT LocVT = VA.getLocVT();
7244 MVT ValVT = VA.getValVT();
7245 ISD::ArgFlagsTy Flags = Ins[VA.getValNo()].Flags;
7246
7247 EVT ArgVT = Ins[VA.getValNo()].ArgVT;
7248 bool ArgSignExt = Ins[VA.getValNo()].Flags.isSExt();
7249 // For compatibility with the AIX XL compiler, the float args in the
7250 // parameter save area are initialized even if the argument is available
7251 // in register. The caller is required to initialize both the register
7252 // and memory, however, the callee can choose to expect it in either.
7253 // The memloc is dismissed here because the argument is retrieved from
7254 // the register.
7255 if (VA.isMemLoc() && VA.needsCustom() && ValVT.isFloatingPoint())
7256 continue;
7257
7258 if (SaveParams && VA.isRegLoc() && !Flags.isByVal() && !VA.needsCustom()) {
7259 const TargetRegisterClass *RegClass = getRegClassForSVT(
7260 LocVT.SimpleTy, IsPPC64, Subtarget.hasP8Vector(), Subtarget.hasVSX());
7261 // On PPC64, debugger assumes extended 8-byte values are stored from GPR.
7262 MVT SaveVT = RegClass == &PPC::G8RCRegClass ? MVT::i64 : LocVT;
7263 const Register VReg = MF.addLiveIn(VA.getLocReg(), RegClass);
7264 SDValue Parm = DAG.getCopyFromReg(Chain, dl, VReg, SaveVT);
7265 int FI = MFI.CreateFixedObject(SaveVT.getStoreSize(), SaveStackPos, true);
7266 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
7267 SDValue StoreReg = DAG.getStore(Chain, dl, Parm, FIN,
7268 MachinePointerInfo(), Align(PtrByteSize));
7269 SaveStackPos = alignTo(SaveStackPos + SaveVT.getStoreSize(), PtrByteSize);
7270 MemOps.push_back(StoreReg);
7271 }
7272
7273 if (SaveParams && (VA.isMemLoc() || Flags.isByVal()) && !VA.needsCustom()) {
7274 unsigned StoreSize =
7275 Flags.isByVal() ? Flags.getByValSize() : LocVT.getStoreSize();
7276 SaveStackPos = alignTo(SaveStackPos + StoreSize, PtrByteSize);
7277 }
7278
7279 auto HandleMemLoc = [&]() {
7280 const unsigned LocSize = LocVT.getStoreSize();
7281 const unsigned ValSize = ValVT.getStoreSize();
7282 assert((ValSize <= LocSize) &&
7283 "Object size is larger than size of MemLoc");
7284 int CurArgOffset = VA.getLocMemOffset();
7285 // Objects are right-justified because AIX is big-endian.
7286 if (LocSize > ValSize)
7287 CurArgOffset += LocSize - ValSize;
7288 // Potential tail calls could cause overwriting of argument stack slots.
7289 const bool IsImmutable =
7290 !(getTargetMachine().Options.GuaranteedTailCallOpt &&
7291 (CallConv == CallingConv::Fast));
7292 int FI = MFI.CreateFixedObject(ValSize, CurArgOffset, IsImmutable);
7293 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
7294 SDValue ArgValue =
7295 DAG.getLoad(ValVT, dl, Chain, FIN, MachinePointerInfo());
7296
7297 // While the ABI specifies the argument type is (sign or zero) extended
7298 // out to register width, not all code is compliant. We truncate and
7299 // re-extend to be more forgiving of these callers when the argument type
7300 // is smaller than register width.
7301 if (!ArgVT.isVector() && !ValVT.isVector() && ArgVT.isInteger() &&
7302 ValVT.isInteger() &&
7303 ArgVT.getScalarSizeInBits() < ValVT.getScalarSizeInBits()) {
7304 SDValue ArgValueTrunc = DAG.getNode(
7305 ISD::TRUNCATE, dl, ArgVT.getSimpleVT() == MVT::i1 ? MVT::i8 : ArgVT,
7306 ArgValue);
7307 SDValue ArgValueExt =
7308 ArgSignExt ? DAG.getSExtOrTrunc(ArgValueTrunc, dl, ValVT)
7309 : DAG.getZExtOrTrunc(ArgValueTrunc, dl, ValVT);
7310 InVals.push_back(ArgValueExt);
7311 } else {
7312 InVals.push_back(ArgValue);
7313 }
7314 };
7315
7316 // Vector arguments to VaArg functions are passed both on the stack, and
7317 // in any available GPRs. Load the value from the stack and add the GPRs
7318 // as live ins.
7319 if (VA.isMemLoc() && VA.needsCustom()) {
7320 assert(ValVT.isVector() && "Unexpected Custom MemLoc type.");
7321 assert(isVarArg && "Only use custom memloc for vararg.");
7322 // ValNo of the custom MemLoc, so we can compare it to the ValNo of the
7323 // matching custom RegLocs.
7324 const unsigned OriginalValNo = VA.getValNo();
7325 (void)OriginalValNo;
7326
7327 auto HandleCustomVecRegLoc = [&]() {
7328 assert(I != End && ArgLocs[I].isRegLoc() && ArgLocs[I].needsCustom() &&
7329 "Missing custom RegLoc.");
7330 VA = ArgLocs[I++];
7331 assert(VA.getValVT().isVector() &&
7332 "Unexpected Val type for custom RegLoc.");
7333 assert(VA.getValNo() == OriginalValNo &&
7334 "ValNo mismatch between custom MemLoc and RegLoc.");
7335 MVT::SimpleValueType SVT = VA.getLocVT().SimpleTy;
7336 MF.addLiveIn(VA.getLocReg(),
7337 getRegClassForSVT(SVT, IsPPC64, Subtarget.hasP8Vector(),
7338 Subtarget.hasVSX()));
7339 };
7340
7341 HandleMemLoc();
7342 // In 64-bit there will be exactly 2 custom RegLocs that follow, and in
7343 // 32-bit there will be 2 custom RegLocs if we are passing in R9 and
7344 // R10.
7345 HandleCustomVecRegLoc();
7346 HandleCustomVecRegLoc();
7347
7348 // If we are targeting 32-bit, there might be 2 extra custom RegLocs if
7349 // we passed the vector in R5, R6, R7 and R8.
7350 if (I != End && ArgLocs[I].isRegLoc() && ArgLocs[I].needsCustom()) {
7351 assert(!IsPPC64 &&
7352 "Only 2 custom RegLocs expected for 64-bit codegen.");
7353 HandleCustomVecRegLoc();
7354 HandleCustomVecRegLoc();
7355 }
7356
7357 continue;
7358 }
7359
7360 if (VA.isRegLoc()) {
7361 if (VA.getValVT().isScalarInteger())
7362 FuncInfo->appendParameterType(PPCFunctionInfo::FixedType);
7363 else if (VA.getValVT().isFloatingPoint() && !VA.getValVT().isVector()) {
7364 switch (VA.getValVT().SimpleTy) {
7365 default:
7366 report_fatal_error("Unhandled value type for argument.");
7367 case MVT::f32:
7368 FuncInfo->appendParameterType(PPCFunctionInfo::ShortFloatingPoint);
7369 break;
7370 case MVT::f64:
7371 FuncInfo->appendParameterType(PPCFunctionInfo::LongFloatingPoint);
7372 break;
7373 }
7374 } else if (VA.getValVT().isVector()) {
7375 switch (VA.getValVT().SimpleTy) {
7376 default:
7377 report_fatal_error("Unhandled value type for argument.");
7378 case MVT::v16i8:
7379 FuncInfo->appendParameterType(PPCFunctionInfo::VectorChar);
7380 break;
7381 case MVT::v8i16:
7382 FuncInfo->appendParameterType(PPCFunctionInfo::VectorShort);
7383 break;
7384 case MVT::v4i32:
7385 case MVT::v2i64:
7386 case MVT::v1i128:
7387 FuncInfo->appendParameterType(PPCFunctionInfo::VectorInt);
7388 break;
7389 case MVT::v4f32:
7390 case MVT::v2f64:
7391 FuncInfo->appendParameterType(PPCFunctionInfo::VectorFloat);
7392 break;
7393 }
7394 }
7395 }
7396
7397 if (Flags.isByVal() && VA.isMemLoc()) {
7398 const unsigned Size =
7399 alignTo(Flags.getByValSize() ? Flags.getByValSize() : PtrByteSize,
7400 PtrByteSize);
7401 const int FI = MF.getFrameInfo().CreateFixedObject(
7402 Size, VA.getLocMemOffset(), /* IsImmutable */ false,
7403 /* IsAliased */ true);
7404 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
7405 InVals.push_back(FIN);
7406
7407 continue;
7408 }
7409
7410 if (Flags.isByVal()) {
7411 assert(VA.isRegLoc() && "MemLocs should already be handled.");
7412
7413 const MCPhysReg ArgReg = VA.getLocReg();
7414 const PPCFrameLowering *FL = Subtarget.getFrameLowering();
7415
7416 const unsigned StackSize = alignTo(Flags.getByValSize(), PtrByteSize);
7417 const int FI = MF.getFrameInfo().CreateFixedObject(
7418 StackSize, mapArgRegToOffsetAIX(ArgReg, FL), /* IsImmutable */ false,
7419 /* IsAliased */ true);
7420 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
7421 InVals.push_back(FIN);
7422
7423 // Add live ins for all the RegLocs for the same ByVal.
7424 const TargetRegisterClass *RegClass =
7425 IsPPC64 ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
7426
7427 auto HandleRegLoc = [&, RegClass, LocVT](const MCPhysReg PhysReg,
7428 unsigned Offset) {
7429 const Register VReg = MF.addLiveIn(PhysReg, RegClass);
7430 // Since the caller's side has left-justified the aggregate in the
7431 // register, we can simply store the entire register into the stack
7432 // slot.
7433 SDValue CopyFrom = DAG.getCopyFromReg(Chain, dl, VReg, LocVT);
7434 // The store to the fixedstack object is needed because accessing a
7435 // field of the ByVal will use a gep and load. Ideally we will optimize
7436 // to extracting the value from the register directly, and elide the
7437 // stores when the argument's address is not taken, but that will need to
7438 // be future work.
7439 SDValue Store = DAG.getStore(
7440 CopyFrom.getValue(1), dl, CopyFrom,
7441 DAG.getObjectPtrOffset(dl, FIN, TypeSize::getFixed(Offset)),
7442 MachinePointerInfo::getFixedStack(MF, FI, Offset));
7443
7444 MemOps.push_back(Store);
7445 };
7446
7447 unsigned Offset = 0;
7448 HandleRegLoc(VA.getLocReg(), Offset);
7449 Offset += PtrByteSize;
7450 for (; Offset != StackSize && ArgLocs[I].isRegLoc();
7451 Offset += PtrByteSize) {
7452 assert(ArgLocs[I].getValNo() == VA.getValNo() &&
7453 "RegLocs should be for ByVal argument.");
7454
7455 const CCValAssign RL = ArgLocs[I++];
7456 HandleRegLoc(RL.getLocReg(), Offset);
7458 }
7459
7460 if (Offset != StackSize) {
7461 assert(ArgLocs[I].getValNo() == VA.getValNo() &&
7462 "Expected MemLoc for remaining bytes.");
7463 assert(ArgLocs[I].isMemLoc() && "Expected MemLoc for remaining bytes.");
7464 // Consume the MemLoc. The InVal has already been emitted, so nothing
7465 // more needs to be done.
7466 ++I;
7467 }
7468
7469 continue;
7470 }
7471
7472 if (VA.isRegLoc() && !VA.needsCustom()) {
7473 MVT::SimpleValueType SVT = ValVT.SimpleTy;
7474 Register VReg =
7475 MF.addLiveIn(VA.getLocReg(),
7476 getRegClassForSVT(SVT, IsPPC64, Subtarget.hasP8Vector(),
7477 Subtarget.hasVSX()));
7478 SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, VReg, LocVT);
7479 if (ValVT.isScalarInteger() &&
7480 (ValVT.getFixedSizeInBits() < LocVT.getFixedSizeInBits())) {
7481 ArgValue =
7482 truncateScalarIntegerArg(Flags, ValVT, DAG, ArgValue, LocVT, dl);
7483 }
7484 InVals.push_back(ArgValue);
7485 continue;
7486 }
7487 if (VA.isMemLoc()) {
7488 HandleMemLoc();
7489 continue;
7490 }
7491 }
7492
7493 // On AIX a minimum of 8 words is saved to the parameter save area.
7494 const unsigned MinParameterSaveArea = 8 * PtrByteSize;
7495 // Area that is at least reserved in the caller of this function.
7496 unsigned CallerReservedArea = std::max<unsigned>(
7497 CCInfo.getStackSize(), LinkageSize + MinParameterSaveArea);
7498
7499 // Set the size that is at least reserved in caller of this function. Tail
7500 // call optimized function's reserved stack space needs to be aligned so
7501 // that taking the difference between two stack areas will result in an
7502 // aligned stack.
7503 CallerReservedArea =
7504 EnsureStackAlignment(Subtarget.getFrameLowering(), CallerReservedArea);
7505 FuncInfo->setMinReservedArea(CallerReservedArea);
7506
7507 if (isVarArg) {
7508 FuncInfo->setVarArgsFrameIndex(
7509 MFI.CreateFixedObject(PtrByteSize, CCInfo.getStackSize(), true));
7510 SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
7511
7512 static const MCPhysReg GPR_32[] = {PPC::R3, PPC::R4, PPC::R5, PPC::R6,
7513 PPC::R7, PPC::R8, PPC::R9, PPC::R10};
7514
7515 static const MCPhysReg GPR_64[] = {PPC::X3, PPC::X4, PPC::X5, PPC::X6,
7516 PPC::X7, PPC::X8, PPC::X9, PPC::X10};
7517 const unsigned NumGPArgRegs = std::size(IsPPC64 ? GPR_64 : GPR_32);
7518
7519 // The fixed integer arguments of a variadic function are stored to the
7520 // VarArgsFrameIndex on the stack so that they may be loaded by
7521 // dereferencing the result of va_next.
7522 for (unsigned GPRIndex =
7523 (CCInfo.getStackSize() - LinkageSize) / PtrByteSize;
7524 GPRIndex < NumGPArgRegs; ++GPRIndex) {
7525
7526 const Register VReg =
7527 IsPPC64 ? MF.addLiveIn(GPR_64[GPRIndex], &PPC::G8RCRegClass)
7528 : MF.addLiveIn(GPR_32[GPRIndex], &PPC::GPRCRegClass);
7529
7530 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
7531 SDValue Store =
7532 DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
7533 MemOps.push_back(Store);
7534 // Increment the address for the next argument to store.
7535 SDValue PtrOff = DAG.getConstant(PtrByteSize, dl, PtrVT);
7536 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
7537 }
7538 }
7539
7540 if (!MemOps.empty())
7541 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
7542
7543 return Chain;
7544}
7545
7546SDValue PPCTargetLowering::LowerCall_AIX(
7547 SDValue Chain, SDValue Callee, CallFlags CFlags,
7548 const SmallVectorImpl<ISD::OutputArg> &Outs,
7549 const SmallVectorImpl<SDValue> &OutVals,
7550 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
7551 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
7552 const CallBase *CB) const {
7553 // See PPCTargetLowering::LowerFormalArguments_AIX() for a description of the
7554 // AIX ABI stack frame layout.
7555
7556 assert((CFlags.CallConv == CallingConv::C ||
7557 CFlags.CallConv == CallingConv::Cold ||
7558 CFlags.CallConv == CallingConv::Fast) &&
7559 "Unexpected calling convention!");
7560
7561 if (CFlags.IsPatchPoint)
7562 report_fatal_error("This call type is unimplemented on AIX.");
7563
7564 const PPCSubtarget &Subtarget = DAG.getSubtarget<PPCSubtarget>();
7565
7566 MachineFunction &MF = DAG.getMachineFunction();
7567 SmallVector<CCValAssign, 16> ArgLocs;
7568 AIXCCState CCInfo(CFlags.CallConv, CFlags.IsVarArg, MF, ArgLocs,
7569 *DAG.getContext());
7570
7571 // Reserve space for the linkage save area (LSA) on the stack.
7572 // In both PPC32 and PPC64 there are 6 reserved slots in the LSA:
7573 // [SP][CR][LR][2 x reserved][TOC].
7574 // The LSA is 24 bytes (6x4) in PPC32 and 48 bytes (6x8) in PPC64.
7575 const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
7576 const bool IsPPC64 = Subtarget.isPPC64();
7577 const EVT PtrVT = getPointerTy(DAG.getDataLayout());
7578 const unsigned PtrByteSize = IsPPC64 ? 8 : 4;
7579 CCInfo.AllocateStack(LinkageSize, Align(PtrByteSize));
7580 CCInfo.AnalyzeCallOperands(Outs, CC_AIX);
7581
7582 // The prolog code of the callee may store up to 8 GPR argument registers to
7583 // the stack, allowing va_start to index over them in memory if the callee
7584 // is variadic.
7585 // Because we cannot tell if this is needed on the caller side, we have to
7586 // conservatively assume that it is needed. As such, make sure we have at
7587 // least enough stack space for the caller to store the 8 GPRs.
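// For example, on 64-bit AIX this is max(48-byte linkage area + 64 bytes for
// the 8 GPRs, the analyzed argument area), i.e. never less than 112 bytes.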
7588 const unsigned MinParameterSaveAreaSize = 8 * PtrByteSize;
7589 const unsigned NumBytes = std::max<unsigned>(
7590 LinkageSize + MinParameterSaveAreaSize, CCInfo.getStackSize());
7591
7592 // Adjust the stack pointer for the new arguments...
7593 // These operations are automatically eliminated by the prolog/epilog pass.
7594 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
7595 SDValue CallSeqStart = Chain;
7596
7597 SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
7598 SmallVector<SDValue, 8> MemOpChains;
7599
7600 // Set up a copy of the stack pointer for loading and storing any
7601 // arguments that may not fit in the registers available for argument
7602 // passing.
7603 const SDValue StackPtr = IsPPC64 ? DAG.getRegister(PPC::X1, MVT::i64)
7604 : DAG.getRegister(PPC::R1, MVT::i32);
7605
7606 for (unsigned I = 0, E = ArgLocs.size(); I != E;) {
7607 const unsigned ValNo = ArgLocs[I].getValNo();
7608 SDValue Arg = OutVals[ValNo];
7609 ISD::ArgFlagsTy Flags = Outs[ValNo].Flags;
7610
7611 if (Flags.isByVal()) {
7612 const unsigned ByValSize = Flags.getByValSize();
7613
7614 // Nothing to do for zero-sized ByVals on the caller side.
7615 if (!ByValSize) {
7616 ++I;
7617 continue;
7618 }
7619
7620 auto GetLoad = [&](EVT VT, unsigned LoadOffset) {
7621 return DAG.getExtLoad(ISD::ZEXTLOAD, dl, PtrVT, Chain,
7622 (LoadOffset != 0)
7623 ? DAG.getObjectPtrOffset(
7624 dl, Arg, TypeSize::getFixed(LoadOffset))
7625 : Arg,
7626 MachinePointerInfo(), VT);
7627 };
7628
7629 unsigned LoadOffset = 0;
7630
7631 // Initialize registers, which are fully occupied by the by-val argument.
7632 while (LoadOffset + PtrByteSize <= ByValSize && ArgLocs[I].isRegLoc()) {
7633 SDValue Load = GetLoad(PtrVT, LoadOffset);
7634 MemOpChains.push_back(Load.getValue(1));
7635 LoadOffset += PtrByteSize;
7636 const CCValAssign &ByValVA = ArgLocs[I++];
7637 assert(ByValVA.getValNo() == ValNo &&
7638 "Unexpected location for pass-by-value argument.");
7639 RegsToPass.push_back(std::make_pair(ByValVA.getLocReg(), Load));
7640 }
7641
7642 if (LoadOffset == ByValSize)
7643 continue;
7644
7645 // There must be one more loc to handle the remainder.
7646 assert(ArgLocs[I].getValNo() == ValNo &&
7647 "Expected additional location for by-value argument.");
7648
7649 if (ArgLocs[I].isMemLoc()) {
7650 assert(LoadOffset < ByValSize && "Unexpected memloc for by-val arg.");
7651 const CCValAssign &ByValVA = ArgLocs[I++];
7652 ISD::ArgFlagsTy MemcpyFlags = Flags;
7653 // Only memcpy the bytes that don't pass in register.
7654 MemcpyFlags.setByValSize(ByValSize - LoadOffset);
7655 Chain = CallSeqStart = createMemcpyOutsideCallSeq(
7656 (LoadOffset != 0) ? DAG.getObjectPtrOffset(
7657 dl, Arg, TypeSize::getFixed(LoadOffset))
7658 : Arg,
7659 DAG.getObjectPtrOffset(
7660 dl, StackPtr, TypeSize::getFixed(ByValVA.getLocMemOffset())),
7661 CallSeqStart, MemcpyFlags, DAG, dl);
7662 continue;
7663 }
7664
7665 // Initialize the final register residue.
7666 // Any residue that occupies the final by-val arg register must be
7667 // left-justified on AIX. Loads must be a power-of-2 size and cannot be
7668 // larger than the ByValSize. For example: a 7 byte by-val arg requires 4,
7669 // 2 and 1 byte loads.
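// Each partial load is then shifted into place so the residue ends up
// left-justified: for that 7-byte example on a 64-bit target, the i32 load is
// shifted left by 32, the i16 load by 16 and the i8 load by 8 before being
// OR'ed together into the final register value.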
7670 const unsigned ResidueBytes = ByValSize % PtrByteSize;
7671 assert(ResidueBytes != 0 && LoadOffset + PtrByteSize > ByValSize &&
7672 "Unexpected register residue for by-value argument.");
7673 SDValue ResidueVal;
7674 for (unsigned Bytes = 0; Bytes != ResidueBytes;) {
7675 const unsigned N = llvm::bit_floor(ResidueBytes - Bytes);
7676 const MVT VT =
7677 N == 1 ? MVT::i8
7678 : ((N == 2) ? MVT::i16 : (N == 4 ? MVT::i32 : MVT::i64));
7679 SDValue Load = GetLoad(VT, LoadOffset);
7680 MemOpChains.push_back(Load.getValue(1));
7681 LoadOffset += N;
7682 Bytes += N;
7683
7684 // By-val arguments are passed left-justified in register.
7685 // Every load here needs to be shifted, otherwise a full register load
7686 // should have been used.
7687 assert(PtrVT.getSimpleVT().getSizeInBits() > (Bytes * 8) &&
7688 "Unexpected load emitted during handling of pass-by-value "
7689 "argument.");
7690 unsigned NumSHLBits = PtrVT.getSimpleVT().getSizeInBits() - (Bytes * 8);
7691 EVT ShiftAmountTy =
7692 getShiftAmountTy(Load->getValueType(0), DAG.getDataLayout());
7693 SDValue SHLAmt = DAG.getConstant(NumSHLBits, dl, ShiftAmountTy);
7694 SDValue ShiftedLoad =
7695 DAG.getNode(ISD::SHL, dl, Load.getValueType(), Load, SHLAmt);
7696 ResidueVal = ResidueVal ? DAG.getNode(ISD::OR, dl, PtrVT, ResidueVal,
7697 ShiftedLoad)
7698 : ShiftedLoad;
7699 }
7700
7701 const CCValAssign &ByValVA = ArgLocs[I++];
7702 RegsToPass.push_back(std::make_pair(ByValVA.getLocReg(), ResidueVal));
7703 continue;
7704 }
7705
7706 CCValAssign &VA = ArgLocs[I++];
7707 const MVT LocVT = VA.getLocVT();
7708 const MVT ValVT = VA.getValVT();
7709
7710 switch (VA.getLocInfo()) {
7711 default:
7712 report_fatal_error("Unexpected argument extension type.");
7713 case CCValAssign::Full:
7714 break;
7715 case CCValAssign::ZExt:
7716 Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
7717 break;
7718 case CCValAssign::SExt:
7719 Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
7720 break;
7721 }
7722
7723 if (VA.isRegLoc() && !VA.needsCustom()) {
7724 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
7725 continue;
7726 }
7727
7728 // Vector arguments passed to VarArg functions need custom handling when
7729 // they are passed (at least partially) in GPRs.
7730 if (VA.isMemLoc() && VA.needsCustom() && ValVT.isVector()) {
7731 assert(CFlags.IsVarArg && "Custom MemLocs only used for Vector args.");
7732 // Store value to its stack slot.
7733 SDValue PtrOff =
7734 DAG.getConstant(VA.getLocMemOffset(), dl, StackPtr.getValueType());
7735 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
7736 SDValue Store =
7737 DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo());
7738 MemOpChains.push_back(Store);
7739 const unsigned OriginalValNo = VA.getValNo();
7740 // Then load the GPRs from the stack
7741 unsigned LoadOffset = 0;
7742 auto HandleCustomVecRegLoc = [&]() {
7743 assert(I != E && "Unexpected end of CCvalAssigns.");
7744 assert(ArgLocs[I].isRegLoc() && ArgLocs[I].needsCustom() &&
7745 "Expected custom RegLoc.");
7746 CCValAssign RegVA = ArgLocs[I++];
7747 assert(RegVA.getValNo() == OriginalValNo &&
7748 "Custom MemLoc ValNo and custom RegLoc ValNo must match.");
7749 SDValue Add = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
7750 DAG.getConstant(LoadOffset, dl, PtrVT));
7751 SDValue Load = DAG.getLoad(PtrVT, dl, Store, Add, MachinePointerInfo());
7752 MemOpChains.push_back(Load.getValue(1));
7753 RegsToPass.push_back(std::make_pair(RegVA.getLocReg(), Load));
7754 LoadOffset += PtrByteSize;
7755 };
7756
7757 // In 64-bit there will be exactly 2 custom RegLocs that follow, and in
7758 // 32-bit there will be 2 custom RegLocs if we are passing in R9 and
7759 // R10.
7760 HandleCustomVecRegLoc();
7761 HandleCustomVecRegLoc();
7762
7763 if (I != E && ArgLocs[I].isRegLoc() && ArgLocs[I].needsCustom() &&
7764 ArgLocs[I].getValNo() == OriginalValNo) {
7765 assert(!IsPPC64 &&
7766 "Only 2 custom RegLocs expected for 64-bit codegen.");
7767 HandleCustomVecRegLoc();
7768 HandleCustomVecRegLoc();
7769 }
7770
7771 continue;
7772 }
7773
7774 if (VA.isMemLoc()) {
7775 SDValue PtrOff =
7776 DAG.getConstant(VA.getLocMemOffset(), dl, StackPtr.getValueType());
7777 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
7778 MemOpChains.push_back(
7779 DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));
7780
7781 continue;
7782 }
7783
7784 if (!ValVT.isFloatingPoint())
7786 "Unexpected register handling for calling convention.");
7787
7788 // Custom handling is used for GPR initializations for vararg float
7789 // arguments.
7790 assert(VA.isRegLoc() && VA.needsCustom() && CFlags.IsVarArg &&
7791 LocVT.isInteger() &&
7792 "Custom register handling only expected for VarArg.");
7793
7794 SDValue ArgAsInt =
7795 DAG.getBitcast(MVT::getIntegerVT(ValVT.getSizeInBits()), Arg);
7796
7797 if (Arg.getValueType().getStoreSize() == LocVT.getStoreSize())
7798 // f32 in 32-bit GPR
7799 // f64 in 64-bit GPR
7800 RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgAsInt));
7801 else if (Arg.getValueType().getFixedSizeInBits() <
7802 LocVT.getFixedSizeInBits())
7803 // f32 in 64-bit GPR.
7804 RegsToPass.push_back(std::make_pair(
7805 VA.getLocReg(), DAG.getZExtOrTrunc(ArgAsInt, dl, LocVT)));
7806 else {
7807 // f64 in two 32-bit GPRs
7808 // The 2 GPRs are marked custom and expected to be adjacent in ArgLocs.
7809 assert(Arg.getValueType() == MVT::f64 && CFlags.IsVarArg && !IsPPC64 &&
7810 "Unexpected custom register for argument!");
7811 CCValAssign &GPR1 = VA;
7812 SDValue MSWAsI64 = DAG.getNode(ISD::SRL, dl, MVT::i64, ArgAsInt,
7813 DAG.getConstant(32, dl, MVT::i8));
7814 RegsToPass.push_back(std::make_pair(
7815 GPR1.getLocReg(), DAG.getZExtOrTrunc(MSWAsI64, dl, MVT::i32)));
7816
7817 if (I != E) {
7818 // If only 1 GPR was available, there will only be one custom GPR and
7819 // the argument will also pass in memory.
7820 CCValAssign &PeekArg = ArgLocs[I];
7821 if (PeekArg.isRegLoc() && PeekArg.getValNo() == VA.getValNo()) {
7822 assert(PeekArg.needsCustom() && "A second custom GPR is expected.");
7823 CCValAssign &GPR2 = ArgLocs[I++];
7824 RegsToPass.push_back(std::make_pair(
7825 GPR2.getLocReg(), DAG.getZExtOrTrunc(ArgAsInt, dl, MVT::i32)));
7826 }
7827 }
7828 }
7829 }
7830
7831 if (!MemOpChains.empty())
7832 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
7833
7834 // For indirect calls, we need to save the TOC base to the stack for
7835 // restoration after the call.
7836 if (CFlags.IsIndirect) {
7837 assert(!CFlags.IsTailCall && "Indirect tail-calls not supported.");
7838 const MCRegister TOCBaseReg = Subtarget.getTOCPointerRegister();
7839 const MCRegister StackPtrReg = Subtarget.getStackPointerRegister();
7840 const MVT PtrVT = Subtarget.getScalarIntVT();
7841 const unsigned TOCSaveOffset =
7842 Subtarget.getFrameLowering()->getTOCSaveOffset();
7843
7844 setUsesTOCBasePtr(DAG);
7845 SDValue Val = DAG.getCopyFromReg(Chain, dl, TOCBaseReg, PtrVT);
7846 SDValue PtrOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
7847 SDValue StackPtr = DAG.getRegister(StackPtrReg, PtrVT);
7848 SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
7849 Chain = DAG.getStore(
7850 Val.getValue(1), dl, Val, AddPtr,
7851 MachinePointerInfo::getStack(DAG.getMachineFunction(), TOCSaveOffset));
7852 }
7853
7854 // Build a sequence of copy-to-reg nodes chained together with token chain
7855 // and flag operands which copy the outgoing args into the appropriate regs.
7856 SDValue InGlue;
7857 for (auto Reg : RegsToPass) {
7858 Chain = DAG.getCopyToReg(Chain, dl, Reg.first, Reg.second, InGlue);
7859 InGlue = Chain.getValue(1);
7860 }
7861
7862 const int SPDiff = 0;
7863 return FinishCall(CFlags, dl, DAG, RegsToPass, InGlue, Chain, CallSeqStart,
7864 Callee, SPDiff, NumBytes, Ins, InVals, CB);
7865}
7866
7867bool
7868PPCTargetLowering::CanLowerReturn(CallingConv::ID CallConv,
7869 MachineFunction &MF, bool isVarArg,
7870 const SmallVectorImpl<ISD::OutputArg> &Outs,
7871 LLVMContext &Context) const {
7872 SmallVector<CCValAssign, 16> RVLocs;
7873 CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
7874 return CCInfo.CheckReturn(
7875 Outs, (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold)
7876 ? RetCC_PPC_Cold
7877 : RetCC_PPC);
7878}
7879
7880SDValue
7881PPCTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
7882 bool isVarArg,
7883 const SmallVectorImpl<ISD::OutputArg> &Outs,
7884 const SmallVectorImpl<SDValue> &OutVals,
7885 const SDLoc &dl, SelectionDAG &DAG) const {
7886 SmallVector<CCValAssign, 16> RVLocs;
7887 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
7888 *DAG.getContext());
7889 CCInfo.AnalyzeReturn(Outs,
7890 (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold)
7891 ? RetCC_PPC_Cold
7892 : RetCC_PPC);
7893
7894 SDValue Glue;
7895 SmallVector<SDValue, 4> RetOps(1, Chain);
7896
7897 // Copy the result values into the output registers.
7898 for (unsigned i = 0, RealResIdx = 0; i != RVLocs.size(); ++i, ++RealResIdx) {
7899 CCValAssign &VA = RVLocs[i];
7900 assert(VA.isRegLoc() && "Can only return in registers!");
7901
7902 SDValue Arg = OutVals[RealResIdx];
7903
7904 switch (VA.getLocInfo()) {
7905 default: llvm_unreachable("Unknown loc info!");
7906 case CCValAssign::Full: break;
7907 case CCValAssign::AExt:
7908 Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
7909 break;
7910 case CCValAssign::ZExt:
7911 Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
7912 break;
7913 case CCValAssign::SExt:
7914 Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
7915 break;
7916 }
7917 if (Subtarget.hasSPE() && VA.getLocVT() == MVT::f64) {
7918 bool isLittleEndian = Subtarget.isLittleEndian();
7919 // Legalize ret f64 -> ret 2 x i32.
7920 SDValue SVal =
7921 DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
7922 DAG.getIntPtrConstant(isLittleEndian ? 0 : 1, dl));
7923 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), SVal, Glue);
7924 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
7925 SVal = DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
7926 DAG.getIntPtrConstant(isLittleEndian ? 1 : 0, dl));
7927 Glue = Chain.getValue(1);
7928 VA = RVLocs[++i]; // skip ahead to next loc
7929 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), SVal, Glue);
7930 } else
7931 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Glue);
7932 Glue = Chain.getValue(1);
7933 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
7934 }
7935
7936 RetOps[0] = Chain; // Update chain.
7937
7938 // Add the glue if we have it.
7939 if (Glue.getNode())
7940 RetOps.push_back(Glue);
7941
7942 return DAG.getNode(PPCISD::RET_GLUE, dl, MVT::Other, RetOps);
7943}
7944
7945SDValue
7946PPCTargetLowering::LowerGET_DYNAMIC_AREA_OFFSET(SDValue Op,
7947 SelectionDAG &DAG) const {
7948 SDLoc dl(Op);
7949
7950 // Get the correct type for integers.
7951 EVT IntVT = Op.getValueType();
7952
7953 // Get the inputs.
7954 SDValue Chain = Op.getOperand(0);
7955 SDValue FPSIdx = getFramePointerFrameIndex(DAG);
7956 // Build a DYNAREAOFFSET node.
7957 SDValue Ops[2] = {Chain, FPSIdx};
7958 SDVTList VTs = DAG.getVTList(IntVT);
7959 return DAG.getNode(PPCISD::DYNAREAOFFSET, dl, VTs, Ops);
7960}
7961
7962SDValue PPCTargetLowering::LowerSTACKRESTORE(SDValue Op,
7963 SelectionDAG &DAG) const {
7964 // When we pop the dynamic allocation we need to restore the SP link.
7965 SDLoc dl(Op);
7966
7967 // Get the correct type for pointers.
7968 EVT PtrVT = getPointerTy(DAG.getDataLayout());
7969
7970 // Construct the stack pointer operand.
7971 bool isPPC64 = Subtarget.isPPC64();
7972 unsigned SP = isPPC64 ? PPC::X1 : PPC::R1;
7973 SDValue StackPtr = DAG.getRegister(SP, PtrVT);
7974
7975 // Get the operands for the STACKRESTORE.
7976 SDValue Chain = Op.getOperand(0);
7977 SDValue SaveSP = Op.getOperand(1);
7978
7979 // Load the old link SP.
7980 SDValue LoadLinkSP =
7981 DAG.getLoad(PtrVT, dl, Chain, StackPtr, MachinePointerInfo());
7982
7983 // Restore the stack pointer.
7984 Chain = DAG.getCopyToReg(LoadLinkSP.getValue(1), dl, SP, SaveSP);
7985
7986 // Store the old link SP.
7987 return DAG.getStore(Chain, dl, LoadLinkSP, StackPtr, MachinePointerInfo());
7988}
7989
7990SDValue PPCTargetLowering::getReturnAddrFrameIndex(SelectionDAG &DAG) const {
7991 MachineFunction &MF = DAG.getMachineFunction();
7992 bool isPPC64 = Subtarget.isPPC64();
7993 EVT PtrVT = getPointerTy(MF.getDataLayout());
7994
7995 // Get the current return address save index. The users of this index
7996 // will be primarily the lowering of RETURNADDR.
7997 PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
7998 int RASI = FI->getReturnAddrSaveIndex();
7999
8000 // If the return address save index hasn't been defined yet.
8001 if (!RASI) {
8002 // Find out the fixed offset of the return address save area.
8003 int LROffset = Subtarget.getFrameLowering()->getReturnSaveOffset();
8004 // Allocate the frame index for the return address save area.
8005 RASI = MF.getFrameInfo().CreateFixedObject(isPPC64? 8 : 4, LROffset, false);
8006 // Save the result.
8007 FI->setReturnAddrSaveIndex(RASI);
8008 }
8009 return DAG.getFrameIndex(RASI, PtrVT);
8010}
8011
8012SDValue
8013 PPCTargetLowering::getFramePointerFrameIndex(SelectionDAG &DAG) const {
8014 MachineFunction &MF = DAG.getMachineFunction();
8015 bool isPPC64 = Subtarget.isPPC64();
8016 EVT PtrVT = getPointerTy(MF.getDataLayout());
8017
8018 // Get current frame pointer save index. The users of this index will be
8019 // primarily DYNALLOC instructions.
8020 PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
8021 int FPSI = FI->getFramePointerSaveIndex();
8022
8023 // If the frame pointer save index hasn't been defined yet.
8024 if (!FPSI) {
8025 // Find out the fixed offset of the frame pointer save area.
8026 int FPOffset = Subtarget.getFrameLowering()->getFramePointerSaveOffset();
8027 // Allocate the frame index for frame pointer save area.
8028 FPSI = MF.getFrameInfo().CreateFixedObject(isPPC64? 8 : 4, FPOffset, true);
8029 // Save the result.
8030 FI->setFramePointerSaveIndex(FPSI);
8031 }
8032 return DAG.getFrameIndex(FPSI, PtrVT);
8033}
8034
8035SDValue PPCTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
8036 SelectionDAG &DAG) const {
8037 MachineFunction &MF = DAG.getMachineFunction();
8038 // Get the inputs.
8039 SDValue Chain = Op.getOperand(0);
8040 SDValue Size = Op.getOperand(1);
8041 SDLoc dl(Op);
8042
8043 // Get the correct type for pointers.
8044 EVT PtrVT = getPointerTy(DAG.getDataLayout());
8045 // Negate the size.
8046 SDValue NegSize = DAG.getNode(ISD::SUB, dl, PtrVT,
8047 DAG.getConstant(0, dl, PtrVT), Size);
8048 // Construct a node for the frame pointer save index.
8049 SDValue FPSIdx = getFramePointerFrameIndex(DAG);
8050 SDValue Ops[3] = { Chain, NegSize, FPSIdx };
8051 SDVTList VTs = DAG.getVTList(PtrVT, MVT::Other);
8052 if (hasInlineStackProbe(MF))
8053 return DAG.getNode(PPCISD::PROBED_ALLOCA, dl, VTs, Ops);
8054 return DAG.getNode(PPCISD::DYNALLOC, dl, VTs, Ops);
8055}
8056
8057SDValue PPCTargetLowering::LowerEH_DWARF_CFA(SDValue Op,
8058 SelectionDAG &DAG) const {
8059 MachineFunction &MF = DAG.getMachineFunction();
8060
8061 bool isPPC64 = Subtarget.isPPC64();
8062 EVT PtrVT = getPointerTy(DAG.getDataLayout());
8063
8064 int FI = MF.getFrameInfo().CreateFixedObject(isPPC64 ? 8 : 4, 0, false);
8065 return DAG.getFrameIndex(FI, PtrVT);
8066}
8067
8068SDValue PPCTargetLowering::lowerEH_SJLJ_SETJMP(SDValue Op,
8069 SelectionDAG &DAG) const {
8070 SDLoc DL(Op);
8071 return DAG.getNode(PPCISD::EH_SJLJ_SETJMP, DL,
8072 DAG.getVTList(MVT::i32, MVT::Other),
8073 Op.getOperand(0), Op.getOperand(1));
8074}
8075
8076SDValue PPCTargetLowering::lowerEH_SJLJ_LONGJMP(SDValue Op,
8077 SelectionDAG &DAG) const {
8078 SDLoc DL(Op);
8079 return DAG.getNode(PPCISD::EH_SJLJ_LONGJMP, DL, MVT::Other,
8080 Op.getOperand(0), Op.getOperand(1));
8081}
8082
8083SDValue PPCTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
8084 if (Op.getValueType().isVector())
8085 return LowerVectorLoad(Op, DAG);
8086
8087 assert(Op.getValueType() == MVT::i1 &&
8088 "Custom lowering only for i1 loads");
8089
8090 // First, load 8 bits into 32 bits, then truncate to 1 bit.
8091
8092 SDLoc dl(Op);
8093 LoadSDNode *LD = cast<LoadSDNode>(Op);
8094
8095 SDValue Chain = LD->getChain();
8096 SDValue BasePtr = LD->getBasePtr();
8097 MachineMemOperand *MMO = LD->getMemOperand();
8098
8099 SDValue NewLD =
8100 DAG.getExtLoad(ISD::EXTLOAD, dl, getPointerTy(DAG.getDataLayout()), Chain,
8101 BasePtr, MVT::i8, MMO);
8102 SDValue Result = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, NewLD);
8103
8104 SDValue Ops[] = { Result, SDValue(NewLD.getNode(), 1) };
8105 return DAG.getMergeValues(Ops, dl);
8106}
8107
8108SDValue PPCTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
8109 if (Op.getOperand(1).getValueType().isVector())
8110 return LowerVectorStore(Op, DAG);
8111
8112 assert(Op.getOperand(1).getValueType() == MVT::i1 &&
8113 "Custom lowering only for i1 stores");
8114
8115 // First, zero extend to 32 bits, then use a truncating store to 8 bits.
8116
8117 SDLoc dl(Op);
8118 StoreSDNode *ST = cast<StoreSDNode>(Op);
8119
8120 SDValue Chain = ST->getChain();
8121 SDValue BasePtr = ST->getBasePtr();
8122 SDValue Value = ST->getValue();
8123 MachineMemOperand *MMO = ST->getMemOperand();
8124
8125 Value = DAG.getNode(ISD::ZERO_EXTEND, dl, getPointerTy(DAG.getDataLayout()),
8126 Value);
8127 return DAG.getTruncStore(Chain, dl, Value, BasePtr, MVT::i8, MMO);
8128}
8129
8130// FIXME: Remove this once the ANDI glue bug is fixed:
8131SDValue PPCTargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
8132 assert(Op.getValueType() == MVT::i1 &&
8133 "Custom lowering only for i1 results");
8134
8135 SDLoc DL(Op);
8136 return DAG.getNode(PPCISD::ANDI_rec_1_GT_BIT, DL, MVT::i1, Op.getOperand(0));
8137}
8138
8139SDValue PPCTargetLowering::LowerTRUNCATEVector(SDValue Op,
8140 SelectionDAG &DAG) const {
8141
8142 // Implements a vector truncate that fits in a vector register as a shuffle.
8143 // We want to legalize vector truncates down to where the source fits in
8144 // a vector register (and target is therefore smaller than vector register
8145 // size). At that point legalization will try to custom lower the sub-legal
8146 // result and get here - where we can contain the truncate as a single target
8147 // operation.
8148
8149 // For example a trunc <2 x i16> to <2 x i8> could be visualized as follows:
8150 // <MSB1|LSB1, MSB2|LSB2> to <LSB1, LSB2>
8151 //
8152 // We will implement it for big-endian ordering as this (where x denotes
8153 // undefined):
8154 // < MSB1|LSB1, MSB2|LSB2, uu, uu, uu, uu, uu, uu> to
8155 // < LSB1, LSB2, u, u, u, u, u, u, u, u, u, u, u, u, u, u>
8156 //
8157 // The same operation in little-endian ordering will be:
8158 // <uu, uu, uu, uu, uu, uu, LSB2|MSB2, LSB1|MSB1> to
8159 // <u, u, u, u, u, u, u, u, u, u, u, u, u, u, LSB2, LSB1>
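// For example (little-endian, v4i16 -> v4i8): the 64-bit source is widened to
// 128 bits and bitcast to v16i8; SizeMult = 64/32 = 2, so the kept lanes are
// ShuffV = {0, 2, 4, 6} (the low byte of each halfword) and the remaining
// lanes are don't-care padding indices.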
8160
8161 EVT TrgVT = Op.getValueType();
8162 assert(TrgVT.isVector() && "Vector type expected.");
8163 unsigned TrgNumElts = TrgVT.getVectorNumElements();
8164 EVT EltVT = TrgVT.getVectorElementType();
8165 if (!isOperationCustom(Op.getOpcode(), TrgVT) ||
8166 TrgVT.getSizeInBits() > 128 || !isPowerOf2_32(TrgNumElts) ||
8167 !llvm::has_single_bit<uint32_t>(EltVT.getSizeInBits()))
8168 return SDValue();
8169
8170 SDValue N1 = Op.getOperand(0);
8171 EVT SrcVT = N1.getValueType();
8172 unsigned SrcSize = SrcVT.getSizeInBits();
8173 if (SrcSize > 256 || !isPowerOf2_32(SrcVT.getVectorNumElements()) ||
8174 !llvm::has_single_bit<uint32_t>(
8175 SrcVT.getVectorElementType().getSizeInBits()))
8176 return SDValue();
8177 if (SrcSize == 256 && SrcVT.getVectorNumElements() < 2)
8178 return SDValue();
8179
8180 unsigned WideNumElts = 128 / EltVT.getSizeInBits();
8181 EVT WideVT = EVT::getVectorVT(*DAG.getContext(), EltVT, WideNumElts);
8182
8183 SDLoc DL(Op);
8184 SDValue Op1, Op2;
8185 if (SrcSize == 256) {
8186 EVT VecIdxTy = getVectorIdxTy(DAG.getDataLayout());
8187 EVT SplitVT =
8189 unsigned SplitNumElts = SplitVT.getVectorNumElements();
8190 Op1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, N1,
8191 DAG.getConstant(0, DL, VecIdxTy));
8192 Op2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, N1,
8193 DAG.getConstant(SplitNumElts, DL, VecIdxTy));
8194 }
8195 else {
8196 Op1 = SrcSize == 128 ? N1 : widenVec(DAG, N1, DL);
8197 Op2 = DAG.getUNDEF(WideVT);
8198 }
8199
8200 // First list the elements we want to keep.
8201 unsigned SizeMult = SrcSize / TrgVT.getSizeInBits();
8202 SmallVector<int, 16> ShuffV;
8203 if (Subtarget.isLittleEndian())
8204 for (unsigned i = 0; i < TrgNumElts; ++i)
8205 ShuffV.push_back(i * SizeMult);
8206 else
8207 for (unsigned i = 1; i <= TrgNumElts; ++i)
8208 ShuffV.push_back(i * SizeMult - 1);
8209
8210 // Populate the remaining elements with undefs.
8211 for (unsigned i = TrgNumElts; i < WideNumElts; ++i)
8212 ShuffV.push_back(i + WideNumElts);
8214
8215 Op1 = DAG.getNode(ISD::BITCAST, DL, WideVT, Op1);
8216 Op2 = DAG.getNode(ISD::BITCAST, DL, WideVT, Op2);
8217 return DAG.getVectorShuffle(WideVT, DL, Op1, Op2, ShuffV);
8218}
8219
8220/// LowerSELECT_CC - Lower floating point select_cc's into fsel instruction when
8221/// possible.
8222SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
8223 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
8224 EVT ResVT = Op.getValueType();
8225 EVT CmpVT = Op.getOperand(0).getValueType();
8226 SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
8227 SDValue TV = Op.getOperand(2), FV = Op.getOperand(3);
8228 SDLoc dl(Op);
8229
8230 // Without power9-vector, we don't have a native instruction for f128 comparisons.
8231 // The following transformation to a libcall is needed for setcc:
8232 // select_cc lhs, rhs, tv, fv, cc -> select_cc (setcc cc, x, y), 0, tv, fv, NE
8233 if (!Subtarget.hasP9Vector() && CmpVT == MVT::f128) {
8234 SDValue Z = DAG.getSetCC(
8235 dl, getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), CmpVT),
8236 LHS, RHS, CC);
8237 SDValue Zero = DAG.getConstant(0, dl, Z.getValueType());
8238 return DAG.getSelectCC(dl, Z, Zero, TV, FV, ISD::SETNE);
8239 }
8240
8241 // Not FP, or using SPE? Not a fsel.
8242 if (!CmpVT.isFloatingPoint() || !TV.getValueType().isFloatingPoint() ||
8243 Subtarget.hasSPE())
8244 return Op;
8245
8246 SDNodeFlags Flags = Op.getNode()->getFlags();
8247
8248 // We have xsmaxc[dq]p/xsminc[dq]p which are OK to emit even in the
8249 // presence of infinities.
8250 if (Subtarget.hasP9Vector() && LHS == TV && RHS == FV) {
8251 switch (CC) {
8252 default:
8253 break;
8254 case ISD::SETOGT:
8255 case ISD::SETGT:
8256 return DAG.getNode(PPCISD::XSMAXC, dl, Op.getValueType(), LHS, RHS);
8257 case ISD::SETOLT:
8258 case ISD::SETLT:
8259 return DAG.getNode(PPCISD::XSMINC, dl, Op.getValueType(), LHS, RHS);
8260 }
8261 }
8262
8263 // We might be able to do better than this under some circumstances, but in
8264 // general, fsel-based lowering of select is a finite-math-only optimization.
8265 // For more information, see section F.3 of the 2.06 ISA specification.
8266 // With ISA 3.0
8267 if ((!DAG.getTarget().Options.NoInfsFPMath && !Flags.hasNoInfs()) ||
8268 (!DAG.getTarget().Options.NoNaNsFPMath && !Flags.hasNoNaNs()) ||
8269 ResVT == MVT::f128)
8270 return Op;
8271
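// Recall that fsel FRT,FRA,FRC,FRB computes FRT = (FRA >= 0.0) ? FRC : FRB,
// so PPCISD::FSEL(Cmp, TV, FV) selects TV when Cmp >= 0.0. That is why setge
// is the native form below and the other predicates are obtained by swapping
// TV/FV and/or negating the value being compared.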
8272 // If the RHS of the comparison is a 0.0, we don't need to do the
8273 // subtraction at all.
8274 SDValue Sel1;
8275 if (isFloatingPointZero(RHS))
8276 switch (CC) {
8277 default: break; // SETUO etc aren't handled by fsel.
8278 case ISD::SETNE:
8279 std::swap(TV, FV);
8280 [[fallthrough]];
8281 case ISD::SETEQ:
8282 if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits
8283 LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
8284 Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV);
8285 if (Sel1.getValueType() == MVT::f32) // Comparison is always 64-bits
8286 Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1);
8287 return DAG.getNode(PPCISD::FSEL, dl, ResVT,
8288 DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), Sel1, FV);
8289 case ISD::SETULT:
8290 case ISD::SETLT:
8291 std::swap(TV, FV); // fsel is natively setge, swap operands for setlt
8292 [[fallthrough]];
8293 case ISD::SETOGE:
8294 case ISD::SETGE:
8295 if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits
8296 LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
8297 return DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV);
8298 case ISD::SETUGT:
8299 case ISD::SETGT:
8300 std::swap(TV, FV); // fsel is natively setge; swap operands so setgt is handled as setle
8301 [[fallthrough]];
8302 case ISD::SETOLE:
8303 case ISD::SETLE:
8304 if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits
8305 LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
8306 return DAG.getNode(PPCISD::FSEL, dl, ResVT,
8307 DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), TV, FV);
8308 }
8309
8310 SDValue Cmp;
8311 switch (CC) {
8312 default: break; // SETUO etc aren't handled by fsel.
8313 case ISD::SETNE:
8314 std::swap(TV, FV);
8315 [[fallthrough]];
8316 case ISD::SETEQ:
8317 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags);
8318 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
8319 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
8320 Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
8321 if (Sel1.getValueType() == MVT::f32) // Comparison is always 64-bits
8322 Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1);
8323 return DAG.getNode(PPCISD::FSEL, dl, ResVT,
8324 DAG.getNode(ISD::FNEG, dl, MVT::f64, Cmp), Sel1, FV);
8325 case ISD::SETULT:
8326 case ISD::SETLT:
8327 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags);
8328 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
8329 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
8330 return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
8331 case ISD::SETOGE:
8332 case ISD::SETGE:
8333 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags);
8334 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
8335 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
8336 return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
8337 case ISD::SETUGT:
8338 case ISD::SETGT:
8339 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS, Flags);
8340 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
8341 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
8342 return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
8343 case ISD::SETOLE:
8344 case ISD::SETLE:
8345 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS, Flags);
8346 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
8347 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
8348 return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
8349 }
8350 return Op;
8351}
8352
8353static unsigned getPPCStrictOpcode(unsigned Opc) {
8354 switch (Opc) {
8355 default:
8356 llvm_unreachable("No strict version of this opcode!");
8357 case PPCISD::FCTIDZ:
8358 return PPCISD::STRICT_FCTIDZ;
8359 case PPCISD::FCTIWZ:
8360 return PPCISD::STRICT_FCTIWZ;
8361 case PPCISD::FCTIDUZ:
8362 return PPCISD::STRICT_FCTIDUZ;
8363 case PPCISD::FCTIWUZ:
8364 return PPCISD::STRICT_FCTIWUZ;
8365 case PPCISD::FCFID:
8366 return PPCISD::STRICT_FCFID;
8367 case PPCISD::FCFIDU:
8368 return PPCISD::STRICT_FCFIDU;
8369 case PPCISD::FCFIDS:
8370 return PPCISD::STRICT_FCFIDS;
8371 case PPCISD::FCFIDUS:
8372 return PPCISD::STRICT_FCFIDUS;
8373 }
8374}
8375
8376 static SDValue convertFPToInt(SDValue Op, SelectionDAG &DAG,
8377 const PPCSubtarget &Subtarget) {
8378 SDLoc dl(Op);
8379 bool IsStrict = Op->isStrictFPOpcode();
8380 bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT ||
8381 Op.getOpcode() == ISD::STRICT_FP_TO_SINT;
8382
8383 // TODO: Any other flags to propagate?
8384 SDNodeFlags Flags;
8385 Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
8386
8387 // For strict nodes, source is the second operand.
8388 SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
8389 SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
8390 MVT DestTy = Op.getSimpleValueType();
8391 assert(Src.getValueType().isFloatingPoint() &&
8392 (DestTy == MVT::i8 || DestTy == MVT::i16 || DestTy == MVT::i32 ||
8393 DestTy == MVT::i64) &&
8394 "Invalid FP_TO_INT types");
8395 if (Src.getValueType() == MVT::f32) {
8396 if (IsStrict) {
8397 Src =
8398 DAG.getNode(ISD::STRICT_FP_EXTEND, dl,
8399 DAG.getVTList(MVT::f64, MVT::Other), {Chain, Src}, Flags);
8400 Chain = Src.getValue(1);
8401 } else
8402 Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);
8403 }
8404 if ((DestTy == MVT::i8 || DestTy == MVT::i16) && Subtarget.hasP9Vector())
8405 DestTy = Subtarget.getScalarIntVT();
8406 unsigned Opc = ISD::DELETED_NODE;
8407 switch (DestTy.SimpleTy) {
8408 default: llvm_unreachable("Unhandled FP_TO_INT type in custom expander!");
8409 case MVT::i32:
8410 Opc = IsSigned ? PPCISD::FCTIWZ
8411 : (Subtarget.hasFPCVT() ? PPCISD::FCTIWUZ : PPCISD::FCTIDZ);
8412 break;
8413 case MVT::i64:
8414 assert((IsSigned || Subtarget.hasFPCVT()) &&
8415 "i64 FP_TO_UINT is supported only with FPCVT");
8416 Opc = IsSigned ? PPCISD::FCTIDZ : PPCISD::FCTIDUZ;
8417 }
8418 EVT ConvTy = Src.getValueType() == MVT::f128 ? MVT::f128 : MVT::f64;
8419 SDValue Conv;
8420 if (IsStrict) {
8421 Opc = getPPCStrictOpcode(Opc);
8422 Conv = DAG.getNode(Opc, dl, DAG.getVTList(ConvTy, MVT::Other), {Chain, Src},
8423 Flags);
8424 } else {
8425 Conv = DAG.getNode(Opc, dl, ConvTy, Src);
8426 }
8427 return Conv;
8428}
8429
8430void PPCTargetLowering::LowerFP_TO_INTForReuse(SDValue Op, ReuseLoadInfo &RLI,
8431 SelectionDAG &DAG,
8432 const SDLoc &dl) const {
8433 SDValue Tmp = convertFPToInt(Op, DAG, Subtarget);
8434 bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT ||
8435 Op.getOpcode() == ISD::STRICT_FP_TO_SINT;
8436 bool IsStrict = Op->isStrictFPOpcode();
8437
8438 // Convert the FP value to an int value through memory.
8439 bool i32Stack = Op.getValueType() == MVT::i32 && Subtarget.hasSTFIWX() &&
8440 (IsSigned || Subtarget.hasFPCVT());
8441 SDValue FIPtr = DAG.CreateStackTemporary(i32Stack ? MVT::i32 : MVT::f64);
8442 int FI = cast<FrameIndexSDNode>(FIPtr)->getIndex();
8443 MachinePointerInfo MPI =
8444 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI);
8445
8446 // Emit a store to the stack slot.
8447 SDValue Chain = IsStrict ? Tmp.getValue(1) : DAG.getEntryNode();
8448 Align Alignment(DAG.getEVTAlign(Tmp.getValueType()));
8449 if (i32Stack) {
8450 MachineFunction &MF = DAG.getMachineFunction();
8451 Alignment = Align(4);
8452 MachineMemOperand *MMO =
8453 MF.getMachineMemOperand(MPI, MachineMemOperand::MOStore, 4, Alignment);
8454 SDValue Ops[] = { Chain, Tmp, FIPtr };
8455 Chain = DAG.getMemIntrinsicNode(PPCISD::STFIWX, dl,
8456 DAG.getVTList(MVT::Other), Ops, MVT::i32, MMO);
8457 } else
8458 Chain = DAG.getStore(Chain, dl, Tmp, FIPtr, MPI, Alignment);
8459
8460 // Result is a load from the stack slot. If loading 4 bytes, make sure to
8461 // add in a bias on big endian.
8462 if (Op.getValueType() == MVT::i32 && !i32Stack) {
8463 FIPtr = DAG.getNode(ISD::ADD, dl, FIPtr.getValueType(), FIPtr,
8464 DAG.getConstant(4, dl, FIPtr.getValueType()));
8465 MPI = MPI.getWithOffset(Subtarget.isLittleEndian() ? 0 : 4);
8466 }
8467
8468 RLI.Chain = Chain;
8469 RLI.Ptr = FIPtr;
8470 RLI.MPI = MPI;
8471 RLI.Alignment = Alignment;
8472}
8473
8474/// Custom lowers floating point to integer conversions to use
8475/// the direct move instructions available in ISA 2.07 to avoid the
8476/// need for load/store combinations.
8477SDValue PPCTargetLowering::LowerFP_TO_INTDirectMove(SDValue Op,
8478 SelectionDAG &DAG,
8479 const SDLoc &dl) const {
8480 SDValue Conv = convertFPToInt(Op, DAG, Subtarget);
8481 SDValue Mov = DAG.getNode(PPCISD::MFVSR, dl, Op.getValueType(), Conv);
8482 if (Op->isStrictFPOpcode())
8483 return DAG.getMergeValues({Mov, Conv.getValue(1)}, dl);
8484 else
8485 return Mov;
8486}
8487
8488SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
8489 const SDLoc &dl) const {
8490 bool IsStrict = Op->isStrictFPOpcode();
8491 bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT ||
8492 Op.getOpcode() == ISD::STRICT_FP_TO_SINT;
8493 SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
8494 EVT SrcVT = Src.getValueType();
8495 EVT DstVT = Op.getValueType();
8496
8497 // FP to INT conversions are legal for f128.
8498 if (SrcVT == MVT::f128)
8499 return Subtarget.hasP9Vector() ? Op : SDValue();
8500
8501 // Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on
8502 // PPC (the libcall is not available).
8503 if (SrcVT == MVT::ppcf128) {
8504 if (DstVT == MVT::i32) {
8505 // TODO: Conservatively pass only nofpexcept flag here. Need to check and
8506 // set other fast-math flags to FP operations in both strict and
8507 // non-strict cases. (FP_TO_SINT, FSUB)
8508 SDNodeFlags Flags;
8509 Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
8510
8511 if (IsSigned) {
8512 SDValue Lo, Hi;
8513 std::tie(Lo, Hi) = DAG.SplitScalar(Src, dl, MVT::f64, MVT::f64);
8514
8515 // Add the two halves of the long double in round-to-zero mode, and use
8516 // a smaller FP_TO_SINT.
8517 if (IsStrict) {
8518 SDValue Res = DAG.getNode(PPCISD::STRICT_FADDRTZ, dl,
8519 DAG.getVTList(MVT::f64, MVT::Other),
8520 {Op.getOperand(0), Lo, Hi}, Flags);
8521 return DAG.getNode(ISD::STRICT_FP_TO_SINT, dl,
8522 DAG.getVTList(MVT::i32, MVT::Other),
8523 {Res.getValue(1), Res}, Flags);
8524 } else {
8525 SDValue Res = DAG.getNode(PPCISD::FADDRTZ, dl, MVT::f64, Lo, Hi);
8526 return DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Res);
8527 }
8528 } else {
8529 const uint64_t TwoE31[] = {0x41e0000000000000LL, 0};
8530 APFloat APF = APFloat(APFloat::PPCDoubleDouble(), APInt(128, TwoE31));
8531 SDValue Cst = DAG.getConstantFP(APF, dl, SrcVT);
8532 SDValue SignMask = DAG.getConstant(0x80000000, dl, DstVT);
8533 if (IsStrict) {
8534 // Sel = Src < 0x80000000
8535 // FltOfs = select Sel, 0.0, 0x80000000
8536 // IntOfs = select Sel, 0, 0x80000000
8537 // Result = fp_to_sint(Src - FltOfs) ^ IntOfs
8538 SDValue Chain = Op.getOperand(0);
8539 EVT SetCCVT =
8540 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
8541 EVT DstSetCCVT =
8542 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), DstVT);
8543 SDValue Sel = DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT,
8544 Chain, true);
8545 Chain = Sel.getValue(1);
8546
8547 SDValue FltOfs = DAG.getSelect(
8548 dl, SrcVT, Sel, DAG.getConstantFP(0.0, dl, SrcVT), Cst);
8549 Sel = DAG.getBoolExtOrTrunc(Sel, dl, DstSetCCVT, DstVT);
8550
8551 SDValue Val = DAG.getNode(ISD::STRICT_FSUB, dl,
8552 DAG.getVTList(SrcVT, MVT::Other),
8553 {Chain, Src, FltOfs}, Flags);
8554 Chain = Val.getValue(1);
8555 SDValue SInt = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl,
8556 DAG.getVTList(DstVT, MVT::Other),
8557 {Chain, Val}, Flags);
8558 Chain = SInt.getValue(1);
8559 SDValue IntOfs = DAG.getSelect(
8560 dl, DstVT, Sel, DAG.getConstant(0, dl, DstVT), SignMask);
8561 SDValue Result = DAG.getNode(ISD::XOR, dl, DstVT, SInt, IntOfs);
8562 return DAG.getMergeValues({Result, Chain}, dl);
8563 } else {
8564 // X>=2^31 ? (int)(X-2^31)+0x80000000 : (int)X
8565 // FIXME: generated code sucks.
8566 SDValue True = DAG.getNode(ISD::FSUB, dl, MVT::ppcf128, Src, Cst);
8567 True = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, True);
8568 True = DAG.getNode(ISD::ADD, dl, MVT::i32, True, SignMask);
8569 SDValue False = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Src);
8570 return DAG.getSelectCC(dl, Src, Cst, True, False, ISD::SETGE);
8571 }
8572 }
8573 }
8574
8575 return SDValue();
8576 }
8577
8578 if (Subtarget.hasDirectMove() && Subtarget.isPPC64())
8579 return LowerFP_TO_INTDirectMove(Op, DAG, dl);
8580
8581 ReuseLoadInfo RLI;
8582 LowerFP_TO_INTForReuse(Op, RLI, DAG, dl);
8583
8584 return DAG.getLoad(Op.getValueType(), dl, RLI.Chain, RLI.Ptr, RLI.MPI,
8585 RLI.Alignment, RLI.MMOFlags(), RLI.AAInfo, RLI.Ranges);
8586}
8587
8588// We're trying to insert a regular store, S, and then a load, L. If the
8589// incoming value, O, is a load, we might just be able to have our load use the
8590// address used by O. However, we don't know if anything else will store to
8591// that address before we can load from it. To prevent this situation, we need
8592// to insert our load, L, into the chain as a peer of O. To do this, we give L
8593// the same chain operand as O, we create a token factor from the chain results
8594// of O and L, and we replace all uses of O's chain result with that token
8595// factor (see spliceIntoChain below for this last part).
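// For example, if O = (load ch0, ptr) produces (val, ch1) and we create
// L = (lfiwax ch0, ptr) producing (f, ch2), spliceIntoChain builds
// TF = TokenFactor(ch1, ch2) and replaces every use of ch1 with TF, so any
// operation that was ordered after O is now also ordered after L.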
8596bool PPCTargetLowering::canReuseLoadAddress(SDValue Op, EVT MemVT,
8597 ReuseLoadInfo &RLI,
8598 SelectionDAG &DAG,
8599 ISD::LoadExtType ET) const {
8600 // Conservatively skip reusing for constrained FP nodes.
8601 if (Op->isStrictFPOpcode())
8602 return false;
8603
8604 SDLoc dl(Op);
8605 bool ValidFPToUint = Op.getOpcode() == ISD::FP_TO_UINT &&
8606 (Subtarget.hasFPCVT() || Op.getValueType() == MVT::i32);
8607 if (ET == ISD::NON_EXTLOAD &&
8608 (ValidFPToUint || Op.getOpcode() == ISD::FP_TO_SINT) &&
8609 isOperationLegalOrCustom(Op.getOpcode(),
8610 Op.getOperand(0).getValueType())) {
8611
8612 LowerFP_TO_INTForReuse(Op, RLI, DAG, dl);
8613 return true;
8614 }
8615
8616 LoadSDNode *LD = dyn_cast<LoadSDNode>(Op);
8617 if (!LD || LD->getExtensionType() != ET || LD->isVolatile() ||
8618 LD->isNonTemporal())
8619 return false;
8620 if (LD->getMemoryVT() != MemVT)
8621 return false;
8622
8623 // If the result of the load is an illegal type, then we can't build a
8624 // valid chain for reuse since the legalised loads and token factor node that
8625 // ties the legalised loads together use a different output chain than the
8626 // illegal load.
8627 if (!isTypeLegal(LD->getValueType(0)))
8628 return false;
8629
8630 RLI.Ptr = LD->getBasePtr();
8631 if (LD->isIndexed() && !LD->getOffset().isUndef()) {
8632 assert(LD->getAddressingMode() == ISD::PRE_INC &&
8633 "Non-pre-inc AM on PPC?");
8634 RLI.Ptr = DAG.getNode(ISD::ADD, dl, RLI.Ptr.getValueType(), RLI.Ptr,
8635 LD->getOffset());
8636 }
8637
8638 RLI.Chain = LD->getChain();
8639 RLI.MPI = LD->getPointerInfo();
8640 RLI.IsDereferenceable = LD->isDereferenceable();
8641 RLI.IsInvariant = LD->isInvariant();
8642 RLI.Alignment = LD->getAlign();
8643 RLI.AAInfo = LD->getAAInfo();
8644 RLI.Ranges = LD->getRanges();
8645
8646 RLI.ResChain = SDValue(LD, LD->isIndexed() ? 2 : 1);
8647 return true;
8648}
8649
8650// Given the head of the old chain, ResChain, insert a token factor containing
8651// it and NewResChain, and make users of ResChain now be users of that token
8652// factor.
8653// TODO: Remove and use DAG::makeEquivalentMemoryOrdering() instead.
8654void PPCTargetLowering::spliceIntoChain(SDValue ResChain,
8655 SDValue NewResChain,
8656 SelectionDAG &DAG) const {
8657 if (!ResChain)
8658 return;
8659
8660 SDLoc dl(NewResChain);
8661
8662 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
8663 NewResChain, DAG.getUNDEF(MVT::Other));
8664 assert(TF.getNode() != NewResChain.getNode() &&
8665 "A new TF really is required here");
8666
8667 DAG.ReplaceAllUsesOfValueWith(ResChain, TF);
8668 DAG.UpdateNodeOperands(TF.getNode(), ResChain, NewResChain);
8669}
8670
8671 /// Analyze the profitability of a direct move:
8672 /// prefer a float load to an int load plus a direct move
8673 /// when there is no integer use of the int load.
8674bool PPCTargetLowering::directMoveIsProfitable(const SDValue &Op) const {
8675 SDNode *Origin = Op.getOperand(Op->isStrictFPOpcode() ? 1 : 0).getNode();
8676 if (Origin->getOpcode() != ISD::LOAD)
8677 return true;
8678
8679 // If there is no LXSIBZX/LXSIHZX, like Power8,
8680 // prefer direct move if the memory size is 1 or 2 bytes.
8681 MachineMemOperand *MMO = cast<LoadSDNode>(Origin)->getMemOperand();
8682 if (!Subtarget.hasP9Vector() &&
8683 (!MMO->getSize().hasValue() || MMO->getSize().getValue() <= 2))
8684 return true;
8685
8686 for (SDUse &Use : Origin->uses()) {
8687
8688 // Only look at the users of the loaded value.
8689 if (Use.getResNo() != 0)
8690 continue;
8691
8692 SDNode *User = Use.getUser();
8693 if (User->getOpcode() != ISD::SINT_TO_FP &&
8694 User->getOpcode() != ISD::UINT_TO_FP &&
8695 User->getOpcode() != ISD::STRICT_SINT_TO_FP &&
8696 User->getOpcode() != ISD::STRICT_UINT_TO_FP)
8697 return true;
8698 }
8699
8700 return false;
8701}
8702
8703 static SDValue convertIntToFP(SDValue Op, SDValue Src, SelectionDAG &DAG,
8704 const PPCSubtarget &Subtarget,
8705 SDValue Chain = SDValue()) {
8706 bool IsSigned = Op.getOpcode() == ISD::SINT_TO_FP ||
8707 Op.getOpcode() == ISD::STRICT_SINT_TO_FP;
8708 SDLoc dl(Op);
8709
8710 // TODO: Any other flags to propagate?
8711 SDNodeFlags Flags;
8712 Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
8713
8714 // If we have FCFIDS, then use it when converting to single-precision.
8715 // Otherwise, convert to double-precision and then round.
8716 bool IsSingle = Op.getValueType() == MVT::f32 && Subtarget.hasFPCVT();
8717 unsigned ConvOpc = IsSingle ? (IsSigned ? PPCISD::FCFIDS : PPCISD::FCFIDUS)
8718 : (IsSigned ? PPCISD::FCFID : PPCISD::FCFIDU);
8719 EVT ConvTy = IsSingle ? MVT::f32 : MVT::f64;
8720 if (Op->isStrictFPOpcode()) {
8721 if (!Chain)
8722 Chain = Op.getOperand(0);
8723 return DAG.getNode(getPPCStrictOpcode(ConvOpc), dl,
8724 DAG.getVTList(ConvTy, MVT::Other), {Chain, Src}, Flags);
8725 } else
8726 return DAG.getNode(ConvOpc, dl, ConvTy, Src);
8727}
8728
8729/// Custom lowers integer to floating point conversions to use
8730/// the direct move instructions available in ISA 2.07 to avoid the
8731/// need for load/store combinations.
8732SDValue PPCTargetLowering::LowerINT_TO_FPDirectMove(SDValue Op,
8733 SelectionDAG &DAG,
8734 const SDLoc &dl) const {
8735 assert((Op.getValueType() == MVT::f32 ||
8736 Op.getValueType() == MVT::f64) &&
8737 "Invalid floating point type as target of conversion");
8738 assert(Subtarget.hasFPCVT() &&
8739 "Int to FP conversions with direct moves require FPCVT");
8740 SDValue Src = Op.getOperand(Op->isStrictFPOpcode() ? 1 : 0);
8741 bool WordInt = Src.getSimpleValueType().SimpleTy == MVT::i32;
8742 bool Signed = Op.getOpcode() == ISD::SINT_TO_FP ||
8743 Op.getOpcode() == ISD::STRICT_SINT_TO_FP;
8744 unsigned MovOpc = (WordInt && !Signed) ? PPCISD::MTVSRZ : PPCISD::MTVSRA;
8745 SDValue Mov = DAG.getNode(MovOpc, dl, MVT::f64, Src);
8746 return convertIntToFP(Op, Mov, DAG, Subtarget);
8747}
8748
8749static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl) {
8750
8751 EVT VecVT = Vec.getValueType();
8752 assert(VecVT.isVector() && "Expected a vector type.");
8753 assert(VecVT.getSizeInBits() < 128 && "Vector is already full width.");
8754
8755 EVT EltVT = VecVT.getVectorElementType();
8756 unsigned WideNumElts = 128 / EltVT.getSizeInBits();
8757 EVT WideVT = EVT::getVectorVT(*DAG.getContext(), EltVT, WideNumElts);
8758
8759 unsigned NumConcat = WideNumElts / VecVT.getVectorNumElements();
8760 SmallVector<SDValue, 16> Ops(NumConcat);
8761 Ops[0] = Vec;
8762 SDValue UndefVec = DAG.getUNDEF(VecVT);
8763 for (unsigned i = 1; i < NumConcat; ++i)
8764 Ops[i] = UndefVec;
8765
8766 return DAG.getNode(ISD::CONCAT_VECTORS, dl, WideVT, Ops);
8767}
8768
8769SDValue PPCTargetLowering::LowerINT_TO_FPVector(SDValue Op, SelectionDAG &DAG,
8770 const SDLoc &dl) const {
8771 bool IsStrict = Op->isStrictFPOpcode();
8772 unsigned Opc = Op.getOpcode();
8773 SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
8774 assert((Opc == ISD::UINT_TO_FP || Opc == ISD::SINT_TO_FP ||
8776 "Unexpected conversion type");
8777 assert((Op.getValueType() == MVT::v2f64 || Op.getValueType() == MVT::v4f32) &&
8778 "Supports conversions to v2f64/v4f32 only.");
8779
8780 // TODO: Any other flags to propagate?
8781 SDNodeFlags Flags;
8782 Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
8783
8784 bool SignedConv = Opc == ISD::SINT_TO_FP || Opc == ISD::STRICT_SINT_TO_FP;
8785 bool FourEltRes = Op.getValueType() == MVT::v4f32;
8786
8787 SDValue Wide = widenVec(DAG, Src, dl);
8788 EVT WideVT = Wide.getValueType();
8789 unsigned WideNumElts = WideVT.getVectorNumElements();
8790 MVT IntermediateVT = FourEltRes ? MVT::v4i32 : MVT::v2i64;
8791
8792 SmallVector<int, 16> ShuffV;
8793 for (unsigned i = 0; i < WideNumElts; ++i)
8794 ShuffV.push_back(i + WideNumElts);
8795
8796 int Stride = FourEltRes ? WideNumElts / 4 : WideNumElts / 2;
8797 int SaveElts = FourEltRes ? 4 : 2;
8798 if (Subtarget.isLittleEndian())
8799 for (int i = 0; i < SaveElts; i++)
8800 ShuffV[i * Stride] = i;
8801 else
8802 for (int i = 1; i <= SaveElts; i++)
8803 ShuffV[i * Stride - 1] = i - 1;
8804
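// For example (little-endian, unsigned v4i16 -> v4f32): Wide is v8i16,
// Stride = 2, and the mask becomes {0, 9, 1, 11, 2, 13, 3, 15}, interleaving
// each source element with a zero element so that the bitcast to v4i32 below
// yields the zero-extended values.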
8805 SDValue ShuffleSrc2 =
8806 SignedConv ? DAG.getUNDEF(WideVT) : DAG.getConstant(0, dl, WideVT);
8807 SDValue Arrange = DAG.getVectorShuffle(WideVT, dl, Wide, ShuffleSrc2, ShuffV);
8808
8809 SDValue Extend;
8810 if (SignedConv) {
8811 Arrange = DAG.getBitcast(IntermediateVT, Arrange);
8812 EVT ExtVT = Src.getValueType();
8813 if (Subtarget.hasP9Altivec())
8814 ExtVT = EVT::getVectorVT(*DAG.getContext(), WideVT.getVectorElementType(),
8815 IntermediateVT.getVectorNumElements());
8816
8817 Extend = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, IntermediateVT, Arrange,
8818 DAG.getValueType(ExtVT));
8819 } else
8820 Extend = DAG.getNode(ISD::BITCAST, dl, IntermediateVT, Arrange);
8821
8822 if (IsStrict)
8823 return DAG.getNode(Opc, dl, DAG.getVTList(Op.getValueType(), MVT::Other),
8824 {Op.getOperand(0), Extend}, Flags);
8825
8826 return DAG.getNode(Opc, dl, Op.getValueType(), Extend);
8827}
8828
8829SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
8830 SelectionDAG &DAG) const {
8831 SDLoc dl(Op);
8832 bool IsSigned = Op.getOpcode() == ISD::SINT_TO_FP ||
8833 Op.getOpcode() == ISD::STRICT_SINT_TO_FP;
8834 bool IsStrict = Op->isStrictFPOpcode();
8835 SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
8836 SDValue Chain = IsStrict ? Op.getOperand(0) : DAG.getEntryNode();
8837
8838 // TODO: Any other flags to propagate?
8839 SDNodeFlags Flags;
8840 Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
8841
8842 EVT InVT = Src.getValueType();
8843 EVT OutVT = Op.getValueType();
8844 if (OutVT.isVector() && OutVT.isFloatingPoint() &&
8845 isOperationCustom(Op.getOpcode(), InVT))
8846 return LowerINT_TO_FPVector(Op, DAG, dl);
8847
8848 // Conversions to f128 are legal.
8849 if (Op.getValueType() == MVT::f128)
8850 return Subtarget.hasP9Vector() ? Op : SDValue();
8851
8852 // Don't handle ppc_fp128 here; let it be lowered to a libcall.
8853 if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
8854 return SDValue();
8855
8856 if (Src.getValueType() == MVT::i1) {
8857 SDValue Sel = DAG.getNode(ISD::SELECT, dl, Op.getValueType(), Src,
8858 DAG.getConstantFP(1.0, dl, Op.getValueType()),
8859 DAG.getConstantFP(0.0, dl, Op.getValueType()));
8860 if (IsStrict)
8861 return DAG.getMergeValues({Sel, Chain}, dl);
8862 else
8863 return Sel;
8864 }
8865
8866 // If we have direct moves, we can do all of the conversion and skip the store/load;
8867 // however, without FPCVT we can't do most conversions.
8868 if (Subtarget.hasDirectMove() && directMoveIsProfitable(Op) &&
8869 Subtarget.isPPC64() && Subtarget.hasFPCVT())
8870 return LowerINT_TO_FPDirectMove(Op, DAG, dl);
8871
8872 assert((IsSigned || Subtarget.hasFPCVT()) &&
8873 "UINT_TO_FP is supported only with FPCVT");
8874
8875 if (Src.getValueType() == MVT::i64) {
8876 SDValue SINT = Src;
8877 // When converting to single-precision, we actually need to convert
8878 // to double-precision first and then round to single-precision.
8879 // To avoid double-rounding effects during that operation, we have
8880 // to prepare the input operand. Bits that might be truncated when
8881 // converting to double-precision are replaced by a bit that won't
8882 // be lost at this stage, but is below the single-precision rounding
8883 // position.
8884 //
8885 // However, if -enable-unsafe-fp-math is in effect, accept double
8886 // rounding to avoid the extra overhead.
8887 if (Op.getValueType() == MVT::f32 &&
8888 !Subtarget.hasFPCVT() &&
8889 DAG.getTarget().Options.UnsafeFPMath) {
8890
8891 // Twiddle input to make sure the low 11 bits are zero. (If this
8892 // is the case, we are guaranteed the value will fit into the 53 bit
8893 // mantissa of an IEEE double-precision value without rounding.)
8894 // If any of those low 11 bits were not zero originally, make sure
8895 // bit 12 (value 2048) is set instead, so that the final rounding
8896 // to single-precision gets the correct result.
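// For example, if the low 11 bits of SINT are 0x007, then
// (SINT & 2047) + 2047 = 2054 = 0x806, which has the 0x800 bit set; OR-ing
// that into SINT and then clearing the low 11 bits yields
// (SINT & ~2047) | 0x800. If the low 11 bits were already zero, the result
// is simply SINT.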
8897 SDValue Round = DAG.getNode(ISD::AND, dl, MVT::i64,
8898 SINT, DAG.getConstant(2047, dl, MVT::i64));
8899 Round = DAG.getNode(ISD::ADD, dl, MVT::i64,
8900 Round, DAG.getConstant(2047, dl, MVT::i64));
8901 Round = DAG.getNode(ISD::OR, dl, MVT::i64, Round, SINT);
8902 Round = DAG.getNode(ISD::AND, dl, MVT::i64,
8903 Round, DAG.getConstant(-2048, dl, MVT::i64));
8904
8905 // However, we cannot use that value unconditionally: if the magnitude
8906 // of the input value is small, the bit-twiddling we did above might
8907 // end up visibly changing the output. Fortunately, in that case, we
8908 // don't need to twiddle bits since the original input will convert
8909 // exactly to double-precision floating-point already. Therefore,
8910 // construct a conditional to use the original value if the top 11
8911 // bits are all sign-bit copies, and use the rounded value computed
8912 // above otherwise.
8913 SDValue Cond = DAG.getNode(ISD::SRA, dl, MVT::i64,
8914 SINT, DAG.getConstant(53, dl, MVT::i32));
8915 Cond = DAG.getNode(ISD::ADD, dl, MVT::i64,
8916 Cond, DAG.getConstant(1, dl, MVT::i64));
8917 Cond = DAG.getSetCC(
8918 dl,
8919 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), MVT::i64),
8920 Cond, DAG.getConstant(1, dl, MVT::i64), ISD::SETUGT);
8921
8922 SINT = DAG.getNode(ISD::SELECT, dl, MVT::i64, Cond, Round, SINT);
8923 }
8924
8925 ReuseLoadInfo RLI;
8926 SDValue Bits;
8927
8929 if (canReuseLoadAddress(SINT, MVT::i64, RLI, DAG)) {
8930 Bits = DAG.getLoad(MVT::f64, dl, RLI.Chain, RLI.Ptr, RLI.MPI,
8931 RLI.Alignment, RLI.MMOFlags(), RLI.AAInfo, RLI.Ranges);
8932 spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
8933 } else if (Subtarget.hasLFIWAX() &&
8934 canReuseLoadAddress(SINT, MVT::i32, RLI, DAG, ISD::SEXTLOAD)) {
8935 MachineMemOperand *MMO =
8936 MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
8937 RLI.Alignment, RLI.AAInfo, RLI.Ranges);
8938 SDValue Ops[] = { RLI.Chain, RLI.Ptr };
8939 Bits = DAG.getMemIntrinsicNode(PPCISD::LFIWAX, dl,
8940 DAG.getVTList(MVT::f64, MVT::Other),
8941 Ops, MVT::i32, MMO);
8942 spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
8943 } else if (Subtarget.hasFPCVT() &&
8944 canReuseLoadAddress(SINT, MVT::i32, RLI, DAG, ISD::ZEXTLOAD)) {
8945 MachineMemOperand *MMO =
8946 MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
8947 RLI.Alignment, RLI.AAInfo, RLI.Ranges);
8948 SDValue Ops[] = { RLI.Chain, RLI.Ptr };
8949 Bits = DAG.getMemIntrinsicNode(PPCISD::LFIWZX, dl,
8950 DAG.getVTList(MVT::f64, MVT::Other),
8951 Ops, MVT::i32, MMO);
8952 spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
8953 } else if (((Subtarget.hasLFIWAX() &&
8954 SINT.getOpcode() == ISD::SIGN_EXTEND) ||
8955 (Subtarget.hasFPCVT() &&
8956 SINT.getOpcode() == ISD::ZERO_EXTEND)) &&
8957 SINT.getOperand(0).getValueType() == MVT::i32) {
8958 MachineFrameInfo &MFI = MF.getFrameInfo();
8959 EVT PtrVT = getPointerTy(DAG.getDataLayout());
8960
8961 int FrameIdx = MFI.CreateStackObject(4, Align(4), false);
8962 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
8963
8964 SDValue Store = DAG.getStore(Chain, dl, SINT.getOperand(0), FIdx,
8965 MachinePointerInfo::getFixedStack(
8966 DAG.getMachineFunction(), FrameIdx));
8967 Chain = Store;
8968
8969 assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 &&
8970 "Expected an i32 store");
8971
8972 RLI.Ptr = FIdx;
8973 RLI.Chain = Chain;
8974 RLI.MPI =
8975 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
8976 RLI.Alignment = Align(4);
8977
8978 MachineMemOperand *MMO =
8979 MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
8980 RLI.Alignment, RLI.AAInfo, RLI.Ranges);
8981 SDValue Ops[] = { RLI.Chain, RLI.Ptr };
8982 Bits = DAG.getMemIntrinsicNode(SINT.getOpcode() == ISD::ZERO_EXTEND ?
8983 PPCISD::LFIWZX : PPCISD::LFIWAX,
8984 dl, DAG.getVTList(MVT::f64, MVT::Other),
8985 Ops, MVT::i32, MMO);
8986 Chain = Bits.getValue(1);
8987 } else
8988 Bits = DAG.getNode(ISD::BITCAST, dl, MVT::f64, SINT);
8989
8990 SDValue FP = convertIntToFP(Op, Bits, DAG, Subtarget, Chain);
8991 if (IsStrict)
8992 Chain = FP.getValue(1);
8993
8994 if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) {
8995 if (IsStrict)
8996 FP = DAG.getNode(
8997 ISD::STRICT_FP_ROUND, dl, DAG.getVTList(MVT::f32, MVT::Other),
8998 {Chain, FP, DAG.getIntPtrConstant(0, dl, /*isTarget=*/true)},
8999 Flags);
9000 else
9001 FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP,
9002 DAG.getIntPtrConstant(0, dl, /*isTarget=*/true));
9003 }
9004 return FP;
9005 }
9006
9007 assert(Src.getValueType() == MVT::i32 &&
9008 "Unhandled INT_TO_FP type in custom expander!");
9009 // Since we only generate this in 64-bit mode, we can take advantage of
9010 // 64-bit registers. In particular, sign extend the input value into the
9011 // 64-bit register with extsw, store the WHOLE 64-bit value into the stack
9012 // then lfd it and fcfid it.
9013 MachineFunction &MF = DAG.getMachineFunction();
9014 MachineFrameInfo &MFI = MF.getFrameInfo();
9015 EVT PtrVT = getPointerTy(MF.getDataLayout());
9016
9017 SDValue Ld;
9018 if (Subtarget.hasLFIWAX() || Subtarget.hasFPCVT()) {
9019 ReuseLoadInfo RLI;
9020 bool ReusingLoad;
9021 if (!(ReusingLoad = canReuseLoadAddress(Src, MVT::i32, RLI, DAG))) {
9022 int FrameIdx = MFI.CreateStackObject(4, Align(4), false);
9023 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
9024
9025 SDValue Store = DAG.getStore(Chain, dl, Src, FIdx,
9026 MachinePointerInfo::getFixedStack(
9027 DAG.getMachineFunction(), FrameIdx));
9028 Chain = Store;
9029
9030 assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 &&
9031 "Expected an i32 store");
9032
9033 RLI.Ptr = FIdx;
9034 RLI.Chain = Chain;
9035 RLI.MPI =
9036 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
9037 RLI.Alignment = Align(4);
9038 }
9039
9040 MachineMemOperand *MMO =
9041 MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
9042 RLI.Alignment, RLI.AAInfo, RLI.Ranges);
9043 SDValue Ops[] = { RLI.Chain, RLI.Ptr };
9044 Ld = DAG.getMemIntrinsicNode(IsSigned ? PPCISD::LFIWAX : PPCISD::LFIWZX, dl,
9045 DAG.getVTList(MVT::f64, MVT::Other), Ops,
9046 MVT::i32, MMO);
9047 Chain = Ld.getValue(1);
9048 if (ReusingLoad)
9049 spliceIntoChain(RLI.ResChain, Ld.getValue(1), DAG);
9050 } else {
9051 assert(Subtarget.isPPC64() &&
9052 "i32->FP without LFIWAX supported only on PPC64");
9053
9054 int FrameIdx = MFI.CreateStackObject(8, Align(8), false);
9055 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
9056
9057 SDValue Ext64 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i64, Src);
9058
9059 // STD the extended value into the stack slot.
9060 SDValue Store = DAG.getStore(
9061 Chain, dl, Ext64, FIdx,
9062 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx));
9063 Chain = Store;
9064
9065 // Load the value as a double.
9066 Ld = DAG.getLoad(
9067 MVT::f64, dl, Chain, FIdx,
9068 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx));
9069 Chain = Ld.getValue(1);
9070 }
9071
9072 // FCFID it and return it.
9073 SDValue FP = convertIntToFP(Op, Ld, DAG, Subtarget, Chain);
9074 if (IsStrict)
9075 Chain = FP.getValue(1);
9076 if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) {
9077 if (IsStrict)
9078 FP = DAG.getNode(
9079 ISD::STRICT_FP_ROUND, dl, DAG.getVTList(MVT::f32, MVT::Other),
9080 {Chain, FP, DAG.getIntPtrConstant(0, dl, /*isTarget=*/true)}, Flags);
9081 else
9082 FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP,
9083 DAG.getIntPtrConstant(0, dl, /*isTarget=*/true));
9084 }
9085 return FP;
9086}
9087
9088SDValue PPCTargetLowering::LowerSET_ROUNDING(SDValue Op,
9089 SelectionDAG &DAG) const {
9090 SDLoc Dl(Op);
9091 MachineFunction &MF = DAG.getMachineFunction();
9092 EVT PtrVT = getPointerTy(MF.getDataLayout());
9093 SDValue Chain = Op.getOperand(0);
9094
9095 // If the requested mode is constant, just use the simpler mtfsb/mffscrni.
9096 if (auto *CVal = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
9097 uint64_t Mode = CVal->getZExtValue();
9098 assert(Mode < 4 && "Unsupported rounding mode!");
9099 unsigned InternalRnd = Mode ^ (~(Mode >> 1) & 1);
9100 if (Subtarget.isISA3_0())
9101 return SDValue(
9102 DAG.getMachineNode(
9103 PPC::MFFSCRNI, Dl, {MVT::f64, MVT::Other},
9104 {DAG.getConstant(InternalRnd, Dl, MVT::i32, true), Chain}),
9105 1);
9106 SDNode *SetHi = DAG.getMachineNode(
9107 (InternalRnd & 2) ? PPC::MTFSB1 : PPC::MTFSB0, Dl, MVT::Other,
9108 {DAG.getConstant(30, Dl, MVT::i32, true), Chain});
9109 SDNode *SetLo = DAG.getMachineNode(
9110 (InternalRnd & 1) ? PPC::MTFSB1 : PPC::MTFSB0, Dl, MVT::Other,
9111 {DAG.getConstant(31, Dl, MVT::i32, true), SDValue(SetHi, 0)});
9112 return SDValue(SetLo, 0);
9113 }
9114
9115 // Use x ^ (~(x >> 1) & 1) to transform LLVM rounding mode to Power format.
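// LLVM encoding -> PPC FPSCR[RN]: 0 (toward zero) -> 1, 1 (to nearest) -> 0,
// 2 (toward +inf) -> 2, 3 (toward -inf) -> 3. The expression flips the low
// bit only when the high bit is clear, which swaps 0 and 1 and leaves 2 and 3
// unchanged.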
9116 SDValue One = DAG.getConstant(1, Dl, MVT::i32);
9117 SDValue SrcFlag = DAG.getNode(ISD::AND, Dl, MVT::i32, Op.getOperand(1),
9118 DAG.getConstant(3, Dl, MVT::i32));
9119 SDValue DstFlag = DAG.getNode(
9120 ISD::XOR, Dl, MVT::i32, SrcFlag,
9121 DAG.getNode(ISD::AND, Dl, MVT::i32,
9122 DAG.getNOT(Dl,
9123 DAG.getNode(ISD::SRL, Dl, MVT::i32, SrcFlag, One),
9124 MVT::i32),
9125 One));
9126 // For Power9, there's a faster mffscrn, so we don't need to read the FPSCR.
9127 SDValue MFFS;
9128 if (!Subtarget.isISA3_0()) {
9129 MFFS = DAG.getNode(PPCISD::MFFS, Dl, {MVT::f64, MVT::Other}, Chain);
9130 Chain = MFFS.getValue(1);
9131 }
9132 SDValue NewFPSCR;
9133 if (Subtarget.isPPC64()) {
9134 if (Subtarget.isISA3_0()) {
9135 NewFPSCR = DAG.getAnyExtOrTrunc(DstFlag, Dl, MVT::i64);
9136 } else {
9137 // Set the last two bits (rounding mode) of bitcasted FPSCR.
9138 SDNode *InsertRN = DAG.getMachineNode(
9139 PPC::RLDIMI, Dl, MVT::i64,
9140 {DAG.getNode(ISD::BITCAST, Dl, MVT::i64, MFFS),
9141 DAG.getNode(ISD::ZERO_EXTEND, Dl, MVT::i64, DstFlag),
9142 DAG.getTargetConstant(0, Dl, MVT::i32),
9143 DAG.getTargetConstant(62, Dl, MVT::i32)});
9144 NewFPSCR = SDValue(InsertRN, 0);
9145 }
9146 NewFPSCR = DAG.getNode(ISD::BITCAST, Dl, MVT::f64, NewFPSCR);
9147 } else {
9148 // In 32-bit mode, store f64, load and update the lower half.
9149 int SSFI = MF.getFrameInfo().CreateStackObject(8, Align(8), false);
9150 SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
9151 SDValue Addr = Subtarget.isLittleEndian()
9152 ? StackSlot
9153 : DAG.getNode(ISD::ADD, Dl, PtrVT, StackSlot,
9154 DAG.getConstant(4, Dl, PtrVT));
9155 if (Subtarget.isISA3_0()) {
9156 Chain = DAG.getStore(Chain, Dl, DstFlag, Addr, MachinePointerInfo());
9157 } else {
9158 Chain = DAG.getStore(Chain, Dl, MFFS, StackSlot, MachinePointerInfo());
9159 SDValue Tmp =
9160 DAG.getLoad(MVT::i32, Dl, Chain, Addr, MachinePointerInfo());
9161 Chain = Tmp.getValue(1);
9162 Tmp = SDValue(DAG.getMachineNode(
9163 PPC::RLWIMI, Dl, MVT::i32,
9164 {Tmp, DstFlag, DAG.getTargetConstant(0, Dl, MVT::i32),
9165 DAG.getTargetConstant(30, Dl, MVT::i32),
9166 DAG.getTargetConstant(31, Dl, MVT::i32)}),
9167 0);
9168 Chain = DAG.getStore(Chain, Dl, Tmp, Addr, MachinePointerInfo());
9169 }
9170 NewFPSCR =
9171 DAG.getLoad(MVT::f64, Dl, Chain, StackSlot, MachinePointerInfo());
9172 Chain = NewFPSCR.getValue(1);
9173 }
9174 if (Subtarget.isISA3_0())
9175 return SDValue(DAG.getMachineNode(PPC::MFFSCRN, Dl, {MVT::f64, MVT::Other},
9176 {NewFPSCR, Chain}),
9177 1);
9178 SDValue Zero = DAG.getConstant(0, Dl, MVT::i32, true);
9179 SDNode *MTFSF = DAG.getMachineNode(
9180 PPC::MTFSF, Dl, MVT::Other,
9181 {DAG.getConstant(255, Dl, MVT::i32, true), NewFPSCR, Zero, Zero, Chain});
9182 return SDValue(MTFSF, 0);
9183}
9184
9185SDValue PPCTargetLowering::LowerGET_ROUNDING(SDValue Op,
9186 SelectionDAG &DAG) const {
9187 SDLoc dl(Op);
9188 /*
9189 The rounding mode is in bits 30:31 of FPSCR, and has the following
9190 settings:
9191 00 Round to nearest
9192 01 Round to 0
9193 10 Round to +inf
9194 11 Round to -inf
9195
9196 GET_ROUNDING, on the other hand, expects the following:
9197 -1 Undefined
9198 0 Round to 0
9199 1 Round to nearest
9200 2 Round to +inf
9201 3 Round to -inf
9202
9203 To perform the conversion, we do:
9204 ((FPSCR & 0x3) ^ ((~FPSCR & 0x3) >> 1))
9205 */
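// For example, RN = 0b00 (round to nearest) gives (0 & 3) ^ ((~0 & 3) >> 1) =
// 0 ^ 1 = 1, and RN = 0b01 (round to 0) gives 1 ^ 1 = 0, matching the
// GET_ROUNDING encoding above.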
9206
9207 MachineFunction &MF = DAG.getMachineFunction();
9208 EVT VT = Op.getValueType();
9209 EVT PtrVT = getPointerTy(MF.getDataLayout());
9210
9211 // Save FP Control Word to register
9212 SDValue Chain = Op.getOperand(0);
9213 SDValue MFFS = DAG.getNode(PPCISD::MFFS, dl, {MVT::f64, MVT::Other}, Chain);
9214 Chain = MFFS.getValue(1);
9215
9216 SDValue CWD;
9217 if (isTypeLegal(MVT::i64)) {
9218 CWD = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32,
9219 DAG.getNode(ISD::BITCAST, dl, MVT::i64, MFFS));
9220 } else {
9221 // Save FP register to stack slot
9222 int SSFI = MF.getFrameInfo().CreateStackObject(8, Align(8), false);
9223 SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
9224 Chain = DAG.getStore(Chain, dl, MFFS, StackSlot, MachinePointerInfo());
9225
9226 // Load FP Control Word from low 32 bits of stack slot.
9228 "Stack slot adjustment is valid only on big endian subtargets!");
9229 SDValue Four = DAG.getConstant(4, dl, PtrVT);
9230 SDValue Addr = DAG.getNode(ISD::ADD, dl, PtrVT, StackSlot, Four);
9231 CWD = DAG.getLoad(MVT::i32, dl, Chain, Addr, MachinePointerInfo());
9232 Chain = CWD.getValue(1);
9233 }
9234
9235 // Transform as necessary
9236 SDValue CWD1 =
9237 DAG.getNode(ISD::AND, dl, MVT::i32,
9238 CWD, DAG.getConstant(3, dl, MVT::i32));
9239 SDValue CWD2 =
9240 DAG.getNode(ISD::SRL, dl, MVT::i32,
9241 DAG.getNode(ISD::AND, dl, MVT::i32,
9242 DAG.getNode(ISD::XOR, dl, MVT::i32,
9243 CWD, DAG.getConstant(3, dl, MVT::i32)),
9244 DAG.getConstant(3, dl, MVT::i32)),
9245 DAG.getConstant(1, dl, MVT::i32));
9246
9247 SDValue RetVal =
9248 DAG.getNode(ISD::XOR, dl, MVT::i32, CWD1, CWD2);
9249
9250 RetVal =
9251 DAG.getNode((VT.getSizeInBits() < 16 ? ISD::TRUNCATE : ISD::ZERO_EXTEND),
9252 dl, VT, RetVal);
9253
9254 return DAG.getMergeValues({RetVal, Chain}, dl);
9255}
9256
9257SDValue PPCTargetLowering::LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) const {
9258 EVT VT = Op.getValueType();
9259 unsigned BitWidth = VT.getSizeInBits();
9260 SDLoc dl(Op);
9261 assert(Op.getNumOperands() == 3 &&
9262 VT == Op.getOperand(1).getValueType() &&
9263 "Unexpected SHL!");
9264
9265 // Expand into a bunch of logical ops. Note that these ops
9266 // depend on the PPC behavior for oversized shift amounts.
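// PPC's shifts produce 0 for amounts in [BitWidth, 2*BitWidth), so Tmp2/Tmp3
// below cover Amt < BitWidth (Tmp6 is then 0) while Tmp6 covers
// Amt >= BitWidth (Tmp2 and Tmp3 are then 0). E.g. for BitWidth = 32 and
// Amt = 40: OutHi = Lo << 8 and OutLo = Lo << 40 = 0.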
9267 SDValue Lo = Op.getOperand(0);
9268 SDValue Hi = Op.getOperand(1);
9269 SDValue Amt = Op.getOperand(2);
9270 EVT AmtVT = Amt.getValueType();
9271
9272 SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
9273 DAG.getConstant(BitWidth, dl, AmtVT), Amt);
9274 SDValue Tmp2 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Amt);
9275 SDValue Tmp3 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Tmp1);
9276 SDValue Tmp4 = DAG.getNode(ISD::OR , dl, VT, Tmp2, Tmp3);
9277 SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
9278 DAG.getConstant(-BitWidth, dl, AmtVT));
9279 SDValue Tmp6 = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Tmp5);
9280 SDValue OutHi = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6);
9281 SDValue OutLo = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Amt);
9282 SDValue OutOps[] = { OutLo, OutHi };
9283 return DAG.getMergeValues(OutOps, dl);
9284}
9285
9286SDValue PPCTargetLowering::LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) const {
9287 EVT VT = Op.getValueType();
9288 SDLoc dl(Op);
9289 unsigned BitWidth = VT.getSizeInBits();
9290 assert(Op.getNumOperands() == 3 &&
9291 VT == Op.getOperand(1).getValueType() &&
9292 "Unexpected SRL!");
9293
9294 // Expand into a bunch of logical ops. Note that these ops
9295 // depend on the PPC behavior for oversized shift amounts.
9296 SDValue Lo = Op.getOperand(0);
9297 SDValue Hi = Op.getOperand(1);
9298 SDValue Amt = Op.getOperand(2);
9299 EVT AmtVT = Amt.getValueType();
9300
9301 SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
9302 DAG.getConstant(BitWidth, dl, AmtVT), Amt);
9303 SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt);
9304 SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1);
9305 SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
9306 SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
9307 DAG.getConstant(-BitWidth, dl, AmtVT));
9308 SDValue Tmp6 = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Tmp5);
9309 SDValue OutLo = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6);
9310 SDValue OutHi = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Amt);
9311 SDValue OutOps[] = { OutLo, OutHi };
9312 return DAG.getMergeValues(OutOps, dl);
9313}
9314
9315SDValue PPCTargetLowering::LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) const {
9316 SDLoc dl(Op);
9317 EVT VT = Op.getValueType();
9318 unsigned BitWidth = VT.getSizeInBits();
9319 assert(Op.getNumOperands() == 3 &&
9320 VT == Op.getOperand(1).getValueType() &&
9321 "Unexpected SRA!");
9322
9323 // Expand into a bunch of logical ops, followed by a select_cc.
9324 SDValue Lo = Op.getOperand(0);
9325 SDValue Hi = Op.getOperand(1);
9326 SDValue Amt = Op.getOperand(2);
9327 EVT AmtVT = Amt.getValueType();
9328
9329 SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
9330 DAG.getConstant(BitWidth, dl, AmtVT), Amt);
9331 SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt);
9332 SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1);
9333 SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
9334 SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
9335 DAG.getConstant(-BitWidth, dl, AmtVT));
9336 SDValue Tmp6 = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Tmp5);
9337 SDValue OutHi = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Amt);
9338 SDValue OutLo = DAG.getSelectCC(dl, Tmp5, DAG.getConstant(0, dl, AmtVT),
9339 Tmp4, Tmp6, ISD::SETLE);
9340 SDValue OutOps[] = { OutLo, OutHi };
9341 return DAG.getMergeValues(OutOps, dl);
9342}
9343
9344SDValue PPCTargetLowering::LowerFunnelShift(SDValue Op,
9345 SelectionDAG &DAG) const {
9346 SDLoc dl(Op);
9347 EVT VT = Op.getValueType();
9348 unsigned BitWidth = VT.getSizeInBits();
9349
9350 bool IsFSHL = Op.getOpcode() == ISD::FSHL;
9351 SDValue X = Op.getOperand(0);
9352 SDValue Y = Op.getOperand(1);
9353 SDValue Z = Op.getOperand(2);
9354 EVT AmtVT = Z.getValueType();
9355
9356 // fshl: (X << (Z % BW)) | (Y >> (BW - (Z % BW)))
9357 // fshr: (X << (BW - (Z % BW))) | (Y >> (Z % BW))
9358 // This is simpler than TargetLowering::expandFunnelShift because we can rely
9359 // on PowerPC shift by BW being well defined.
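// For example, fshl on i32 with Z = 40: Z & 31 = 8, SubZ = 24, and the result
// is (X << 8) | (Y >> 24), the same as a funnel shift left by 8.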
9360 Z = DAG.getNode(ISD::AND, dl, AmtVT, Z,
9361 DAG.getConstant(BitWidth - 1, dl, AmtVT));
9362 SDValue SubZ =
9363 DAG.getNode(ISD::SUB, dl, AmtVT, DAG.getConstant(BitWidth, dl, AmtVT), Z);
9364 X = DAG.getNode(PPCISD::SHL, dl, VT, X, IsFSHL ? Z : SubZ);
9365 Y = DAG.getNode(PPCISD::SRL, dl, VT, Y, IsFSHL ? SubZ : Z);
9366 return DAG.getNode(ISD::OR, dl, VT, X, Y);
9367}
9368
9369//===----------------------------------------------------------------------===//
9370// Vector related lowering.
9371//
9372
9373/// getCanonicalConstSplat - Build a canonical splat immediate of Val with an
9374/// element size of SplatSize. Cast the result to VT.
9375static SDValue getCanonicalConstSplat(uint64_t Val, unsigned SplatSize, EVT VT,
9376 SelectionDAG &DAG, const SDLoc &dl) {
9377 static const MVT VTys[] = { // canonical VT to use for each size.
9378 MVT::v16i8, MVT::v8i16, MVT::Other, MVT::v4i32
9379 };
9380
9381 EVT ReqVT = VT != MVT::Other ? VT : VTys[SplatSize-1];
9382
9383 // For a splat with all ones, turn it to vspltisb 0xFF to canonicalize.
9384 if (Val == ((1LLU << (SplatSize * 8)) - 1)) {
9385 SplatSize = 1;
9386 Val = 0xFF;
9387 }
9388
9389 EVT CanonicalVT = VTys[SplatSize-1];
9390
9391 // Build a canonical splat for this value.
9392 // Explicitly truncate APInt here, as this API is used with a mix of
9393 // signed and unsigned values.
9394 return DAG.getBitcast(
9395 ReqVT,
9396 DAG.getConstant(APInt(64, Val).trunc(SplatSize * 8), dl, CanonicalVT));
9397}
9398
9399/// BuildIntrinsicOp - Return a unary operator intrinsic node with the
9400/// specified intrinsic ID.
9401 static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op, SelectionDAG &DAG,
9402 const SDLoc &dl, EVT DestVT = MVT::Other) {
9403 if (DestVT == MVT::Other) DestVT = Op.getValueType();
9404 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
9405 DAG.getConstant(IID, dl, MVT::i32), Op);
9406}
9407
9408/// BuildIntrinsicOp - Return a binary operator intrinsic node with the
9409/// specified intrinsic ID.
9410static SDValue BuildIntrinsicOp(unsigned IID, SDValue LHS, SDValue RHS,
9411 SelectionDAG &DAG, const SDLoc &dl,
9412 EVT DestVT = MVT::Other) {
9413 if (DestVT == MVT::Other) DestVT = LHS.getValueType();
9414 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
9415 DAG.getConstant(IID, dl, MVT::i32), LHS, RHS);
9416}
9417
9418/// BuildIntrinsicOp - Return a ternary operator intrinsic node with the
9419/// specified intrinsic ID.
9420static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op0, SDValue Op1,
9421 SDValue Op2, SelectionDAG &DAG, const SDLoc &dl,
9422 EVT DestVT = MVT::Other) {
9423 if (DestVT == MVT::Other) DestVT = Op0.getValueType();
9424 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
9425 DAG.getConstant(IID, dl, MVT::i32), Op0, Op1, Op2);
9426}
9427
9428/// BuildVSLDOI - Return a VECTOR_SHUFFLE that is a vsldoi of the specified
9429/// amount. The result has the specified value type.
9430static SDValue BuildVSLDOI(SDValue LHS, SDValue RHS, unsigned Amt, EVT VT,
9431 SelectionDAG &DAG, const SDLoc &dl) {
9432 // Force LHS/RHS to be the right type.
9433 LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, LHS);
9434 RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, RHS);
9435
9436 int Ops[16];
9437 for (unsigned i = 0; i != 16; ++i)
9438 Ops[i] = i + Amt;
9439 SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, LHS, RHS, Ops);
9440 return DAG.getNode(ISD::BITCAST, dl, VT, T);
9441}
9442
9443/// Do we have an efficient pattern in a .td file for this node?
9444///
9445/// \param V - pointer to the BuildVectorSDNode being matched
9446/// \param HasDirectMove - does this subtarget have VSR <-> GPR direct moves?
9447///
9448/// There are some patterns where it is beneficial to keep a BUILD_VECTOR
9449/// node as a BUILD_VECTOR node rather than expanding it. The patterns where
9450/// the opposite is true (expansion is beneficial) are:
9451/// - The node builds a vector out of integers that are not 32 or 64-bits
9452/// - The node builds a vector out of constants
9453/// - The node is a "load-and-splat"
9454/// In all other cases, we will choose to keep the BUILD_VECTOR.
9455 static bool haveEfficientBuildVectorPattern(BuildVectorSDNode *V,
9456 bool HasDirectMove,
9457 bool HasP8Vector) {
9458 EVT VecVT = V->getValueType(0);
9459 bool RightType = VecVT == MVT::v2f64 ||
9460 (HasP8Vector && VecVT == MVT::v4f32) ||
9461 (HasDirectMove && (VecVT == MVT::v2i64 || VecVT == MVT::v4i32));
9462 if (!RightType)
9463 return false;
9464
9465 bool IsSplat = true;
9466 bool IsLoad = false;
9467 SDValue Op0 = V->getOperand(0);
9468
9469 // This function is called in a block that confirms the node is not a constant
9470 // splat. So a constant BUILD_VECTOR here means the vector is built out of
9471 // different constants.
9472 if (V->isConstant())
9473 return false;
9474 for (int i = 0, e = V->getNumOperands(); i < e; ++i) {
9475 if (V->getOperand(i).isUndef())
9476 return false;
9477 // We want to expand nodes that represent load-and-splat even if the
9478 // loaded value is a floating point truncation or conversion to int.
9479 if (V->getOperand(i).getOpcode() == ISD::LOAD ||
9480 (V->getOperand(i).getOpcode() == ISD::FP_ROUND &&
9481 V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD) ||
9482 (V->getOperand(i).getOpcode() == ISD::FP_TO_SINT &&
9483 V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD) ||
9484 (V->getOperand(i).getOpcode() == ISD::FP_TO_UINT &&
9485 V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD))
9486 IsLoad = true;
9487 // If the operands are different or the input is not a load and has more
9488 // uses than just this BV node, then it isn't a splat.
9489 if (V->getOperand(i) != Op0 ||
9490 (!IsLoad && !V->isOnlyUserOf(V->getOperand(i).getNode())))
9491 IsSplat = false;
9492 }
9493 return !(IsSplat && IsLoad);
9494}
9495
9496// Lower BITCAST(f128, (build_pair i64, i64)) to BUILD_FP128.
9497SDValue PPCTargetLowering::LowerBITCAST(SDValue Op, SelectionDAG &DAG) const {
9498
9499 SDLoc dl(Op);
9500 SDValue Op0 = Op->getOperand(0);
9501
9502 if (!Subtarget.isPPC64() || (Op0.getOpcode() != ISD::BUILD_PAIR) ||
9503 (Op.getValueType() != MVT::f128))
9504 return SDValue();
9505
9506 SDValue Lo = Op0.getOperand(0);
9507 SDValue Hi = Op0.getOperand(1);
9508 if ((Lo.getValueType() != MVT::i64) || (Hi.getValueType() != MVT::i64))
9509 return SDValue();
9510
9511 if (!Subtarget.isLittleEndian())
9512 std::swap(Lo, Hi);
9513
9514 return DAG.getNode(PPCISD::BUILD_FP128, dl, MVT::f128, Lo, Hi);
9515}
9516
9517static const SDValue *getNormalLoadInput(const SDValue &Op, bool &IsPermuted) {
9518 const SDValue *InputLoad = &Op;
9519 while (InputLoad->getOpcode() == ISD::BITCAST)
9520 InputLoad = &InputLoad->getOperand(0);
9521 if (InputLoad->getOpcode() == ISD::SCALAR_TO_VECTOR ||
9522 InputLoad->getOpcode() == PPCISD::SCALAR_TO_VECTOR_PERMUTED) {
9523 IsPermuted = InputLoad->getOpcode() == PPCISD::SCALAR_TO_VECTOR_PERMUTED;
9524 InputLoad = &InputLoad->getOperand(0);
9525 }
9526 if (InputLoad->getOpcode() != ISD::LOAD)
9527 return nullptr;
9528 LoadSDNode *LD = cast<LoadSDNode>(*InputLoad);
9529 return ISD::isNormalLoad(LD) ? InputLoad : nullptr;
9530}
9531
9532// Convert the argument APFloat to a single precision APFloat if there is no
9533// loss in information during the conversion to single precision APFloat and the
9534// resulting number is not a denormal number. Return true if successful.
9535 bool llvm::convertToNonDenormSingle(APFloat &ArgAPFloat) {
9536 APFloat APFloatToConvert = ArgAPFloat;
9537 bool LosesInfo = true;
9538 APFloatToConvert.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven,
9539 &LosesInfo);
9540 bool Success = (!LosesInfo && !APFloatToConvert.isDenormal());
9541 if (Success)
9542 ArgAPFloat = APFloatToConvert;
9543 return Success;
9544}
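// For example, the helper above accepts 2.0 and -0.5 (exact in single
// precision) but rejects 0.1 (inexact in single precision) and 1.0e-40
// (denormal in single precision).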
9545
9546// Bitcast the argument APInt to a double and convert it to a single precision
9547// APFloat, bitcast the APFloat to an APInt and assign it to the original
9548// argument if there is no loss in information during the conversion from
9549// double to single precision APFloat and the resulting number is not a denormal
9550// number. Return true if successful.
9551 bool llvm::convertToNonDenormSingle(APInt &ArgAPInt) {
9552 double DpValue = ArgAPInt.bitsToDouble();
9553 APFloat APFloatDp(DpValue);
9554 bool Success = convertToNonDenormSingle(APFloatDp);
9555 if (Success)
9556 ArgAPInt = APFloatDp.bitcastToAPInt();
9557 return Success;
9558}
9559
9560 // Nondestructive check for convertToNonDenormSingle.
9561 bool llvm::checkConvertToNonDenormSingle(APFloat &ArgAPFloat) {
9562 // Only convert if it loses info, since XXSPLTIDP should
9563 // handle the other case.
9564 APFloat APFloatToConvert = ArgAPFloat;
9565 bool LosesInfo = true;
9567 &LosesInfo);
9568
9569 return (!LosesInfo && !APFloatToConvert.isDenormal());
9570}
9571
9572static bool isValidSplatLoad(const PPCSubtarget &Subtarget, const SDValue &Op,
9573 unsigned &Opcode) {
9574 LoadSDNode *InputNode = dyn_cast<LoadSDNode>(Op.getOperand(0));
9575 if (!InputNode || !Subtarget.hasVSX() || !ISD::isUNINDEXEDLoad(InputNode))
9576 return false;
9577
9578 EVT Ty = Op->getValueType(0);
9579 // For v2f64, v4f32 and v4i32 types, we require the load to be non-extending
9580 // as we cannot handle extending loads for these types.
9581 if ((Ty == MVT::v2f64 || Ty == MVT::v4f32 || Ty == MVT::v4i32) &&
9582 ISD::isNON_EXTLoad(InputNode))
9583 return true;
9584
9585 EVT MemVT = InputNode->getMemoryVT();
9586 // For v8i16 and v16i8 types, extending loads can be handled as long as the
9587 // memory VT is the same vector element VT type.
9588 // The loads feeding into the v8i16 and v16i8 types will be extending because
9589 // scalar i8/i16 are not legal types.
9590 if ((Ty == MVT::v8i16 || Ty == MVT::v16i8) && ISD::isEXTLoad(InputNode) &&
9591 (MemVT == Ty.getVectorElementType()))
9592 return true;
9593
9594 if (Ty == MVT::v2i64) {
9595 // Check the extend type, when the input type is i32, and the output vector
9596 // type is v2i64.
9597 if (MemVT == MVT::i32) {
9598 if (ISD::isZEXTLoad(InputNode))
9599 Opcode = PPCISD::ZEXT_LD_SPLAT;
9600 if (ISD::isSEXTLoad(InputNode))
9601 Opcode = PPCISD::SEXT_LD_SPLAT;
9602 }
9603 return true;
9604 }
9605 return false;
9606}
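// In summary, with VSX and an unindexed input load the cases accepted above
// are: non-extending loads for v2f64/v4f32/v4i32; extending loads whose
// memory VT matches the element type for v8i16/v16i8; and any such load for
// v2i64, where an i32 memory VT additionally selects ZEXT_LD_SPLAT or
// SEXT_LD_SPLAT based on the kind of extension.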
9607
9608// If this is a case we can't handle, return null and let the default
9609// expansion code take care of it. If we CAN select this case, and if it
9610// selects to a single instruction, return Op. Otherwise, if we can codegen
9611// this case more efficiently than a constant pool load, lower it to the
9612// sequence of ops that should be used.
9613SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
9614 SelectionDAG &DAG) const {
9615 SDLoc dl(Op);
9616 BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
9617 assert(BVN && "Expected a BuildVectorSDNode in LowerBUILD_VECTOR");
9618
9619 // Check if this is a splat of a constant value.
9620 APInt APSplatBits, APSplatUndef;
9621 unsigned SplatBitSize;
9622 bool HasAnyUndefs;
9623 bool BVNIsConstantSplat =
9624 BVN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
9625 HasAnyUndefs, 0, !Subtarget.isLittleEndian());
9626
9627 // If it is a splat of a double, check if we can shrink it to a 32 bit
9628 // non-denormal float which when converted back to double gives us the same
9629 // double. This is to exploit the XXSPLTIDP instruction.
9630 // If we lose precision, we use XXSPLTI32DX.
9631 if (BVNIsConstantSplat && (SplatBitSize == 64) &&
9632 Subtarget.hasPrefixInstrs() && Subtarget.hasP10Vector()) {
9633 // Check the type first to short-circuit so we don't modify APSplatBits if
9634 // this block isn't executed.
9635 if ((Op->getValueType(0) == MVT::v2f64) &&
9636 convertToNonDenormSingle(APSplatBits)) {
9637 SDValue SplatNode = DAG.getNode(
9638 PPCISD::XXSPLTI_SP_TO_DP, dl, MVT::v2f64,
9639 DAG.getTargetConstant(APSplatBits.getZExtValue(), dl, MVT::i32));
9640 return DAG.getBitcast(Op.getValueType(), SplatNode);
9641 } else {
9642 // We may lose precision, so we have to use XXSPLTI32DX.
9643
9644 uint32_t Hi = Hi_32(APSplatBits.getZExtValue());
9645 uint32_t Lo = Lo_32(APSplatBits.getZExtValue());
9646 SDValue SplatNode = DAG.getUNDEF(MVT::v2i64);
9647
9648 if (!Hi || !Lo)
9649 // If either half is 0, then we should generate XXLXOR to set to 0.
9650 SplatNode = DAG.getTargetConstant(0, dl, MVT::v2i64);
9651
9652 if (Hi)
9653 SplatNode = DAG.getNode(
9654 PPCISD::XXSPLTI32DX, dl, MVT::v2i64, SplatNode,
9655 DAG.getTargetConstant(0, dl, MVT::i32),
9656 DAG.getTargetConstant(Hi, dl, MVT::i32));
9657
9658 if (Lo)
9659 SplatNode =
9660 DAG.getNode(PPCISD::XXSPLTI32DX, dl, MVT::v2i64, SplatNode,
9661 DAG.getTargetConstant(1, dl, MVT::i32),
9662 DAG.getTargetConstant(Lo, dl, MVT::i32));
9663
9664 return DAG.getBitcast(Op.getValueType(), SplatNode);
9665 }
9666 }
9667
9668 if (!BVNIsConstantSplat || SplatBitSize > 32) {
9669 unsigned NewOpcode = PPCISD::LD_SPLAT;
9670
9671 // Handle load-and-splat patterns as we have instructions that will do this
9672 // in one go.
9673 if (DAG.isSplatValue(Op, true) &&
9674 isValidSplatLoad(Subtarget, Op, NewOpcode)) {
9675 const SDValue *InputLoad = &Op.getOperand(0);
9676 LoadSDNode *LD = cast<LoadSDNode>(*InputLoad);
9677
9678 // If the input load is an extending load, it will be an i32 -> i64
9679 // extending load and isValidSplatLoad() will update NewOpcode.
9680 unsigned MemorySize = LD->getMemoryVT().getScalarSizeInBits();
9681 unsigned ElementSize =
9682 MemorySize * ((NewOpcode == PPCISD::LD_SPLAT) ? 1 : 2);
9683
9684 assert(((ElementSize == 2 * MemorySize)
9685 ? (NewOpcode == PPCISD::ZEXT_LD_SPLAT ||
9686 NewOpcode == PPCISD::SEXT_LD_SPLAT)
9687 : (NewOpcode == PPCISD::LD_SPLAT)) &&
9688 "Unmatched element size and opcode!\n");
9689
9690 // Checking for a single use of this load, we have to check for vector
9691 // width (128 bits) / ElementSize uses (since each operand of the
9692 // BUILD_VECTOR is a separate use of the value).
9693 unsigned NumUsesOfInputLD = 128 / ElementSize;
9694 for (SDValue BVInOp : Op->ops())
9695 if (BVInOp.isUndef())
9696 NumUsesOfInputLD--;
9697
9698 // Exclude some cases where LD_SPLAT is worse than scalar_to_vector:
9699 // the cases below should also apply to "lfiwzx/lfiwax + LE target + index
9700 // 1", "lxvrhx + BE target + index 7" and "lxvrbx + BE target + index
9701 // 15", but isValidSplatLoad() currently only returns true when
9702 // the data at index 0 is valid, so we will not get into trouble for
9703 // these cases.
9704 //
9705 // case 1 - lfiwzx/lfiwax
9706 // 1.1: load result is i32 and is sign/zero extend to i64;
9707 // 1.2: build a v2i64 vector type with above loaded value;
9708 // 1.3: the vector has only one value at index 0, others are all undef;
9709 // 1.4: on BE target, so that lfiwzx/lfiwax does not need any permute.
9710 if (NumUsesOfInputLD == 1 &&
9711 (Op->getValueType(0) == MVT::v2i64 && NewOpcode != PPCISD::LD_SPLAT &&
9712 !Subtarget.isLittleEndian() && Subtarget.hasVSX() &&
9713 Subtarget.hasLFIWAX()))
9714 return SDValue();
9715
9716 // case 2 - lxvr[hb]x
9717 // 2.1: load result is at most i16;
9718 // 2.2: build a vector with above loaded value;
9719 // 2.3: the vector has only one value at index 0, others are all undef;
9720 // 2.4: on LE target, so that lxvr[hb]x does not need any permute.
9721 if (NumUsesOfInputLD == 1 && Subtarget.isLittleEndian() &&
9722 Subtarget.isISA3_1() && ElementSize <= 16)
9723 return SDValue();
9724
9725 assert(NumUsesOfInputLD > 0 && "No uses of input LD of a build_vector?");
9726 if (InputLoad->getNode()->hasNUsesOfValue(NumUsesOfInputLD, 0) &&
9727 Subtarget.hasVSX()) {
9728 SDValue Ops[] = {
9729 LD->getChain(), // Chain
9730 LD->getBasePtr(), // Ptr
9731 DAG.getValueType(Op.getValueType()) // VT
9732 };
9733 SDValue LdSplt = DAG.getMemIntrinsicNode(
9734 NewOpcode, dl, DAG.getVTList(Op.getValueType(), MVT::Other), Ops,
9735 LD->getMemoryVT(), LD->getMemOperand());
9736 // Replace all uses of the output chain of the original load with the
9737 // output chain of the new load.
9738 DAG.ReplaceAllUsesOfValueWith(InputLoad->getValue(1),
9739 LdSplt.getValue(1));
9740 return LdSplt;
9741 }
9742 }
9743
9744 // In 64BIT mode BUILD_VECTOR nodes that are not constant splats of up to
9745 // 32-bits can be lowered to VSX instructions under certain conditions.
9746 // Without VSX, there is no pattern more efficient than expanding the node.
9747 if (Subtarget.hasVSX() && Subtarget.isPPC64() &&
9748 haveEfficientBuildVectorPattern(BVN, Subtarget.hasDirectMove(),
9749 Subtarget.hasP8Vector()))
9750 return Op;
9751 return SDValue();
9752 }
9753
9754 uint64_t SplatBits = APSplatBits.getZExtValue();
9755 uint64_t SplatUndef = APSplatUndef.getZExtValue();
9756 unsigned SplatSize = SplatBitSize / 8;
9757
9758 // First, handle single instruction cases.
9759
9760 // All zeros?
9761 if (SplatBits == 0) {
9762 // Canonicalize all zero vectors to be v4i32.
9763 if (Op.getValueType() != MVT::v4i32 || HasAnyUndefs) {
9764 SDValue Z = DAG.getConstant(0, dl, MVT::v4i32);
9765 Op = DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Z);
9766 }
9767 return Op;
9768 }
9769
9770 // We have XXSPLTIW for constant splats four bytes wide.
9771 // Given vector length is a multiple of 4, 2-byte splats can be replaced
9772 // with 4-byte splats. We replicate the SplatBits in case of 2-byte splat to
9773 // make a 4-byte splat element. For example: 2-byte splat of 0xABAB can be
9774 // turned into a 4-byte splat of 0xABABABAB.
9775 if (Subtarget.hasPrefixInstrs() && Subtarget.hasP10Vector() && SplatSize == 2)
9776 return getCanonicalConstSplat(SplatBits | (SplatBits << 16), SplatSize * 2,
9777 Op.getValueType(), DAG, dl);
9778
9779 if (Subtarget.hasPrefixInstrs() && Subtarget.hasP10Vector() && SplatSize == 4)
9780 return getCanonicalConstSplat(SplatBits, SplatSize, Op.getValueType(), DAG,
9781 dl);
9782
9783 // We have XXSPLTIB for constant splats one byte wide.
9784 if (Subtarget.hasP9Vector() && SplatSize == 1)
9785 return getCanonicalConstSplat(SplatBits, SplatSize, Op.getValueType(), DAG,
9786 dl);
9787
9788 // If the sign extended value is in the range [-16,15], use VSPLTI[bhw].
9789 int32_t SextVal = SignExtend32(SplatBits, SplatBitSize);
9790 if (SextVal >= -16 && SextVal <= 15)
9791 return getCanonicalConstSplat(SextVal, SplatSize, Op.getValueType(), DAG,
9792 dl);
9793
9794 // Two instruction sequences.
9795
9796 // If this value is in the range [-32,30] and is even, use:
9797 // VSPLTI[bhw](val/2) + VSPLTI[bhw](val/2)
9798 // If this value is in the range [17,31] and is odd, use:
9799 // VSPLTI[bhw](val-16) - VSPLTI[bhw](-16)
9800 // If this value is in the range [-31,-17] and is odd, use:
9801 // VSPLTI[bhw](val+16) + VSPLTI[bhw](-16)
9802 // Note the last two are three-instruction sequences.
9803 if (SextVal >= -32 && SextVal <= 31) {
9804 // To avoid having these optimizations undone by constant folding,
9805 // we convert to a pseudo that will be expanded later into one of
9806 // the above forms.
9807 SDValue Elt = DAG.getSignedConstant(SextVal, dl, MVT::i32);
9808 EVT VT = (SplatSize == 1 ? MVT::v16i8 :
9809 (SplatSize == 2 ? MVT::v8i16 : MVT::v4i32));
9810 SDValue EltSize = DAG.getConstant(SplatSize, dl, MVT::i32);
9811 SDValue RetVal = DAG.getNode(PPCISD::VADD_SPLAT, dl, VT, Elt, EltSize);
9812 if (VT == Op.getValueType())
9813 return RetVal;
9814 else
9815 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), RetVal);
9816 }
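// Worked example for the VADD_SPLAT pseudo above: a v16i8 splat of 24 falls
// in the even [-32,30] case and is later expanded to roughly vspltisb 12
// followed by adding the result to itself (vaddubm), giving 24 in every byte.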
9817
9818 // If this is 0x8000_0000 x 4, turn into vspltisw + vslw. If it is
9819 // 0x7FFF_FFFF x 4, turn it into not(0x8000_0000). This is important
9820 // for fneg/fabs.
9821 if (SplatSize == 4 && SplatBits == (0x7FFFFFFF&~SplatUndef)) {
9822 // Make -1 and vspltisw -1:
9823 SDValue OnesV = getCanonicalConstSplat(-1, 4, MVT::v4i32, DAG, dl);
9824
9825 // Make the VSLW intrinsic, computing 0x8000_0000.
9826 SDValue Res = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, OnesV,
9827 OnesV, DAG, dl);
9828
9829 // xor by OnesV to invert it.
9830 Res = DAG.getNode(ISD::XOR, dl, MVT::v4i32, Res, OnesV);
9831 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
9832 }
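// Worked example for the sequence above: vspltisw -1 puts 0xFFFF_FFFF in each
// word, vslw then shifts each word left by 31 (the low 5 bits of 0xFFFF_FFFF)
// to produce 0x8000_0000, and the final xor with the all-ones vector yields
// 0x7FFF_FFFF in every word.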
9833
9834 // Check to see if this is a wide variety of vsplti*, binop self cases.
9835 static const signed char SplatCsts[] = {
9836 -1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7,
9837 -8, 8, -9, 9, -10, 10, -11, 11, -12, 12, -13, 13, 14, -14, 15, -15, -16
9838 };
9839
9840 for (unsigned idx = 0; idx < std::size(SplatCsts); ++idx) {
9841 // Indirect through the SplatCsts array so that we favor 'vsplti -1' for
9842 // cases which are ambiguous (e.g. formation of 0x8000_0000).
9843 int i = SplatCsts[idx];
9844
9845 // Figure out what shift amount will be used by altivec if shifted by i in
9846 // this splat size.
9847 unsigned TypeShiftAmt = i & (SplatBitSize-1);
9848
9849 // vsplti + shl self.
9850 if (SextVal == (int)((unsigned)i << TypeShiftAmt)) {
9851 SDValue Res = getCanonicalConstSplat(i, SplatSize, MVT::Other, DAG, dl);
9852 static const unsigned IIDs[] = { // Intrinsic to use for each size.
9853 Intrinsic::ppc_altivec_vslb, Intrinsic::ppc_altivec_vslh, 0,
9854 Intrinsic::ppc_altivec_vslw
9855 };
9856 Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
9857 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
9858 }
9859
9860 // vsplti + srl self.
9861 if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) {
9862 SDValue Res = getCanonicalConstSplat(i, SplatSize, MVT::Other, DAG, dl);
9863 static const unsigned IIDs[] = { // Intrinsic to use for each size.
9864 Intrinsic::ppc_altivec_vsrb, Intrinsic::ppc_altivec_vsrh, 0,
9865 Intrinsic::ppc_altivec_vsrw
9866 };
9867 Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
9868 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
9869 }
9870
9871 // vsplti + rol self.
9872 if (SextVal == (int)(((unsigned)i << TypeShiftAmt) |
9873 ((unsigned)i >> (SplatBitSize-TypeShiftAmt)))) {
9874 SDValue Res = getCanonicalConstSplat(i, SplatSize, MVT::Other, DAG, dl);
9875 static const unsigned IIDs[] = { // Intrinsic to use for each size.
9876 Intrinsic::ppc_altivec_vrlb, Intrinsic::ppc_altivec_vrlh, 0,
9877 Intrinsic::ppc_altivec_vrlw
9878 };
9879 Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
9880 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
9881 }
9882
9883 // t = vsplti c, result = vsldoi t, t, 1
9884 if (SextVal == (int)(((unsigned)i << 8) | (i < 0 ? 0xFF : 0))) {
9885 SDValue T = getCanonicalConstSplat(i, SplatSize, MVT::v16i8, DAG, dl);
9886 unsigned Amt = Subtarget.isLittleEndian() ? 15 : 1;
9887 return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
9888 }
9889 // t = vsplti c, result = vsldoi t, t, 2
9890 if (SextVal == (int)(((unsigned)i << 16) | (i < 0 ? 0xFFFF : 0))) {
9891 SDValue T = getCanonicalConstSplat(i, SplatSize, MVT::v16i8, DAG, dl);
9892 unsigned Amt = Subtarget.isLittleEndian() ? 14 : 2;
9893 return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
9894 }
9895 // t = vsplti c, result = vsldoi t, t, 3
9896 if (SextVal == (int)(((unsigned)i << 24) | (i < 0 ? 0xFFFFFF : 0))) {
9897 SDValue T = getCanonicalConstSplat(i, SplatSize, MVT::v16i8, DAG, dl);
9898 unsigned Amt = Subtarget.isLittleEndian() ? 13 : 3;
9899 return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
9900 }
9901 }
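// Worked example for the loop above: a v16i8 splat of 0x40 (decimal 64) is
// out of range for the single- and two-instruction cases, but matches the
// "vsplti + shl self" form with i = 4, since (4 << (4 & 7)) == 0x40; it is
// therefore emitted as vspltisb 4 followed by vslb of the result by itself.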
9902
9903 return SDValue();
9904}
9905
9906/// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
9907/// the specified operations to build the shuffle.
9908static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
9909 SDValue RHS, SelectionDAG &DAG,
9910 const SDLoc &dl) {
9911 unsigned OpNum = (PFEntry >> 26) & 0x0F;
9912 unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1);
9913 unsigned RHSID = (PFEntry >> 0) & ((1 << 13)-1);
9914
9915 enum {
9916 OP_COPY = 0, // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>
9917 OP_VMRGHW,
9918 OP_VMRGLW,
9919 OP_VSPLTISW0,
9920 OP_VSPLTISW1,
9921 OP_VSPLTISW2,
9922 OP_VSPLTISW3,
9923 OP_VSLDOI4,
9924 OP_VSLDOI8,
9925 OP_VSLDOI12
9926 };
9927
9928 if (OpNum == OP_COPY) {
9929 if (LHSID == (1*9+2)*9+3) return LHS;
9930 assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!");
9931 return RHS;
9932 }
9933
9934 SDValue OpLHS, OpRHS;
9935 OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl);
9936 OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl);
9937
9938 int ShufIdxs[16];
9939 switch (OpNum) {
9940 default: llvm_unreachable("Unknown i32 permute!");
9941 case OP_VMRGHW:
9942 ShufIdxs[ 0] = 0; ShufIdxs[ 1] = 1; ShufIdxs[ 2] = 2; ShufIdxs[ 3] = 3;
9943 ShufIdxs[ 4] = 16; ShufIdxs[ 5] = 17; ShufIdxs[ 6] = 18; ShufIdxs[ 7] = 19;
9944 ShufIdxs[ 8] = 4; ShufIdxs[ 9] = 5; ShufIdxs[10] = 6; ShufIdxs[11] = 7;
9945 ShufIdxs[12] = 20; ShufIdxs[13] = 21; ShufIdxs[14] = 22; ShufIdxs[15] = 23;
9946 break;
9947 case OP_VMRGLW:
9948 ShufIdxs[ 0] = 8; ShufIdxs[ 1] = 9; ShufIdxs[ 2] = 10; ShufIdxs[ 3] = 11;
9949 ShufIdxs[ 4] = 24; ShufIdxs[ 5] = 25; ShufIdxs[ 6] = 26; ShufIdxs[ 7] = 27;
9950 ShufIdxs[ 8] = 12; ShufIdxs[ 9] = 13; ShufIdxs[10] = 14; ShufIdxs[11] = 15;
9951 ShufIdxs[12] = 28; ShufIdxs[13] = 29; ShufIdxs[14] = 30; ShufIdxs[15] = 31;
9952 break;
9953 case OP_VSPLTISW0:
9954 for (unsigned i = 0; i != 16; ++i)
9955 ShufIdxs[i] = (i&3)+0;
9956 break;
9957 case OP_VSPLTISW1:
9958 for (unsigned i = 0; i != 16; ++i)
9959 ShufIdxs[i] = (i&3)+4;
9960 break;
9961 case OP_VSPLTISW2:
9962 for (unsigned i = 0; i != 16; ++i)
9963 ShufIdxs[i] = (i&3)+8;
9964 break;
9965 case OP_VSPLTISW3:
9966 for (unsigned i = 0; i != 16; ++i)
9967 ShufIdxs[i] = (i&3)+12;
9968 break;
9969 case OP_VSLDOI4:
9970 return BuildVSLDOI(OpLHS, OpRHS, 4, OpLHS.getValueType(), DAG, dl);
9971 case OP_VSLDOI8:
9972 return BuildVSLDOI(OpLHS, OpRHS, 8, OpLHS.getValueType(), DAG, dl);
9973 case OP_VSLDOI12:
9974 return BuildVSLDOI(OpLHS, OpRHS, 12, OpLHS.getValueType(), DAG, dl);
9975 }
9976 EVT VT = OpLHS.getValueType();
9977 OpLHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpLHS);
9978 OpRHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpRHS);
9979 SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, OpLHS, OpRHS, ShufIdxs);
9980 return DAG.getNode(ISD::BITCAST, dl, VT, T);
9981}
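// Layout of a PFEntry, for reference: bits [31:30] hold the cost, bits
// [29:26] the operation (one of the OP_* values above), bits [25:13] the
// LHSID and bits [12:0] the RHSID. Each ID packs four element selectors in
// base 9 (0-7 pick a word from the two concatenated inputs, 8 means undef);
// for example <0,1,2,3> encodes to ((0*9+1)*9+2)*9+3 == 102, which is why
// OP_COPY with that LHSID simply returns LHS.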
9982
9983/// lowerToVINSERTB - Return the SDValue if this VECTOR_SHUFFLE can be handled
9984/// by the VINSERTB instruction introduced in ISA 3.0, else just return default
9985/// SDValue.
9986SDValue PPCTargetLowering::lowerToVINSERTB(ShuffleVectorSDNode *N,
9987 SelectionDAG &DAG) const {
9988 const unsigned BytesInVector = 16;
9989 bool IsLE = Subtarget.isLittleEndian();
9990 SDLoc dl(N);
9991 SDValue V1 = N->getOperand(0);
9992 SDValue V2 = N->getOperand(1);
9993 unsigned ShiftElts = 0, InsertAtByte = 0;
9994 bool Swap = false;
9995
9996 // Shifts required to get the byte we want at element 7.
9997 unsigned LittleEndianShifts[] = {8, 7, 6, 5, 4, 3, 2, 1,
9998 0, 15, 14, 13, 12, 11, 10, 9};
9999 unsigned BigEndianShifts[] = {9, 10, 11, 12, 13, 14, 15, 0,
10000 1, 2, 3, 4, 5, 6, 7, 8};
10001
10002 ArrayRef<int> Mask = N->getMask();
10003 int OriginalOrder[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
10004
10005 // For each mask element, find out if we're just inserting something
10006 // from V2 into V1 or vice versa.
10007 // Possible permutations inserting an element from V2 into V1:
10008 // X, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
10009 // 0, X, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
10010 // ...
10011 // 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, X
10012 // Inserting from V1 into V2 will be similar, except mask range will be
10013 // [16,31].
10014
10015 bool FoundCandidate = false;
10016 // If both vector operands for the shuffle are the same vector, the mask
10017 // will contain only elements from the first one and the second one will be
10018 // undef.
10019 unsigned VINSERTBSrcElem = IsLE ? 8 : 7;
10020 // Go through the mask of bytes to find an element that's being moved
10021 // from one vector to the other.
10022 for (unsigned i = 0; i < BytesInVector; ++i) {
10023 unsigned CurrentElement = Mask[i];
10024 // If the 2nd operand is undefined, we should only look for the VINSERTB
10025 // source element (7 on BE, 8 on LE) in the Mask.
10026 if (V2.isUndef() && CurrentElement != VINSERTBSrcElem)
10027 continue;
10028
10029 bool OtherElementsInOrder = true;
10030 // Examine the other elements in the Mask to see if they're in original
10031 // order.
10032 for (unsigned j = 0; j < BytesInVector; ++j) {
10033 if (j == i)
10034 continue;
10035 // If CurrentElement is from V1 [0,15], we expect the rest of the Mask to
10036 // be from V2 [16,31] and vice versa, unless the 2nd operand is undefined,
10037 // in which case we assume we're always picking from the 1st operand.
10038 int MaskOffset =
10039 (!V2.isUndef() && CurrentElement < BytesInVector) ? BytesInVector : 0;
10040 if (Mask[j] != OriginalOrder[j] + MaskOffset) {
10041 OtherElementsInOrder = false;
10042 break;
10043 }
10044 }
10045 // If other elements are in original order, we record the number of shifts
10046 // we need to get the element we want into element 7. Also record which byte
10047 // in the vector we should insert into.
10048 if (OtherElementsInOrder) {
10049 // If 2nd operand is undefined, we assume no shifts and no swapping.
10050 if (V2.isUndef()) {
10051 ShiftElts = 0;
10052 Swap = false;
10053 } else {
10054 // Only need the last 4-bits for shifts because operands will be swapped if CurrentElement is >= 2^4.
10055 ShiftElts = IsLE ? LittleEndianShifts[CurrentElement & 0xF]
10056 : BigEndianShifts[CurrentElement & 0xF];
10057 Swap = CurrentElement < BytesInVector;
10058 }
10059 InsertAtByte = IsLE ? BytesInVector - (i + 1) : i;
10060 FoundCandidate = true;
10061 break;
10062 }
10063 }
10064
10065 if (!FoundCandidate)
10066 return SDValue();
10067
10068 // Candidate found, construct the proper SDAG sequence with VINSERTB,
10069 // optionally with VECSHL if shift is required.
10070 if (Swap)
10071 std::swap(V1, V2);
10072 if (V2.isUndef())
10073 V2 = V1;
10074 if (ShiftElts) {
10075 SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v16i8, V2, V2,
10076 DAG.getConstant(ShiftElts, dl, MVT::i32));
10077 return DAG.getNode(PPCISD::VECINSERT, dl, MVT::v16i8, V1, Shl,
10078 DAG.getConstant(InsertAtByte, dl, MVT::i32));
10079 }
10080 return DAG.getNode(PPCISD::VECINSERT, dl, MVT::v16i8, V1, V2,
10081 DAG.getConstant(InsertAtByte, dl, MVT::i32));
10082}
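// For example, the byte shuffle mask <16,1,2,...,15> moves only byte 0 of V2
// into byte 0 of V1 while leaving everything else in order, so it is matched
// above: VECSHL first rotates V2 so the wanted byte lands in the VINSERTB
// source lane, and VECINSERT then places it at the computed InsertAtByte.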
10083
10084/// lowerToVINSERTH - Return the SDValue if this VECTOR_SHUFFLE can be handled
10085/// by the VINSERTH instruction introduced in ISA 3.0, else just return default
10086/// SDValue.
10087SDValue PPCTargetLowering::lowerToVINSERTH(ShuffleVectorSDNode *N,
10088 SelectionDAG &DAG) const {
10089 const unsigned NumHalfWords = 8;
10090 const unsigned BytesInVector = NumHalfWords * 2;
10091 // Check that the shuffle is on half-words.
10092 if (!isNByteElemShuffleMask(N, 2, 1))
10093 return SDValue();
10094
10095 bool IsLE = Subtarget.isLittleEndian();
10096 SDLoc dl(N);
10097 SDValue V1 = N->getOperand(0);
10098 SDValue V2 = N->getOperand(1);
10099 unsigned ShiftElts = 0, InsertAtByte = 0;
10100 bool Swap = false;
10101
10102 // Shifts required to get the half-word we want at element 3.
10103 unsigned LittleEndianShifts[] = {4, 3, 2, 1, 0, 7, 6, 5};
10104 unsigned BigEndianShifts[] = {5, 6, 7, 0, 1, 2, 3, 4};
10105
10106 uint32_t Mask = 0;
10107 uint32_t OriginalOrderLow = 0x1234567;
10108 uint32_t OriginalOrderHigh = 0x89ABCDEF;
10109 // Now we look at mask elements 0,2,4,6,8,10,12,14. Pack the mask into a
10110 // 32-bit space, only need 4-bit nibbles per element.
10111 for (unsigned i = 0; i < NumHalfWords; ++i) {
10112 unsigned MaskShift = (NumHalfWords - 1 - i) * 4;
10113 Mask |= ((uint32_t)(N->getMaskElt(i * 2) / 2) << MaskShift);
10114 }
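// Each nibble of Mask now holds a half-word source index (the byte mask
// element divided by 2); e.g. a shuffle taking all half-words from V1 in
// order packs to 0x01234567 (OriginalOrderLow), and one taking them all from
// V2 packs to 0x89ABCDEF (OriginalOrderHigh).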
10115
10116 // For each mask element, find out if we're just inserting something
10117 // from V2 into V1 or vice versa. Possible permutations inserting an element
10118 // from V2 into V1:
10119 // X, 1, 2, 3, 4, 5, 6, 7
10120 // 0, X, 2, 3, 4, 5, 6, 7
10121 // 0, 1, X, 3, 4, 5, 6, 7
10122 // 0, 1, 2, X, 4, 5, 6, 7
10123 // 0, 1, 2, 3, X, 5, 6, 7
10124 // 0, 1, 2, 3, 4, X, 6, 7
10125 // 0, 1, 2, 3, 4, 5, X, 7
10126 // 0, 1, 2, 3, 4, 5, 6, X
10127 // Inserting from V1 into V2 will be similar, except mask range will be [8,15].
10128
10129 bool FoundCandidate = false;
10130 // Go through the mask of half-words to find an element that's being moved
10131 // from one vector to the other.
10132 for (unsigned i = 0; i < NumHalfWords; ++i) {
10133 unsigned MaskShift = (NumHalfWords - 1 - i) * 4;
10134 uint32_t MaskOneElt = (Mask >> MaskShift) & 0xF;
10135 uint32_t MaskOtherElts = ~(0xF << MaskShift);
10136 uint32_t TargetOrder = 0x0;
10137
10138 // If both vector operands for the shuffle are the same vector, the mask
10139 // will contain only elements from the first one and the second one will be
10140 // undef.
10141 if (V2.isUndef()) {
10142 ShiftElts = 0;
10143 unsigned VINSERTHSrcElem = IsLE ? 4 : 3;
10144 TargetOrder = OriginalOrderLow;
10145 Swap = false;
10146 // Skip if this is not the correct element or the mask of the other
10147 // elements doesn't match our expected order.
10148 if (MaskOneElt == VINSERTHSrcElem &&
10149 (Mask & MaskOtherElts) == (TargetOrder & MaskOtherElts)) {
10150 InsertAtByte = IsLE ? BytesInVector - (i + 1) * 2 : i * 2;
10151 FoundCandidate = true;
10152 break;
10153 }
10154 } else { // If both operands are defined.
10155 // Target order is [8,15] if the current mask is between [0,7].
10156 TargetOrder =
10157 (MaskOneElt < NumHalfWords) ? OriginalOrderHigh : OriginalOrderLow;
10158 // Skip if the mask of the other elements doesn't match our expected order.
10159 if ((Mask & MaskOtherElts) == (TargetOrder & MaskOtherElts)) {
10160 // We only need the last 3 bits for the number of shifts.
10161 ShiftElts = IsLE ? LittleEndianShifts[MaskOneElt & 0x7]
10162 : BigEndianShifts[MaskOneElt & 0x7];
10163 InsertAtByte = IsLE ? BytesInVector - (i + 1) * 2 : i * 2;
10164 Swap = MaskOneElt < NumHalfWords;
10165 FoundCandidate = true;
10166 break;
10167 }
10168 }
10169 }
10170
10171 if (!FoundCandidate)
10172 return SDValue();
10173
10174 // Candidate found, construct the proper SDAG sequence with VINSERTH,
10175 // optionally with VECSHL if shift is required.
10176 if (Swap)
10177 std::swap(V1, V2);
10178 if (V2.isUndef())
10179 V2 = V1;
10180 SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V1);
10181 if (ShiftElts) {
10182 // Double ShiftElts because we're left shifting on v16i8 type.
10183 SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v16i8, V2, V2,
10184 DAG.getConstant(2 * ShiftElts, dl, MVT::i32));
10185 SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, Shl);
10186 SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v8i16, Conv1, Conv2,
10187 DAG.getConstant(InsertAtByte, dl, MVT::i32));
10188 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
10189 }
10190 SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V2);
10191 SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v8i16, Conv1, Conv2,
10192 DAG.getConstant(InsertAtByte, dl, MVT::i32));
10193 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
10194}
10195
10196/// lowerToXXSPLTI32DX - Return the SDValue if this VECTOR_SHUFFLE can be
10197/// handled by the XXSPLTI32DX instruction introduced in ISA 3.1, otherwise
10198/// return the default SDValue.
10199SDValue PPCTargetLowering::lowerToXXSPLTI32DX(ShuffleVectorSDNode *SVN,
10200 SelectionDAG &DAG) const {
10201 // The LHS and RHS may be bitcasts to v16i8 as we canonicalize shuffles
10202 // to v16i8. Peek through the bitcasts to get the actual operands.
10203 SDValue LHS = peekThroughBitcasts(SVN->getOperand(0));
10204 SDValue RHS = peekThroughBitcasts(SVN->getOperand(1));
10205
10206 auto ShuffleMask = SVN->getMask();
10207 SDValue VecShuffle(SVN, 0);
10208 SDLoc DL(SVN);
10209
10210 // Check that we have a four byte shuffle.
10211 if (!isNByteElemShuffleMask(SVN, 4, 1))
10212 return SDValue();
10213
10214 // Canonicalize the RHS being a BUILD_VECTOR when lowering to xxsplti32dx.
10215 if (RHS->getOpcode() != ISD::BUILD_VECTOR) {
10216 std::swap(LHS, RHS);
10217 VecShuffle = DAG.getCommutedVectorShuffle(*SVN);
10218 ShuffleVectorSDNode *CommutedSV = dyn_cast<ShuffleVectorSDNode>(VecShuffle);
10219 if (!CommutedSV)
10220 return SDValue();
10221 ShuffleMask = CommutedSV->getMask();
10222 }
10223
10224 // Ensure that the RHS is a vector of constants.
10225 BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(RHS.getNode());
10226 if (!BVN)
10227 return SDValue();
10228
10229 // Check if RHS is a splat of 4-bytes (or smaller).
10230 APInt APSplatValue, APSplatUndef;
10231 unsigned SplatBitSize;
10232 bool HasAnyUndefs;
10233 if (!BVN->isConstantSplat(APSplatValue, APSplatUndef, SplatBitSize,
10234 HasAnyUndefs, 0, !Subtarget.isLittleEndian()) ||
10235 SplatBitSize > 32)
10236 return SDValue();
10237
10238 // Check that the shuffle mask matches the semantics of XXSPLTI32DX.
10239 // The instruction splats a constant C into two words of the source vector
10240 // producing { C, Unchanged, C, Unchanged } or { Unchanged, C, Unchanged, C }.
10241 // Thus we check that the shuffle mask is the equivalent of
10242 // <0, [4-7], 2, [4-7]> or <[4-7], 1, [4-7], 3> respectively.
10243 // Note: the check above of isNByteElemShuffleMask() ensures that the bytes
10244 // within each word are consecutive, so we only need to check the first byte.
10245 SDValue Index;
10246 bool IsLE = Subtarget.isLittleEndian();
10247 if ((ShuffleMask[0] == 0 && ShuffleMask[8] == 8) &&
10248 (ShuffleMask[4] % 4 == 0 && ShuffleMask[12] % 4 == 0 &&
10249 ShuffleMask[4] > 15 && ShuffleMask[12] > 15))
10250 Index = DAG.getTargetConstant(IsLE ? 0 : 1, DL, MVT::i32);
10251 else if ((ShuffleMask[4] == 4 && ShuffleMask[12] == 12) &&
10252 (ShuffleMask[0] % 4 == 0 && ShuffleMask[8] % 4 == 0 &&
10253 ShuffleMask[0] > 15 && ShuffleMask[8] > 15))
10254 Index = DAG.getTargetConstant(IsLE ? 1 : 0, DL, MVT::i32);
10255 else
10256 return SDValue();
10257
10258 // If the splat is narrower than 32-bits, we need to get the 32-bit value
10259 // for XXSPLTI32DX.
10260 unsigned SplatVal = APSplatValue.getZExtValue();
10261 for (; SplatBitSize < 32; SplatBitSize <<= 1)
10262 SplatVal |= (SplatVal << SplatBitSize);
10263
10264 SDValue SplatNode = DAG.getNode(
10265 PPCISD::XXSPLTI32DX, DL, MVT::v2i64, DAG.getBitcast(MVT::v2i64, LHS),
10266 Index, DAG.getTargetConstant(SplatVal, DL, MVT::i32));
10267 return DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, SplatNode);
10268}
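// For example, with LHS = {W0, W1, W2, W3} (32-bit words) and RHS a constant
// splat K, the byte mask <0-3, 20-23, 8-11, 28-31> requests {W0, K, W2, K};
// the first pattern above fires and a single XXSPLTI32DX writes K into the
// two matching words of LHS (Index 0 on little-endian, 1 on big-endian).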
10269
10270/// LowerROTL - Custom lowering for ROTL(v1i128) to vector_shuffle(v16i8).
10271/// We lower ROTL(v1i128) to vector_shuffle(v16i8) only if shift amount is
10272/// a multiple of 8. Otherwise convert it to a scalar rotation(i128)
10273/// i.e (or (shl x, C1), (srl x, 128-C1)).
10274SDValue PPCTargetLowering::LowerROTL(SDValue Op, SelectionDAG &DAG) const {
10275 assert(Op.getOpcode() == ISD::ROTL && "Should only be called for ISD::ROTL");
10276 assert(Op.getValueType() == MVT::v1i128 &&
10277 "Only set v1i128 as custom, other type shouldn't reach here!");
10278 SDLoc dl(Op);
10279 SDValue N0 = peekThroughBitcasts(Op.getOperand(0));
10280 SDValue N1 = peekThroughBitcasts(Op.getOperand(1));
10281 unsigned SHLAmt = N1.getConstantOperandVal(0);
10282 if (SHLAmt % 8 == 0) {
10283 std::array<int, 16> Mask;
10284 std::iota(Mask.begin(), Mask.end(), 0);
10285 std::rotate(Mask.begin(), Mask.begin() + SHLAmt / 8, Mask.end());
10286 if (SDValue Shuffle =
10287 DAG.getVectorShuffle(MVT::v16i8, dl, DAG.getBitcast(MVT::v16i8, N0),
10288 DAG.getUNDEF(MVT::v16i8), Mask))
10289 return DAG.getNode(ISD::BITCAST, dl, MVT::v1i128, Shuffle);
10290 }
10291 SDValue ArgVal = DAG.getBitcast(MVT::i128, N0);
10292 SDValue SHLOp = DAG.getNode(ISD::SHL, dl, MVT::i128, ArgVal,
10293 DAG.getConstant(SHLAmt, dl, MVT::i32));
10294 SDValue SRLOp = DAG.getNode(ISD::SRL, dl, MVT::i128, ArgVal,
10295 DAG.getConstant(128 - SHLAmt, dl, MVT::i32));
10296 SDValue OROp = DAG.getNode(ISD::OR, dl, MVT::i128, SHLOp, SRLOp);
10297 return DAG.getNode(ISD::BITCAST, dl, MVT::v1i128, OROp);
10298}
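// For example, a v1i128 rotate by 32 becomes a v16i8 shuffle whose mask is
// the identity rotated by 32/8 == 4 positions, i.e. <4,5,...,15,0,1,2,3>;
// a rotate by 12 is not a multiple of 8 and falls back to the scalar
// (or (shl x, 12), (srl x, 116)) expansion.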
10299
10300/// LowerVECTOR_SHUFFLE - Return the code we lower for VECTOR_SHUFFLE. If this
10301/// is a shuffle we can handle in a single instruction, return it. Otherwise,
10302/// return the code it can be lowered into. Worst case, it can always be
10303/// lowered into a vperm.
10304SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
10305 SelectionDAG &DAG) const {
10306 SDLoc dl(Op);
10307 SDValue V1 = Op.getOperand(0);
10308 SDValue V2 = Op.getOperand(1);
10309 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
10310
10311 // Any nodes that were combined in the target-independent combiner prior
10312 // to vector legalization will not be sent to the target combine. Try to
10313 // combine it here.
10314 if (SDValue NewShuffle = combineVectorShuffle(SVOp, DAG)) {
10315 if (!isa<ShuffleVectorSDNode>(NewShuffle))
10316 return NewShuffle;
10317 Op = NewShuffle;
10318 SVOp = cast<ShuffleVectorSDNode>(Op);
10319 V1 = Op.getOperand(0);
10320 V2 = Op.getOperand(1);
10321 }
10322 EVT VT = Op.getValueType();
10323 bool isLittleEndian = Subtarget.isLittleEndian();
10324
10325 unsigned ShiftElts, InsertAtByte;
10326 bool Swap = false;
10327
10328 // If this is a load-and-splat, we can do that with a single instruction
10329 // in some cases. However if the load has multiple uses, we don't want to
10330 // combine it because that will just produce multiple loads.
10331 bool IsPermutedLoad = false;
10332 const SDValue *InputLoad = getNormalLoadInput(V1, IsPermutedLoad);
10333 if (InputLoad && Subtarget.hasVSX() && V2.isUndef() &&
10334 (PPC::isSplatShuffleMask(SVOp, 4) || PPC::isSplatShuffleMask(SVOp, 8)) &&
10335 InputLoad->hasOneUse()) {
10336 bool IsFourByte = PPC::isSplatShuffleMask(SVOp, 4);
10337 int SplatIdx =
10338 PPC::getSplatIdxForPPCMnemonics(SVOp, IsFourByte ? 4 : 8, DAG);
10339
10340 // The splat index for permuted loads will be in the left half of the vector
10341 // which is strictly wider than the loaded value by 8 bytes. So we need to
10342 // adjust the splat index to point to the correct address in memory.
10343 if (IsPermutedLoad) {
10344 assert((isLittleEndian || IsFourByte) &&
10345 "Unexpected size for permuted load on big endian target");
10346 SplatIdx += IsFourByte ? 2 : 1;
10347 assert((SplatIdx < (IsFourByte ? 4 : 2)) &&
10348 "Splat of a value outside of the loaded memory");
10349 }
10350
10351 LoadSDNode *LD = cast<LoadSDNode>(*InputLoad);
10352 // For 4-byte load-and-splat, we need Power9.
10353 if ((IsFourByte && Subtarget.hasP9Vector()) || !IsFourByte) {
10354 uint64_t Offset = 0;
10355 if (IsFourByte)
10356 Offset = isLittleEndian ? (3 - SplatIdx) * 4 : SplatIdx * 4;
10357 else
10358 Offset = isLittleEndian ? (1 - SplatIdx) * 8 : SplatIdx * 8;
10359
10360 // If the width of the load is the same as the width of the splat,
10361 // loading with an offset would load the wrong memory.
10362 if (LD->getValueType(0).getSizeInBits() == (IsFourByte ? 32 : 64))
10363 Offset = 0;
10364
10365 SDValue BasePtr = LD->getBasePtr();
10366 if (Offset != 0)
10367 BasePtr = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
10368 BasePtr, DAG.getIntPtrConstant(Offset, dl));
10369 SDValue Ops[] = {
10370 LD->getChain(), // Chain
10371 BasePtr, // BasePtr
10372 DAG.getValueType(Op.getValueType()) // VT
10373 };
10374 SDVTList VTL =
10375 DAG.getVTList(IsFourByte ? MVT::v4i32 : MVT::v2i64, MVT::Other);
10376 SDValue LdSplt =
10377 DAG.getMemIntrinsicNode(PPCISD::LD_SPLAT, dl, VTL,
10378 Ops, LD->getMemoryVT(), LD->getMemOperand());
10379 DAG.ReplaceAllUsesOfValueWith(InputLoad->getValue(1), LdSplt.getValue(1));
10380 if (LdSplt.getValueType() != SVOp->getValueType(0))
10381 LdSplt = DAG.getBitcast(SVOp->getValueType(0), LdSplt);
10382 return LdSplt;
10383 }
10384 }
10385
10386 // All v2i64 and v2f64 shuffles are legal
10387 if (VT == MVT::v2i64 || VT == MVT::v2f64)
10388 return Op;
10389
10390 if (Subtarget.hasP9Vector() &&
10391 PPC::isXXINSERTWMask(SVOp, ShiftElts, InsertAtByte, Swap,
10392 isLittleEndian)) {
10393 if (V2.isUndef())
10394 V2 = V1;
10395 else if (Swap)
10396 std::swap(V1, V2);
10397 SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
10398 SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V2);
10399 if (ShiftElts) {
10400 SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v4i32, Conv2, Conv2,
10401 DAG.getConstant(ShiftElts, dl, MVT::i32));
10402 SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v4i32, Conv1, Shl,
10403 DAG.getConstant(InsertAtByte, dl, MVT::i32));
10404 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
10405 }
10406 SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v4i32, Conv1, Conv2,
10407 DAG.getConstant(InsertAtByte, dl, MVT::i32));
10408 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
10409 }
10410
10411 if (Subtarget.hasPrefixInstrs() && Subtarget.hasP10Vector()) {
10412 SDValue SplatInsertNode;
10413 if ((SplatInsertNode = lowerToXXSPLTI32DX(SVOp, DAG)))
10414 return SplatInsertNode;
10415 }
10416
10417 if (Subtarget.hasP9Altivec()) {
10418 SDValue NewISDNode;
10419 if ((NewISDNode = lowerToVINSERTH(SVOp, DAG)))
10420 return NewISDNode;
10421
10422 if ((NewISDNode = lowerToVINSERTB(SVOp, DAG)))
10423 return NewISDNode;
10424 }
10425
10426 if (Subtarget.hasVSX() &&
10427 PPC::isXXSLDWIShuffleMask(SVOp, ShiftElts, Swap, isLittleEndian)) {
10428 if (Swap)
10429 std::swap(V1, V2);
10430 SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
10431 SDValue Conv2 =
10432 DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V2.isUndef() ? V1 : V2);
10433
10434 SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v4i32, Conv1, Conv2,
10435 DAG.getConstant(ShiftElts, dl, MVT::i32));
10436 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Shl);
10437 }
10438
10439 if (Subtarget.hasVSX() &&
10440 PPC::isXXPERMDIShuffleMask(SVOp, ShiftElts, Swap, isLittleEndian)) {
10441 if (Swap)
10442 std::swap(V1, V2);
10443 SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V1);
10444 SDValue Conv2 =
10445 DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V2.isUndef() ? V1 : V2);
10446
10447 SDValue PermDI = DAG.getNode(PPCISD::XXPERMDI, dl, MVT::v2i64, Conv1, Conv2,
10448 DAG.getConstant(ShiftElts, dl, MVT::i32));
10449 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, PermDI);
10450 }
10451
10452 if (Subtarget.hasP9Vector()) {
10453 if (PPC::isXXBRHShuffleMask(SVOp)) {
10454 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V1);
10455 SDValue ReveHWord = DAG.getNode(ISD::BSWAP, dl, MVT::v8i16, Conv);
10456 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveHWord);
10457 } else if (PPC::isXXBRWShuffleMask(SVOp)) {
10458 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
10459 SDValue ReveWord = DAG.getNode(ISD::BSWAP, dl, MVT::v4i32, Conv);
10460 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveWord);
10461 } else if (PPC::isXXBRDShuffleMask(SVOp)) {
10462 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V1);
10463 SDValue ReveDWord = DAG.getNode(ISD::BSWAP, dl, MVT::v2i64, Conv);
10464 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveDWord);
10465 } else if (PPC::isXXBRQShuffleMask(SVOp)) {
10466 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v1i128, V1);
10467 SDValue ReveQWord = DAG.getNode(ISD::BSWAP, dl, MVT::v1i128, Conv);
10468 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveQWord);
10469 }
10470 }
10471
10472 if (Subtarget.hasVSX()) {
10473 if (V2.isUndef() && PPC::isSplatShuffleMask(SVOp, 4)) {
10474 int SplatIdx = PPC::getSplatIdxForPPCMnemonics(SVOp, 4, DAG);
10475
10476 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
10477 SDValue Splat = DAG.getNode(PPCISD::XXSPLT, dl, MVT::v4i32, Conv,
10478 DAG.getConstant(SplatIdx, dl, MVT::i32));
10479 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Splat);
10480 }
10481
10482 // Left shifts of 8 bytes are actually swaps. Convert accordingly.
10483 if (V2.isUndef() && PPC::isVSLDOIShuffleMask(SVOp, 1, DAG) == 8) {
10484 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, V1);
10485 SDValue Swap = DAG.getNode(PPCISD::SWAP_NO_CHAIN, dl, MVT::v2f64, Conv);
10486 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Swap);
10487 }
10488 }
10489
10490 // Cases that are handled by instructions that take permute immediates
10491 // (such as vsplt*) should be left as VECTOR_SHUFFLE nodes so they can be
10492 // selected by the instruction selector.
10493 if (V2.isUndef()) {
10494 if (PPC::isSplatShuffleMask(SVOp, 1) ||
10495 PPC::isSplatShuffleMask(SVOp, 2) ||
10496 PPC::isSplatShuffleMask(SVOp, 4) ||
10497 PPC::isVPKUWUMShuffleMask(SVOp, 1, DAG) ||
10498 PPC::isVPKUHUMShuffleMask(SVOp, 1, DAG) ||
10499 PPC::isVSLDOIShuffleMask(SVOp, 1, DAG) != -1 ||
10500 PPC::isVMRGLShuffleMask(SVOp, 1, 1, DAG) ||
10501 PPC::isVMRGLShuffleMask(SVOp, 2, 1, DAG) ||
10502 PPC::isVMRGLShuffleMask(SVOp, 4, 1, DAG) ||
10503 PPC::isVMRGHShuffleMask(SVOp, 1, 1, DAG) ||
10504 PPC::isVMRGHShuffleMask(SVOp, 2, 1, DAG) ||
10505 PPC::isVMRGHShuffleMask(SVOp, 4, 1, DAG) ||
10506 (Subtarget.hasP8Altivec() && (
10507 PPC::isVPKUDUMShuffleMask(SVOp, 1, DAG) ||
10508 PPC::isVMRGEOShuffleMask(SVOp, true, 1, DAG) ||
10509 PPC::isVMRGEOShuffleMask(SVOp, false, 1, DAG)))) {
10510 return Op;
10511 }
10512 }
10513
10514 // Altivec has a variety of "shuffle immediates" that take two vector inputs
10515 // and produce a fixed permutation. If any of these match, do not lower to
10516 // VPERM.
10517 unsigned int ShuffleKind = isLittleEndian ? 2 : 0;
10518 if (PPC::isVPKUWUMShuffleMask(SVOp, ShuffleKind, DAG) ||
10519 PPC::isVPKUHUMShuffleMask(SVOp, ShuffleKind, DAG) ||
10520 PPC::isVSLDOIShuffleMask(SVOp, ShuffleKind, DAG) != -1 ||
10521 PPC::isVMRGLShuffleMask(SVOp, 1, ShuffleKind, DAG) ||
10522 PPC::isVMRGLShuffleMask(SVOp, 2, ShuffleKind, DAG) ||
10523 PPC::isVMRGLShuffleMask(SVOp, 4, ShuffleKind, DAG) ||
10524 PPC::isVMRGHShuffleMask(SVOp, 1, ShuffleKind, DAG) ||
10525 PPC::isVMRGHShuffleMask(SVOp, 2, ShuffleKind, DAG) ||
10526 PPC::isVMRGHShuffleMask(SVOp, 4, ShuffleKind, DAG) ||
10527 (Subtarget.hasP8Altivec() && (
10528 PPC::isVPKUDUMShuffleMask(SVOp, ShuffleKind, DAG) ||
10529 PPC::isVMRGEOShuffleMask(SVOp, true, ShuffleKind, DAG) ||
10530 PPC::isVMRGEOShuffleMask(SVOp, false, ShuffleKind, DAG))))
10531 return Op;
10532
10533 // Check to see if this is a shuffle of 4-byte values. If so, we can use our
10534 // perfect shuffle table to emit an optimal matching sequence.
10535 ArrayRef<int> PermMask = SVOp->getMask();
10536
10537 if (!DisablePerfectShuffle && !isLittleEndian) {
10538 unsigned PFIndexes[4];
10539 bool isFourElementShuffle = true;
10540 for (unsigned i = 0; i != 4 && isFourElementShuffle;
10541 ++i) { // Element number
10542 unsigned EltNo = 8; // Start out undef.
10543 for (unsigned j = 0; j != 4; ++j) { // Intra-element byte.
10544 if (PermMask[i * 4 + j] < 0)
10545 continue; // Undef, ignore it.
10546
10547 unsigned ByteSource = PermMask[i * 4 + j];
10548 if ((ByteSource & 3) != j) {
10549 isFourElementShuffle = false;
10550 break;
10551 }
10552
10553 if (EltNo == 8) {
10554 EltNo = ByteSource / 4;
10555 } else if (EltNo != ByteSource / 4) {
10556 isFourElementShuffle = false;
10557 break;
10558 }
10559 }
10560 PFIndexes[i] = EltNo;
10561 }
10562
10563 // If this shuffle can be expressed as a shuffle of 4-byte elements, use the
10564 // perfect shuffle vector to determine if it is cost effective to do this as
10565 // discrete instructions, or whether we should use a vperm.
10566 // For now, we skip this for little endian until such time as we have a
10567 // little-endian perfect shuffle table.
10568 if (isFourElementShuffle) {
10569 // Compute the index in the perfect shuffle table.
10570 unsigned PFTableIndex = PFIndexes[0] * 9 * 9 * 9 + PFIndexes[1] * 9 * 9 +
10571 PFIndexes[2] * 9 + PFIndexes[3];
10572
10573 unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
10574 unsigned Cost = (PFEntry >> 30);
10575
10576 // Determining when to avoid vperm is tricky. Many things affect the cost
10577 // of vperm, particularly how many times the perm mask needs to be
10578 // computed. For example, if the perm mask can be hoisted out of a loop or
10579 // is already used (perhaps because there are multiple permutes with the
10580 // same shuffle mask?) the vperm has a cost of 1. OTOH, hoisting the
10581 // permute mask out of the loop requires an extra register.
10582 //
10583 // As a compromise, we only emit discrete instructions if the shuffle can
10584 // be generated in 3 or fewer operations. When we have loop information
10585 // available, if this block is within a loop, we should avoid using vperm
10586 // for 3-operation perms and use a constant pool load instead.
10587 if (Cost < 3)
10588 return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl);
10589 }
10590 }
10591
10592 // Lower this to a VPERM(V1, V2, V3) expression, where V3 is a constant
10593 // vector that will get spilled to the constant pool.
10594 if (V2.isUndef()) V2 = V1;
10595
10596 return LowerVPERM(Op, DAG, PermMask, VT, V1, V2);
10597}
10598
10599SDValue PPCTargetLowering::LowerVPERM(SDValue Op, SelectionDAG &DAG,
10600 ArrayRef<int> PermMask, EVT VT,
10601 SDValue V1, SDValue V2) const {
10602 unsigned Opcode = PPCISD::VPERM;
10603 EVT ValType = V1.getValueType();
10604 SDLoc dl(Op);
10605 bool NeedSwap = false;
10606 bool isLittleEndian = Subtarget.isLittleEndian();
10607 bool isPPC64 = Subtarget.isPPC64();
10608
10609 if (Subtarget.hasVSX() && Subtarget.hasP9Vector() &&
10610 (V1->hasOneUse() || V2->hasOneUse())) {
10611 LLVM_DEBUG(dbgs() << "At least one of the two input vectors is dead - using "
10612 "XXPERM instead\n");
10613 Opcode = PPCISD::XXPERM;
10614
10615 // The second input to XXPERM is also an output, so if the second input has
10616 // multiple uses then a copy is necessary. As a result, we want the
10617 // single-use operand to be used as the second input to prevent copying.
10618 if ((!isLittleEndian && !V2->hasOneUse() && V1->hasOneUse()) ||
10619 (isLittleEndian && !V1->hasOneUse() && V2->hasOneUse())) {
10620 std::swap(V1, V2);
10621 NeedSwap = !NeedSwap;
10622 }
10623 }
10624
10625 // The SHUFFLE_VECTOR mask is almost exactly what we want for vperm, except
10626 // that it is in input element units, not in bytes. Convert now.
10627
10628 // For little endian, the order of the input vectors is reversed, and
10629 // the permutation mask is complemented with respect to 31. This is
10630 // necessary to produce proper semantics with the big-endian-based vperm
10631 // instruction.
10632 EVT EltVT = V1.getValueType().getVectorElementType();
10633 unsigned BytesPerElement = EltVT.getSizeInBits() / 8;
10634
10635 bool V1HasXXSWAPD = V1->getOperand(0)->getOpcode() == PPCISD::XXSWAPD;
10636 bool V2HasXXSWAPD = V2->getOperand(0)->getOpcode() == PPCISD::XXSWAPD;
10637
10638 /*
10639 Vectors will be appended like so: [ V1 | v2 ]
10640 XXSWAPD on V1:
10641 [ A | B | C | D ] -> [ C | D | A | B ]
10642 0-3 4-7 8-11 12-15 0-3 4-7 8-11 12-15
10643 i.e. index of A, B += 8, and index of C, D -= 8.
10644 XXSWAPD on V2:
10645 [ E | F | G | H ] -> [ G | H | E | F ]
10646 16-19 20-23 24-27 28-31 16-19 20-23 24-27 28-31
10647 i.e. index of E, F += 8, index of G, H -= 8
10648 Swap V1 and V2:
10649 [ V1 | V2 ] -> [ V2 | V1 ]
10650 0-15 16-31 0-15 16-31
10651 i.e. index of V1 += 16, index of V2 -= 16
10652 */
10653
10654 SmallVector<SDValue, 16> ResultMask;
10655 for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i) {
10656 unsigned SrcElt = PermMask[i] < 0 ? 0 : PermMask[i];
10657
10658 if (V1HasXXSWAPD) {
10659 if (SrcElt < 8)
10660 SrcElt += 8;
10661 else if (SrcElt < 16)
10662 SrcElt -= 8;
10663 }
10664 if (V2HasXXSWAPD) {
10665 if (SrcElt > 23)
10666 SrcElt -= 8;
10667 else if (SrcElt > 15)
10668 SrcElt += 8;
10669 }
10670 if (NeedSwap) {
10671 if (SrcElt < 16)
10672 SrcElt += 16;
10673 else
10674 SrcElt -= 16;
10675 }
10676 for (unsigned j = 0; j != BytesPerElement; ++j)
10677 if (isLittleEndian)
10678 ResultMask.push_back(
10679 DAG.getConstant(31 - (SrcElt * BytesPerElement + j), dl, MVT::i32));
10680 else
10681 ResultMask.push_back(
10682 DAG.getConstant(SrcElt * BytesPerElement + j, dl, MVT::i32));
10683 }
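// For example, with BytesPerElement == 1, PermMask[i] == 5 produces control
// byte 5 on big-endian but 31 - 5 == 26 on little-endian; together with the
// operand swap performed below, this selects the same source byte because
// vperm numbers bytes from the left of the concatenated pair of inputs.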
10684
10685 if (V1HasXXSWAPD) {
10686 dl = SDLoc(V1->getOperand(0));
10687 V1 = V1->getOperand(0)->getOperand(1);
10688 }
10689 if (V2HasXXSWAPD) {
10690 dl = SDLoc(V2->getOperand(0));
10691 V2 = V2->getOperand(0)->getOperand(1);
10692 }
10693
10694 if (isPPC64 && (V1HasXXSWAPD || V2HasXXSWAPD)) {
10695 if (ValType != MVT::v2f64)
10696 V1 = DAG.getBitcast(MVT::v2f64, V1);
10697 if (V2.getValueType() != MVT::v2f64)
10698 V2 = DAG.getBitcast(MVT::v2f64, V2);
10699 }
10700
10701 ShufflesHandledWithVPERM++;
10702 SDValue VPermMask = DAG.getBuildVector(MVT::v16i8, dl, ResultMask);
10703 LLVM_DEBUG({
10704 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
10705 if (Opcode == PPCISD::XXPERM) {
10706 dbgs() << "Emitting a XXPERM for the following shuffle:\n";
10707 } else {
10708 dbgs() << "Emitting a VPERM for the following shuffle:\n";
10709 }
10710 SVOp->dump();
10711 dbgs() << "With the following permute control vector:\n";
10712 VPermMask.dump();
10713 });
10714
10715 if (Opcode == PPCISD::XXPERM)
10716 VPermMask = DAG.getBitcast(MVT::v4i32, VPermMask);
10717
10718 // For LE we only need to swap the operands here; the permute control
10719 // mask was already computed for the swapped order.
10720 if (isLittleEndian)
10721 std::swap(V1, V2);
10722
10723 SDValue VPERMNode =
10724 DAG.getNode(Opcode, dl, V1.getValueType(), V1, V2, VPermMask);
10725
10726 VPERMNode = DAG.getBitcast(ValType, VPERMNode);
10727 return VPERMNode;
10728}
10729
10730/// getVectorCompareInfo - Given an intrinsic, return false if it is not a
10731/// vector comparison. If it is, return true and fill in Opc/isDot with
10732/// information about the intrinsic.
10733static bool getVectorCompareInfo(SDValue Intrin, int &CompareOpc,
10734 bool &isDot, const PPCSubtarget &Subtarget) {
10735 unsigned IntrinsicID = Intrin.getConstantOperandVal(0);
10736 CompareOpc = -1;
10737 isDot = false;
10738 switch (IntrinsicID) {
10739 default:
10740 return false;
10741 // Comparison predicates.
10742 case Intrinsic::ppc_altivec_vcmpbfp_p:
10743 CompareOpc = 966;
10744 isDot = true;
10745 break;
10746 case Intrinsic::ppc_altivec_vcmpeqfp_p:
10747 CompareOpc = 198;
10748 isDot = true;
10749 break;
10750 case Intrinsic::ppc_altivec_vcmpequb_p:
10751 CompareOpc = 6;
10752 isDot = true;
10753 break;
10754 case Intrinsic::ppc_altivec_vcmpequh_p:
10755 CompareOpc = 70;
10756 isDot = true;
10757 break;
10758 case Intrinsic::ppc_altivec_vcmpequw_p:
10759 CompareOpc = 134;
10760 isDot = true;
10761 break;
10762 case Intrinsic::ppc_altivec_vcmpequd_p:
10763 if (Subtarget.hasVSX() || Subtarget.hasP8Altivec()) {
10764 CompareOpc = 199;
10765 isDot = true;
10766 } else
10767 return false;
10768 break;
10769 case Intrinsic::ppc_altivec_vcmpneb_p:
10770 case Intrinsic::ppc_altivec_vcmpneh_p:
10771 case Intrinsic::ppc_altivec_vcmpnew_p:
10772 case Intrinsic::ppc_altivec_vcmpnezb_p:
10773 case Intrinsic::ppc_altivec_vcmpnezh_p:
10774 case Intrinsic::ppc_altivec_vcmpnezw_p:
10775 if (Subtarget.hasP9Altivec()) {
10776 switch (IntrinsicID) {
10777 default:
10778 llvm_unreachable("Unknown comparison intrinsic.");
10779 case Intrinsic::ppc_altivec_vcmpneb_p:
10780 CompareOpc = 7;
10781 break;
10782 case Intrinsic::ppc_altivec_vcmpneh_p:
10783 CompareOpc = 71;
10784 break;
10785 case Intrinsic::ppc_altivec_vcmpnew_p:
10786 CompareOpc = 135;
10787 break;
10788 case Intrinsic::ppc_altivec_vcmpnezb_p:
10789 CompareOpc = 263;
10790 break;
10791 case Intrinsic::ppc_altivec_vcmpnezh_p:
10792 CompareOpc = 327;
10793 break;
10794 case Intrinsic::ppc_altivec_vcmpnezw_p:
10795 CompareOpc = 391;
10796 break;
10797 }
10798 isDot = true;
10799 } else
10800 return false;
10801 break;
10802 case Intrinsic::ppc_altivec_vcmpgefp_p:
10803 CompareOpc = 454;
10804 isDot = true;
10805 break;
10806 case Intrinsic::ppc_altivec_vcmpgtfp_p:
10807 CompareOpc = 710;
10808 isDot = true;
10809 break;
10810 case Intrinsic::ppc_altivec_vcmpgtsb_p:
10811 CompareOpc = 774;
10812 isDot = true;
10813 break;
10814 case Intrinsic::ppc_altivec_vcmpgtsh_p:
10815 CompareOpc = 838;
10816 isDot = true;
10817 break;
10818 case Intrinsic::ppc_altivec_vcmpgtsw_p:
10819 CompareOpc = 902;
10820 isDot = true;
10821 break;
10822 case Intrinsic::ppc_altivec_vcmpgtsd_p:
10823 if (Subtarget.hasVSX() || Subtarget.hasP8Altivec()) {
10824 CompareOpc = 967;
10825 isDot = true;
10826 } else
10827 return false;
10828 break;
10829 case Intrinsic::ppc_altivec_vcmpgtub_p:
10830 CompareOpc = 518;
10831 isDot = true;
10832 break;
10833 case Intrinsic::ppc_altivec_vcmpgtuh_p:
10834 CompareOpc = 582;
10835 isDot = true;
10836 break;
10837 case Intrinsic::ppc_altivec_vcmpgtuw_p:
10838 CompareOpc = 646;
10839 isDot = true;
10840 break;
10841 case Intrinsic::ppc_altivec_vcmpgtud_p:
10842 if (Subtarget.hasVSX() || Subtarget.hasP8Altivec()) {
10843 CompareOpc = 711;
10844 isDot = true;
10845 } else
10846 return false;
10847 break;
10848
10849 case Intrinsic::ppc_altivec_vcmpequq:
10850 case Intrinsic::ppc_altivec_vcmpgtsq:
10851 case Intrinsic::ppc_altivec_vcmpgtuq:
10852 if (!Subtarget.isISA3_1())
10853 return false;
10854 switch (IntrinsicID) {
10855 default:
10856 llvm_unreachable("Unknown comparison intrinsic.");
10857 case Intrinsic::ppc_altivec_vcmpequq:
10858 CompareOpc = 455;
10859 break;
10860 case Intrinsic::ppc_altivec_vcmpgtsq:
10861 CompareOpc = 903;
10862 break;
10863 case Intrinsic::ppc_altivec_vcmpgtuq:
10864 CompareOpc = 647;
10865 break;
10866 }
10867 break;
10868
10869 // VSX predicate comparisons use the same infrastructure
10870 case Intrinsic::ppc_vsx_xvcmpeqdp_p:
10871 case Intrinsic::ppc_vsx_xvcmpgedp_p:
10872 case Intrinsic::ppc_vsx_xvcmpgtdp_p:
10873 case Intrinsic::ppc_vsx_xvcmpeqsp_p:
10874 case Intrinsic::ppc_vsx_xvcmpgesp_p:
10875 case Intrinsic::ppc_vsx_xvcmpgtsp_p:
10876 if (Subtarget.hasVSX()) {
10877 switch (IntrinsicID) {
10878 case Intrinsic::ppc_vsx_xvcmpeqdp_p:
10879 CompareOpc = 99;
10880 break;
10881 case Intrinsic::ppc_vsx_xvcmpgedp_p:
10882 CompareOpc = 115;
10883 break;
10884 case Intrinsic::ppc_vsx_xvcmpgtdp_p:
10885 CompareOpc = 107;
10886 break;
10887 case Intrinsic::ppc_vsx_xvcmpeqsp_p:
10888 CompareOpc = 67;
10889 break;
10890 case Intrinsic::ppc_vsx_xvcmpgesp_p:
10891 CompareOpc = 83;
10892 break;
10893 case Intrinsic::ppc_vsx_xvcmpgtsp_p:
10894 CompareOpc = 75;
10895 break;
10896 }
10897 isDot = true;
10898 } else
10899 return false;
10900 break;
10901
10902 // Normal Comparisons.
10903 case Intrinsic::ppc_altivec_vcmpbfp:
10904 CompareOpc = 966;
10905 break;
10906 case Intrinsic::ppc_altivec_vcmpeqfp:
10907 CompareOpc = 198;
10908 break;
10909 case Intrinsic::ppc_altivec_vcmpequb:
10910 CompareOpc = 6;
10911 break;
10912 case Intrinsic::ppc_altivec_vcmpequh:
10913 CompareOpc = 70;
10914 break;
10915 case Intrinsic::ppc_altivec_vcmpequw:
10916 CompareOpc = 134;
10917 break;
10918 case Intrinsic::ppc_altivec_vcmpequd:
10919 if (Subtarget.hasP8Altivec())
10920 CompareOpc = 199;
10921 else
10922 return false;
10923 break;
10924 case Intrinsic::ppc_altivec_vcmpneb:
10925 case Intrinsic::ppc_altivec_vcmpneh:
10926 case Intrinsic::ppc_altivec_vcmpnew:
10927 case Intrinsic::ppc_altivec_vcmpnezb:
10928 case Intrinsic::ppc_altivec_vcmpnezh:
10929 case Intrinsic::ppc_altivec_vcmpnezw:
10930 if (Subtarget.hasP9Altivec())
10931 switch (IntrinsicID) {
10932 default:
10933 llvm_unreachable("Unknown comparison intrinsic.");
10934 case Intrinsic::ppc_altivec_vcmpneb:
10935 CompareOpc = 7;
10936 break;
10937 case Intrinsic::ppc_altivec_vcmpneh:
10938 CompareOpc = 71;
10939 break;
10940 case Intrinsic::ppc_altivec_vcmpnew:
10941 CompareOpc = 135;
10942 break;
10943 case Intrinsic::ppc_altivec_vcmpnezb:
10944 CompareOpc = 263;
10945 break;
10946 case Intrinsic::ppc_altivec_vcmpnezh:
10947 CompareOpc = 327;
10948 break;
10949 case Intrinsic::ppc_altivec_vcmpnezw:
10950 CompareOpc = 391;
10951 break;
10952 }
10953 else
10954 return false;
10955 break;
10956 case Intrinsic::ppc_altivec_vcmpgefp:
10957 CompareOpc = 454;
10958 break;
10959 case Intrinsic::ppc_altivec_vcmpgtfp:
10960 CompareOpc = 710;
10961 break;
10962 case Intrinsic::ppc_altivec_vcmpgtsb:
10963 CompareOpc = 774;
10964 break;
10965 case Intrinsic::ppc_altivec_vcmpgtsh:
10966 CompareOpc = 838;
10967 break;
10968 case Intrinsic::ppc_altivec_vcmpgtsw:
10969 CompareOpc = 902;
10970 break;
10971 case Intrinsic::ppc_altivec_vcmpgtsd:
10972 if (Subtarget.hasP8Altivec())
10973 CompareOpc = 967;
10974 else
10975 return false;
10976 break;
10977 case Intrinsic::ppc_altivec_vcmpgtub:
10978 CompareOpc = 518;
10979 break;
10980 case Intrinsic::ppc_altivec_vcmpgtuh:
10981 CompareOpc = 582;
10982 break;
10983 case Intrinsic::ppc_altivec_vcmpgtuw:
10984 CompareOpc = 646;
10985 break;
10986 case Intrinsic::ppc_altivec_vcmpgtud:
10987 if (Subtarget.hasP8Altivec())
10988 CompareOpc = 711;
10989 else
10990 return false;
10991 break;
10992 case Intrinsic::ppc_altivec_vcmpequq_p:
10993 case Intrinsic::ppc_altivec_vcmpgtsq_p:
10994 case Intrinsic::ppc_altivec_vcmpgtuq_p:
10995 if (!Subtarget.isISA3_1())
10996 return false;
10997 switch (IntrinsicID) {
10998 default:
10999 llvm_unreachable("Unknown comparison intrinsic.");
11000 case Intrinsic::ppc_altivec_vcmpequq_p:
11001 CompareOpc = 455;
11002 break;
11003 case Intrinsic::ppc_altivec_vcmpgtsq_p:
11004 CompareOpc = 903;
11005 break;
11006 case Intrinsic::ppc_altivec_vcmpgtuq_p:
11007 CompareOpc = 647;
11008 break;
11009 }
11010 isDot = true;
11011 break;
11012 }
11013 return true;
11014}
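// For example, Intrinsic::ppc_altivec_vcmpequb_p maps to CompareOpc 6 with
// isDot set (the record-form vcmpequb.), while the plain
// Intrinsic::ppc_altivec_vcmpequb maps to the same opcode with isDot false.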
11015
11016/// LowerINTRINSIC_WO_CHAIN - If this is an intrinsic that we want to custom
11017/// lower, do it, otherwise return null.
11018SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
11019 SelectionDAG &DAG) const {
11020 unsigned IntrinsicID = Op.getConstantOperandVal(0);
11021
11022 SDLoc dl(Op);
11023
11024 switch (IntrinsicID) {
11025 case Intrinsic::thread_pointer:
11026 // Reads the thread pointer register, used for __builtin_thread_pointer.
11027 if (Subtarget.isPPC64())
11028 return DAG.getRegister(PPC::X13, MVT::i64);
11029 return DAG.getRegister(PPC::R2, MVT::i32);
11030
11031 case Intrinsic::ppc_rldimi: {
11032 assert(Subtarget.isPPC64() && "rldimi is only available in 64-bit!");
11033 SDValue Src = Op.getOperand(1);
11034 APInt Mask = Op.getConstantOperandAPInt(4);
11035 if (Mask.isZero())
11036 return Op.getOperand(2);
11037 if (Mask.isAllOnes())
11038 return DAG.getNode(ISD::ROTL, dl, MVT::i64, Src, Op.getOperand(3));
11039 uint64_t SH = Op.getConstantOperandVal(3);
11040 unsigned MB = 0, ME = 0;
11041 if (!isRunOfOnes64(Mask.getZExtValue(), MB, ME))
11042 report_fatal_error("invalid rldimi mask!");
11043 // rldimi requires ME=63-SH, otherwise rotation is needed before rldimi.
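 // Illustrative worked example (not from the original source): with
 // Mask = 0x00000000FFFF0000 and an intrinsic shift of SH = 16,
 // isRunOfOnes64 gives MB = 32 and ME = 47 (bits counted from the MSB), so
 // ME == 63 - SH holds, no pre-rotation of Src is needed, and the RLDIMI
 // below is emitted with shift field 63 - ME = 16 and mask-begin field 32.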
11044 if (ME < 63 - SH) {
11045 Src = DAG.getNode(ISD::ROTL, dl, MVT::i64, Src,
11046 DAG.getConstant(ME + SH + 1, dl, MVT::i32));
11047 } else if (ME > 63 - SH) {
11048 Src = DAG.getNode(ISD::ROTL, dl, MVT::i64, Src,
11049 DAG.getConstant(ME + SH - 63, dl, MVT::i32));
11050 }
11051 return SDValue(
11052 DAG.getMachineNode(PPC::RLDIMI, dl, MVT::i64,
11053 {Op.getOperand(2), Src,
11054 DAG.getTargetConstant(63 - ME, dl, MVT::i32),
11055 DAG.getTargetConstant(MB, dl, MVT::i32)}),
11056 0);
11057 }
11058
11059 case Intrinsic::ppc_rlwimi: {
11060 APInt Mask = Op.getConstantOperandAPInt(4);
11061 if (Mask.isZero())
11062 return Op.getOperand(2);
11063 if (Mask.isAllOnes())
11064 return DAG.getNode(ISD::ROTL, dl, MVT::i32, Op.getOperand(1),
11065 Op.getOperand(3));
11066 unsigned MB = 0, ME = 0;
11067 if (!isRunOfOnes(Mask.getZExtValue(), MB, ME))
11068 report_fatal_error("invalid rlwimi mask!");
11069 return SDValue(DAG.getMachineNode(
11070 PPC::RLWIMI, dl, MVT::i32,
11071 {Op.getOperand(2), Op.getOperand(1), Op.getOperand(3),
11072 DAG.getTargetConstant(MB, dl, MVT::i32),
11073 DAG.getTargetConstant(ME, dl, MVT::i32)}),
11074 0);
11075 }
11076
11077 case Intrinsic::ppc_rlwnm: {
11078 if (Op.getConstantOperandVal(3) == 0)
11079 return DAG.getConstant(0, dl, MVT::i32);
11080 unsigned MB = 0, ME = 0;
11081 if (!isRunOfOnes(Op.getConstantOperandVal(3), MB, ME))
11082 report_fatal_error("invalid rlwnm mask!");
11083 return SDValue(
11084 DAG.getMachineNode(PPC::RLWNM, dl, MVT::i32,
11085 {Op.getOperand(1), Op.getOperand(2),
11086 DAG.getTargetConstant(MB, dl, MVT::i32),
11087 DAG.getTargetConstant(ME, dl, MVT::i32)}),
11088 0);
11089 }
11090
11091 case Intrinsic::ppc_mma_disassemble_acc: {
11092 if (Subtarget.isISAFuture()) {
11093 EVT ReturnTypes[] = {MVT::v256i1, MVT::v256i1};
11094 SDValue WideVec =
11095 SDValue(DAG.getMachineNode(PPC::DMXXEXTFDMR512, dl, ReturnTypes,
11096 Op.getOperand(1)),
11097 0);
11098 SmallVector<SDValue, 4> RetOps;
11099 SDValue Value = SDValue(WideVec.getNode(), 0);
11100 SDValue Value2 = SDValue(WideVec.getNode(), 1);
11101
11102 SDValue Extract;
11103 Extract = DAG.getNode(
11104 PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8,
11105 Subtarget.isLittleEndian() ? Value2 : Value,
11106 DAG.getConstant(Subtarget.isLittleEndian() ? 1 : 0,
11107 dl, getPointerTy(DAG.getDataLayout())));
11108 RetOps.push_back(Extract);
11109 Extract = DAG.getNode(
11110 PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8,
11111 Subtarget.isLittleEndian() ? Value2 : Value,
11112 DAG.getConstant(Subtarget.isLittleEndian() ? 0 : 1,
11113 dl, getPointerTy(DAG.getDataLayout())));
11114 RetOps.push_back(Extract);
11115 Extract = DAG.getNode(
11116 PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8,
11117 Subtarget.isLittleEndian() ? Value : Value2,
11118 DAG.getConstant(Subtarget.isLittleEndian() ? 1 : 0,
11119 dl, getPointerTy(DAG.getDataLayout())));
11120 RetOps.push_back(Extract);
11121 Extract = DAG.getNode(
11122 PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8,
11123 Subtarget.isLittleEndian() ? Value : Value2,
11124 DAG.getConstant(Subtarget.isLittleEndian() ? 0 : 1,
11125 dl, getPointerTy(DAG.getDataLayout())));
11126 RetOps.push_back(Extract);
11127 return DAG.getMergeValues(RetOps, dl);
11128 }
11129 [[fallthrough]];
11130 }
11131 case Intrinsic::ppc_vsx_disassemble_pair: {
11132 int NumVecs = 2;
11133 SDValue WideVec = Op.getOperand(1);
11134 if (IntrinsicID == Intrinsic::ppc_mma_disassemble_acc) {
11135 NumVecs = 4;
11136 WideVec = DAG.getNode(PPCISD::XXMFACC, dl, MVT::v512i1, WideVec);
11137 }
11138 SmallVector<SDValue, 4> RetOps;
11139 for (int VecNo = 0; VecNo < NumVecs; VecNo++) {
11140 SDValue Extract = DAG.getNode(
11141 PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8, WideVec,
11142 DAG.getConstant(Subtarget.isLittleEndian() ? NumVecs - 1 - VecNo
11143 : VecNo,
11144 dl, getPointerTy(DAG.getDataLayout())));
11145 RetOps.push_back(Extract);
11146 }
11147 return DAG.getMergeValues(RetOps, dl);
11148 }
11149
11150 case Intrinsic::ppc_mma_xxmfacc:
11151 case Intrinsic::ppc_mma_xxmtacc: {
11152 // Allow pre-isa-future subtargets to lower as normal.
11153 if (!Subtarget.isISAFuture())
11154 return SDValue();
11155 // The intrinsics for xxmtacc and xxmfacc take one argument of
11156 // type v512i1. For future CPUs the corresponding wacc instruction
11157 // dmxx[inst|extf]dmr512 is always generated for type v512i1, removing
11158 // the need to produce the xxm[t|f]acc.
11159 SDValue WideVec = Op.getOperand(1);
11160 DAG.ReplaceAllUsesWith(Op, WideVec);
11161 return SDValue();
11162 }
11163
11164 case Intrinsic::ppc_unpack_longdouble: {
11165 auto *Idx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
11166 assert(Idx && (Idx->getSExtValue() == 0 || Idx->getSExtValue() == 1) &&
11167 "Argument of long double unpack must be 0 or 1!");
11168 return DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::f64, Op.getOperand(1),
11169 DAG.getConstant(!!(Idx->getSExtValue()), dl,
11170 Idx->getValueType(0)));
11171 }
11172
11173 case Intrinsic::ppc_compare_exp_lt:
11174 case Intrinsic::ppc_compare_exp_gt:
11175 case Intrinsic::ppc_compare_exp_eq:
11176 case Intrinsic::ppc_compare_exp_uo: {
11177 unsigned Pred;
11178 switch (IntrinsicID) {
11179 case Intrinsic::ppc_compare_exp_lt:
11180 Pred = PPC::PRED_LT;
11181 break;
11182 case Intrinsic::ppc_compare_exp_gt:
11183 Pred = PPC::PRED_GT;
11184 break;
11185 case Intrinsic::ppc_compare_exp_eq:
11186 Pred = PPC::PRED_EQ;
11187 break;
11188 case Intrinsic::ppc_compare_exp_uo:
11189 Pred = PPC::PRED_UN;
11190 break;
11191 }
11192 return SDValue(
11193 DAG.getMachineNode(
11194 PPC::SELECT_CC_I4, dl, MVT::i32,
11195 {SDValue(DAG.getMachineNode(PPC::XSCMPEXPDP, dl, MVT::i32,
11196 Op.getOperand(1), Op.getOperand(2)),
11197 0),
11198 DAG.getConstant(1, dl, MVT::i32), DAG.getConstant(0, dl, MVT::i32),
11199 DAG.getTargetConstant(Pred, dl, MVT::i32)}),
11200 0);
11201 }
11202 case Intrinsic::ppc_test_data_class: {
11203 EVT OpVT = Op.getOperand(1).getValueType();
11204 unsigned CmprOpc = OpVT == MVT::f128 ? PPC::XSTSTDCQP
11205 : (OpVT == MVT::f64 ? PPC::XSTSTDCDP
11206 : PPC::XSTSTDCSP);
11207 return SDValue(
11208 DAG.getMachineNode(
11209 PPC::SELECT_CC_I4, dl, MVT::i32,
11210 {SDValue(DAG.getMachineNode(CmprOpc, dl, MVT::i32, Op.getOperand(2),
11211 Op.getOperand(1)),
11212 0),
11213 DAG.getConstant(1, dl, MVT::i32), DAG.getConstant(0, dl, MVT::i32),
11214 DAG.getTargetConstant(PPC::PRED_EQ, dl, MVT::i32)}),
11215 0);
11216 }
11217 case Intrinsic::ppc_fnmsub: {
11218 EVT VT = Op.getOperand(1).getValueType();
11219 if (!Subtarget.hasVSX() || (!Subtarget.hasFloat128() && VT == MVT::f128))
11220 return DAG.getNode(
11221 ISD::FNEG, dl, VT,
11222 DAG.getNode(ISD::FMA, dl, VT, Op.getOperand(1), Op.getOperand(2),
11223 DAG.getNode(ISD::FNEG, dl, VT, Op.getOperand(3))));
11224 return DAG.getNode(PPCISD::FNMSUB, dl, VT, Op.getOperand(1),
11225 Op.getOperand(2), Op.getOperand(3));
11226 }
11227 case Intrinsic::ppc_convert_f128_to_ppcf128:
11228 case Intrinsic::ppc_convert_ppcf128_to_f128: {
11229 RTLIB::Libcall LC = IntrinsicID == Intrinsic::ppc_convert_ppcf128_to_f128
11230 ? RTLIB::CONVERT_PPCF128_F128
11231 : RTLIB::CONVERT_F128_PPCF128;
11232 MakeLibCallOptions CallOptions;
11233 std::pair<SDValue, SDValue> Result =
11234 makeLibCall(DAG, LC, Op.getValueType(), Op.getOperand(1), CallOptions,
11235 dl, SDValue());
11236 return Result.first;
11237 }
11238 case Intrinsic::ppc_maxfe:
11239 case Intrinsic::ppc_maxfl:
11240 case Intrinsic::ppc_maxfs:
11241 case Intrinsic::ppc_minfe:
11242 case Intrinsic::ppc_minfl:
11243 case Intrinsic::ppc_minfs: {
11244 EVT VT = Op.getValueType();
11245 assert(
11246 all_of(Op->ops().drop_front(4),
11247 [VT](const SDUse &Use) { return Use.getValueType() == VT; }) &&
11248 "ppc_[max|min]f[e|l|s] must have uniform type arguments");
11249 (void)VT;
11250 ISD::CondCode CC = ISD::SETGT;
11251 if (IntrinsicID == Intrinsic::ppc_minfe ||
11252 IntrinsicID == Intrinsic::ppc_minfl ||
11253 IntrinsicID == Intrinsic::ppc_minfs)
11254 CC = ISD::SETLT;
11255 unsigned I = Op.getNumOperands() - 2, Cnt = I;
11256 SDValue Res = Op.getOperand(I);
11257 for (--I; Cnt != 0; --Cnt, I = (--I == 0 ? (Op.getNumOperands() - 1) : I)) {
11258 Res =
11259 DAG.getSelectCC(dl, Res, Op.getOperand(I), Res, Op.getOperand(I), CC);
11260 }
11261 return Res;
11262 }
11263 }
11264
11265 // If this is a lowered altivec predicate compare, CompareOpc is set to the
11266 // opcode number of the comparison.
11267 int CompareOpc;
11268 bool isDot;
11269 if (!getVectorCompareInfo(Op, CompareOpc, isDot, Subtarget))
11270 return SDValue(); // Don't custom lower most intrinsics.
11271
11272 // If this is a non-dot comparison, make the VCMP node and we are done.
11273 if (!isDot) {
11274 SDValue Tmp = DAG.getNode(PPCISD::VCMP, dl, Op.getOperand(2).getValueType(),
11275 Op.getOperand(1), Op.getOperand(2),
11276 DAG.getConstant(CompareOpc, dl, MVT::i32));
11277 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Tmp);
11278 }
11279
11280 // Create the PPCISD altivec 'dot' comparison node.
11281 SDValue Ops[] = {
11282 Op.getOperand(2), // LHS
11283 Op.getOperand(3), // RHS
11284 DAG.getConstant(CompareOpc, dl, MVT::i32)
11285 };
11286 EVT VTs[] = { Op.getOperand(2).getValueType(), MVT::Glue };
11287 SDValue CompNode = DAG.getNode(PPCISD::VCMP_rec, dl, VTs, Ops);
11288
11289 // Unpack the result based on how the target uses it.
11290 unsigned BitNo; // Bit # of CR6.
11291 bool InvertBit; // Invert result?
11292 unsigned Bitx;
11293 unsigned SetOp;
11294 switch (Op.getConstantOperandVal(1)) {
11295 default: // Can't happen, don't crash on invalid number though.
11296 case 0: // Return the value of the EQ bit of CR6.
11297 BitNo = 0;
11298 InvertBit = false;
11299 Bitx = PPC::sub_eq;
11300 SetOp = PPCISD::SETBC;
11301 break;
11302 case 1: // Return the inverted value of the EQ bit of CR6.
11303 BitNo = 0;
11304 InvertBit = true;
11305 Bitx = PPC::sub_eq;
11306 SetOp = PPCISD::SETBCR;
11307 break;
11308 case 2: // Return the value of the LT bit of CR6.
11309 BitNo = 2;
11310 InvertBit = false;
11311 Bitx = PPC::sub_lt;
11312 SetOp = PPCISD::SETBC;
11313 break;
11314 case 3: // Return the inverted value of the LT bit of CR6.
11315 BitNo = 2;
11316 InvertBit = true;
11317 Bitx = PPC::sub_lt;
11318 SetOp = PPCISD::SETBCR;
11319 break;
11320 }
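 // Illustrative note (assumption about how clang's altivec.h encodes these
 // predicates, not stated in this file): vec_all_* style builtins typically
 // pass predicate 2 and read the LT bit of CR6 ("all elements compared
 // true"), vec_all_ne uses predicate 0 (EQ bit, "no element compared true"),
 // and predicates 1 and 3 request the inverted bits, which is why those
 // cases select PPCISD::SETBCR instead of PPCISD::SETBC.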
11321
11322 SDValue GlueOp = CompNode.getValue(1);
11323 if (Subtarget.isISA3_1()) {
11324 SDValue SubRegIdx = DAG.getTargetConstant(Bitx, dl, MVT::i32);
11325 SDValue CR6Reg = DAG.getRegister(PPC::CR6, MVT::i32);
11326 SDValue CRBit =
11327 SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::i1,
11328 CR6Reg, SubRegIdx, GlueOp),
11329 0);
11330 return DAG.getNode(SetOp, dl, MVT::i32, CRBit);
11331 }
11332
11333 // Now that we have the comparison, emit a copy from the CR to a GPR.
11334 // This is flagged to the above dot comparison.
11335 SDValue Flags = DAG.getNode(PPCISD::MFOCRF, dl, MVT::i32,
11336 DAG.getRegister(PPC::CR6, MVT::i32), GlueOp);
11337
11338 // Shift the bit into the low position.
11339 Flags = DAG.getNode(ISD::SRL, dl, MVT::i32, Flags,
11340 DAG.getConstant(8 - (3 - BitNo), dl, MVT::i32));
11341 // Isolate the bit.
11342 Flags = DAG.getNode(ISD::AND, dl, MVT::i32, Flags,
11343 DAG.getConstant(1, dl, MVT::i32));
11344
11345 // If we are supposed to, toggle the bit.
11346 if (InvertBit)
11347 Flags = DAG.getNode(ISD::XOR, dl, MVT::i32, Flags,
11348 DAG.getConstant(1, dl, MVT::i32));
11349 return Flags;
11350}
11351
11352SDValue PPCTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
11353 SelectionDAG &DAG) const {
11354 // SelectionDAGBuilder::visitTargetIntrinsic may insert one extra chain to
11355 // the beginning of the argument list.
11356 int ArgStart = isa<ConstantSDNode>(Op.getOperand(0)) ? 0 : 1;
11357 SDLoc DL(Op);
11358 switch (Op.getConstantOperandVal(ArgStart)) {
11359 case Intrinsic::ppc_cfence: {
11360 assert(ArgStart == 1 && "llvm.ppc.cfence must carry a chain argument.");
11361 SDValue Val = Op.getOperand(ArgStart + 1);
11362 EVT Ty = Val.getValueType();
11363 if (Ty == MVT::i128) {
11364 // FIXME: Testing one of two paired registers is sufficient to guarantee
11365 // ordering?
11366 Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::i64, Val);
11367 }
11368 unsigned Opcode = Subtarget.isPPC64() ? PPC::CFENCE8 : PPC::CFENCE;
11369 return SDValue(
11370 DAG.getMachineNode(
11371 Opcode, DL, MVT::Other,
11372 DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getScalarIntVT(), Val),
11373 Op.getOperand(0)),
11374 0);
11375 }
11376 default:
11377 break;
11378 }
11379 return SDValue();
11380}
11381
11382// Lower scalar BSWAP64 to xxbrd.
11383SDValue PPCTargetLowering::LowerBSWAP(SDValue Op, SelectionDAG &DAG) const {
11384 SDLoc dl(Op);
11385 if (!Subtarget.isPPC64())
11386 return Op;
11387 // MTVSRDD
11388 Op = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, Op.getOperand(0),
11389 Op.getOperand(0));
11390 // XXBRD
11391 Op = DAG.getNode(ISD::BSWAP, dl, MVT::v2i64, Op);
11392 // MFVSRD
11393 int VectorIndex = 0;
11394 if (Subtarget.isLittleEndian())
11395 VectorIndex = 1;
11396 Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, Op,
11397 DAG.getTargetConstant(VectorIndex, dl, MVT::i32));
11398 return Op;
11399}
11400
11401// ATOMIC_CMP_SWAP for i8/i16 needs to zero-extend its input since it will be
11402// compared to a value that is atomically loaded (atomic loads zero-extend).
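// Illustrative example (not from the original source): an i8 cmpxchg whose
// expected value has unknown high bits gets that operand ANDed with
// (1 << 8) - 1 = 0xFF below, and the node is rebuilt as a
// PPCISD::ATOMIC_CMP_SWAP_8 memory intrinsic so the comparison sees the same
// zero-extended form the partword atomic load produces.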
11403SDValue PPCTargetLowering::LowerATOMIC_CMP_SWAP(SDValue Op,
11404 SelectionDAG &DAG) const {
11405 assert(Op.getOpcode() == ISD::ATOMIC_CMP_SWAP &&
11406 "Expecting an atomic compare-and-swap here.");
11407 SDLoc dl(Op);
11408 auto *AtomicNode = cast<AtomicSDNode>(Op.getNode());
11409 EVT MemVT = AtomicNode->getMemoryVT();
11410 if (MemVT.getSizeInBits() >= 32)
11411 return Op;
11412
11413 SDValue CmpOp = Op.getOperand(2);
11414 // If this is already correctly zero-extended, leave it alone.
11415 auto HighBits = APInt::getHighBitsSet(32, 32 - MemVT.getSizeInBits());
11416 if (DAG.MaskedValueIsZero(CmpOp, HighBits))
11417 return Op;
11418
11419 // Clear the high bits of the compare operand.
11420 unsigned MaskVal = (1 << MemVT.getSizeInBits()) - 1;
11421 SDValue NewCmpOp =
11422 DAG.getNode(ISD::AND, dl, MVT::i32, CmpOp,
11423 DAG.getConstant(MaskVal, dl, MVT::i32));
11424
11425 // Replace the existing compare operand with the properly zero-extended one.
11426 SmallVector<SDValue, 4> Ops;
11427 for (int i = 0, e = AtomicNode->getNumOperands(); i < e; i++)
11428 Ops.push_back(AtomicNode->getOperand(i));
11429 Ops[2] = NewCmpOp;
11430 MachineMemOperand *MMO = AtomicNode->getMemOperand();
11431 SDVTList Tys = DAG.getVTList(MVT::i32, MVT::Other);
11432 auto NodeTy =
11433 (MemVT == MVT::i8) ? PPCISD::ATOMIC_CMP_SWAP_8 : PPCISD::ATOMIC_CMP_SWAP_16;
11434 return DAG.getMemIntrinsicNode(NodeTy, dl, Tys, Ops, MemVT, MMO);
11435}
11436
11437SDValue PPCTargetLowering::LowerATOMIC_LOAD_STORE(SDValue Op,
11438 SelectionDAG &DAG) const {
11439 AtomicSDNode *N = cast<AtomicSDNode>(Op.getNode());
11440 EVT MemVT = N->getMemoryVT();
11441 assert(MemVT.getSimpleVT() == MVT::i128 &&
11442 "Expect quadword atomic operations");
11443 SDLoc dl(N);
11444 unsigned Opc = N->getOpcode();
11445 switch (Opc) {
11446 case ISD::ATOMIC_LOAD: {
11447 // Lower quadword atomic load to int_ppc_atomic_load_i128, which will be
11448 // lowered to PPC instructions by the pattern-matching instruction selector.
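 // Explanatory note (not from the original source): the intrinsic yields the
 // quadword as two i64 halves, which are recombined below as
 // zext(lo) | (zext(hi) << 64) before being merged with the output chain.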
11449 SDVTList Tys = DAG.getVTList(MVT::i64, MVT::i64, MVT::Other);
11450 SmallVector<SDValue, 4> Ops{
11451 N->getOperand(0),
11452 DAG.getConstant(Intrinsic::ppc_atomic_load_i128, dl, MVT::i32)};
11453 for (int I = 1, E = N->getNumOperands(); I < E; ++I)
11454 Ops.push_back(N->getOperand(I));
11455 SDValue LoadedVal = DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl, Tys,
11456 Ops, MemVT, N->getMemOperand());
11457 SDValue ValLo = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i128, LoadedVal);
11458 SDValue ValHi =
11459 DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i128, LoadedVal.getValue(1));
11460 ValHi = DAG.getNode(ISD::SHL, dl, MVT::i128, ValHi,
11461 DAG.getConstant(64, dl, MVT::i32));
11462 SDValue Val =
11463 DAG.getNode(ISD::OR, dl, {MVT::i128, MVT::Other}, {ValLo, ValHi});
11464 return DAG.getNode(ISD::MERGE_VALUES, dl, {MVT::i128, MVT::Other},
11465 {Val, LoadedVal.getValue(2)});
11466 }
11467 case ISD::ATOMIC_STORE: {
11468 // Lower quadword atomic store to int_ppc_atomic_store_i128, which will be
11469 // lowered to PPC instructions by the pattern-matching instruction selector.
11470 SDVTList Tys = DAG.getVTList(MVT::Other);
11471 SmallVector<SDValue, 4> Ops{
11472 N->getOperand(0),
11473 DAG.getConstant(Intrinsic::ppc_atomic_store_i128, dl, MVT::i32)};
11474 SDValue Val = N->getOperand(1);
11475 SDValue ValLo = DAG.getNode(ISD::TRUNCATE, dl, MVT::i64, Val);
11476 SDValue ValHi = DAG.getNode(ISD::SRL, dl, MVT::i128, Val,
11477 DAG.getConstant(64, dl, MVT::i32));
11478 ValHi = DAG.getNode(ISD::TRUNCATE, dl, MVT::i64, ValHi);
11479 Ops.push_back(ValLo);
11480 Ops.push_back(ValHi);
11481 Ops.push_back(N->getOperand(2));
11482 return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, dl, Tys, Ops, MemVT,
11483 N->getMemOperand());
11484 }
11485 default:
11486 llvm_unreachable("Unexpected atomic opcode");
11487 }
11488}
11489
11491 SelectionDAG &DAG,
11492 const PPCSubtarget &Subtarget) {
11493 assert(Mask <= fcAllFlags && "Invalid fp_class flags!");
11494
11495 enum DataClassMask {
11496 DC_NAN = 1 << 6,
11497 DC_NEG_INF = 1 << 4,
11498 DC_POS_INF = 1 << 5,
11499 DC_NEG_ZERO = 1 << 2,
11500 DC_POS_ZERO = 1 << 3,
11501 DC_NEG_SUBNORM = 1,
11502 DC_POS_SUBNORM = 1 << 1,
11503 };
11504
11505 EVT VT = Op.getValueType();
11506
11507 unsigned TestOp = VT == MVT::f128 ? PPC::XSTSTDCQP
11508 : VT == MVT::f64 ? PPC::XSTSTDCDP
11509 : PPC::XSTSTDCSP;
11510
11511 if (Mask == fcAllFlags)
11512 return DAG.getBoolConstant(true, Dl, MVT::i1, VT);
11513 if (Mask == 0)
11514 return DAG.getBoolConstant(false, Dl, MVT::i1, VT);
11515
11516 // Test the inverted flags when that is cheaper or necessary.
11517 if ((Mask & fcNormal) == fcNormal || Mask == ~fcQNan || Mask == ~fcSNan) {
11518 SDValue Rev = getDataClassTest(Op, ~Mask, Dl, DAG, Subtarget);
11519 return DAG.getNOT(Dl, Rev, MVT::i1);
11520 }
11521
11522 // Power doesn't support testing whether a value is 'normal'. Test the rest
11523 // first, and test if it's 'not not-normal' with expected sign.
11524 if (Mask & fcNormal) {
11525 SDValue Rev(DAG.getMachineNode(
11526 TestOp, Dl, MVT::i32,
11527 DAG.getTargetConstant(DC_NAN | DC_NEG_INF | DC_POS_INF |
11528 DC_NEG_ZERO | DC_POS_ZERO |
11529 DC_NEG_SUBNORM | DC_POS_SUBNORM,
11530 Dl, MVT::i32),
11531 Op),
11532 0);
11533 // The sign is stored in CR bit 0; the result is in CR bit 2.
11534 SDValue Sign(
11535 DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, Dl, MVT::i1, Rev,
11536 DAG.getTargetConstant(PPC::sub_lt, Dl, MVT::i32)),
11537 0);
11538 SDValue Normal(DAG.getNOT(
11539 Dl,
11540 SDValue(DAG.getMachineNode(
11541 TargetOpcode::EXTRACT_SUBREG, Dl, MVT::i1, Rev,
11542 DAG.getTargetConstant(PPC::sub_eq, Dl, MVT::i32)),
11543 0),
11544 MVT::i1));
11545 if (Mask & fcPosNormal)
11546 Sign = DAG.getNOT(Dl, Sign, MVT::i1);
11547 SDValue Result = DAG.getNode(ISD::AND, Dl, MVT::i1, Sign, Normal);
11548 if (Mask == fcPosNormal || Mask == fcNegNormal)
11549 return Result;
11550
11551 return DAG.getNode(
11552 ISD::OR, Dl, MVT::i1,
11553 getDataClassTest(Op, Mask & ~fcNormal, Dl, DAG, Subtarget), Result);
11554 }
11555
11556 // The instruction doesn't differentiate between signaling and quiet NaNs.
11557 // Test the rest first, and then test whether it 'is NaN and is signaling/quiet'.
11558 if ((Mask & fcNan) == fcQNan || (Mask & fcNan) == fcSNan) {
11559 bool IsQuiet = Mask & fcQNan;
11560 SDValue NanCheck = getDataClassTest(Op, fcNan, Dl, DAG, Subtarget);
11561
11562 // Quietness is determined by the first bit in fraction field.
11563 uint64_t QuietMask = 0;
11564 SDValue HighWord;
11565 if (VT == MVT::f128) {
11566 HighWord = DAG.getNode(
11567 ISD::EXTRACT_VECTOR_ELT, Dl, MVT::i32, DAG.getBitcast(MVT::v4i32, Op),
11568 DAG.getVectorIdxConstant(Subtarget.isLittleEndian() ? 3 : 0, Dl));
11569 QuietMask = 0x8000;
11570 } else if (VT == MVT::f64) {
11571 if (Subtarget.isPPC64()) {
11572 HighWord = DAG.getNode(ISD::EXTRACT_ELEMENT, Dl, MVT::i32,
11573 DAG.getBitcast(MVT::i64, Op),
11574 DAG.getConstant(1, Dl, MVT::i32));
11575 } else {
11576 SDValue Vec = DAG.getBitcast(
11577 MVT::v4i32, DAG.getNode(ISD::SCALAR_TO_VECTOR, Dl, MVT::v2f64, Op));
11578 HighWord = DAG.getNode(
11579 ISD::EXTRACT_VECTOR_ELT, Dl, MVT::i32, Vec,
11580 DAG.getVectorIdxConstant(Subtarget.isLittleEndian() ? 1 : 0, Dl));
11581 }
11582 QuietMask = 0x80000;
11583 } else if (VT == MVT::f32) {
11584 HighWord = DAG.getBitcast(MVT::i32, Op);
11585 QuietMask = 0x400000;
11586 }
11587 SDValue NanRes = DAG.getSetCC(
11588 Dl, MVT::i1,
11589 DAG.getNode(ISD::AND, Dl, MVT::i32, HighWord,
11590 DAG.getConstant(QuietMask, Dl, MVT::i32)),
11591 DAG.getConstant(0, Dl, MVT::i32), IsQuiet ? ISD::SETNE : ISD::SETEQ);
11592 NanRes = DAG.getNode(ISD::AND, Dl, MVT::i1, NanCheck, NanRes);
11593 if (Mask == fcQNan || Mask == fcSNan)
11594 return NanRes;
11595
11596 return DAG.getNode(ISD::OR, Dl, MVT::i1,
11597 getDataClassTest(Op, Mask & ~fcNan, Dl, DAG, Subtarget),
11598 NanRes);
11599 }
11600
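 // Illustrative example (not from the original source): a query for
 // fcPosInf | fcNegInf falls through to this point and produces a native
 // mask of DC_POS_INF | DC_NEG_INF = 0x30, so the whole classification is a
 // single test-data-class instruction followed by extracting the EQ bit of
 // the resulting CR field.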
11601 unsigned NativeMask = 0;
11602 if ((Mask & fcNan) == fcNan)
11603 NativeMask |= DC_NAN;
11604 if (Mask & fcNegInf)
11605 NativeMask |= DC_NEG_INF;
11606 if (Mask & fcPosInf)
11607 NativeMask |= DC_POS_INF;
11608 if (Mask & fcNegZero)
11609 NativeMask |= DC_NEG_ZERO;
11610 if (Mask & fcPosZero)
11611 NativeMask |= DC_POS_ZERO;
11612 if (Mask & fcNegSubnormal)
11613 NativeMask |= DC_NEG_SUBNORM;
11614 if (Mask & fcPosSubnormal)
11615 NativeMask |= DC_POS_SUBNORM;
11616 return SDValue(
11617 DAG.getMachineNode(
11618 TargetOpcode::EXTRACT_SUBREG, Dl, MVT::i1,
11619 SDValue(DAG.getMachineNode(
11620 TestOp, Dl, MVT::i32,
11621 DAG.getTargetConstant(NativeMask, Dl, MVT::i32), Op),
11622 0),
11623 DAG.getTargetConstant(PPC::sub_eq, Dl, MVT::i32)),
11624 0);
11625}
11626
11627SDValue PPCTargetLowering::LowerIS_FPCLASS(SDValue Op,
11628 SelectionDAG &DAG) const {
11629 assert(Subtarget.hasP9Vector() && "Test data class requires Power9");
11630 SDValue LHS = Op.getOperand(0);
11631 uint64_t RHSC = Op.getConstantOperandVal(1);
11632 SDLoc Dl(Op);
11633 FPClassTest Category = static_cast<FPClassTest>(RHSC);
11634 if (LHS.getValueType() == MVT::ppcf128) {
11635 // The higher part determines the value class.
11636 LHS = DAG.getNode(ISD::EXTRACT_ELEMENT, Dl, MVT::f64, LHS,
11637 DAG.getConstant(1, Dl, MVT::i32));
11638 }
11639
11640 return getDataClassTest(LHS, Category, Dl, DAG, Subtarget);
11641}
11642
11643SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op,
11644 SelectionDAG &DAG) const {
11645 SDLoc dl(Op);
11646
11647 MachineFunction &MF = DAG.getMachineFunction();
11648 SDValue Op0 = Op.getOperand(0);
11649 EVT ValVT = Op0.getValueType();
11650 unsigned EltSize = Op.getValueType().getScalarSizeInBits();
11651 if (isa<ConstantSDNode>(Op0) && EltSize <= 32) {
11652 int64_t IntVal = Op.getConstantOperandVal(0);
11653 if (IntVal >= -16 && IntVal <= 15)
11654 return getCanonicalConstSplat(IntVal, EltSize / 8, Op.getValueType(), DAG,
11655 dl);
11656 }
11657
11658 ReuseLoadInfo RLI;
11659 if (Subtarget.hasLFIWAX() && Subtarget.hasVSX() &&
11660 Op.getValueType() == MVT::v4i32 && Op0.getOpcode() == ISD::LOAD &&
11661 Op0.getValueType() == MVT::i32 && Op0.hasOneUse() &&
11662 canReuseLoadAddress(Op0, MVT::i32, RLI, DAG, ISD::NON_EXTLOAD)) {
11663
11664 MachineMemOperand *MMO =
11665 MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
11666 RLI.Alignment, RLI.AAInfo, RLI.Ranges);
11667 SDValue Ops[] = {RLI.Chain, RLI.Ptr, DAG.getValueType(Op.getValueType())};
11668 SDValue Bits = DAG.getMemIntrinsicNode(
11669 PPCISD::LD_SPLAT, dl, DAG.getVTList(MVT::v4i32, MVT::Other), Ops,
11670 MVT::i32, MMO);
11671 spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
11672 return Bits.getValue(0);
11673 }
11674
11675 // Create a stack slot that is 16-byte aligned.
11676 MachineFrameInfo &MFI = MF.getFrameInfo();
11677 int FrameIdx = MFI.CreateStackObject(16, Align(16), false);
11678 EVT PtrVT = getPointerTy(DAG.getDataLayout());
11679 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
11680
11681 SDValue Val = Op0;
11682 // P10 hardware store forwarding requires that a single store contains all
11683 // the data for the load. P10 is able to merge a pair of adjacent stores. Try
11684 // to avoid load hit store on P10 when running binaries compiled for older
11685 // processors by generating two mergeable scalar stores to forward with the
11686 // vector load.
11687 if (!DisableP10StoreForward && Subtarget.isPPC64() &&
11688 !Subtarget.isLittleEndian() && ValVT.isInteger() &&
11689 ValVT.getSizeInBits() <= 64) {
11690 Val = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, Val);
11691 EVT ShiftAmountTy = getShiftAmountTy(MVT::i64, DAG.getDataLayout());
11692 SDValue ShiftBy = DAG.getConstant(
11693 64 - Op.getValueType().getScalarSizeInBits(), dl, ShiftAmountTy);
11694 Val = DAG.getNode(ISD::SHL, dl, MVT::i64, Val, ShiftBy);
11695 SDValue Plus8 =
11696 DAG.getNode(ISD::ADD, dl, PtrVT, FIdx, DAG.getConstant(8, dl, PtrVT));
11697 SDValue Store2 =
11698 DAG.getStore(DAG.getEntryNode(), dl, Val, Plus8, MachinePointerInfo());
11699 SDValue Store = DAG.getStore(Store2, dl, Val, FIdx, MachinePointerInfo());
11700 return DAG.getLoad(Op.getValueType(), dl, Store, FIdx,
11701 MachinePointerInfo());
11702 }
11703
11704 // Store the input value into Value#0 of the stack slot.
11705 SDValue Store =
11706 DAG.getStore(DAG.getEntryNode(), dl, Val, FIdx, MachinePointerInfo());
11707 // Load it out.
11708 return DAG.getLoad(Op.getValueType(), dl, Store, FIdx, MachinePointerInfo());
11709}
11710
11711SDValue PPCTargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
11712 SelectionDAG &DAG) const {
11713 assert(Op.getOpcode() == ISD::INSERT_VECTOR_ELT &&
11714 "Should only be called for ISD::INSERT_VECTOR_ELT");
11715
11716 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(2));
11717
11718 EVT VT = Op.getValueType();
11719 SDLoc dl(Op);
11720 SDValue V1 = Op.getOperand(0);
11721 SDValue V2 = Op.getOperand(1);
11722
11723 if (VT == MVT::v2f64 && C)
11724 return Op;
11725
11726 if (Subtarget.hasP9Vector()) {
11727 // An f32 load feeding into a v4f32 insert_vector_elt is handled in this way
11728 // because on P10, it allows this specific insert_vector_elt load pattern to
11729 // utilize the refactored load and store infrastructure in order to exploit
11730 // prefixed loads.
11731 // On targets with inexpensive direct moves (Power9 and up), a
11732 // (insert_vector_elt v4f32:$vec, (f32 load)) is always better as an integer
11733 // load since a single precision load will involve conversion to double
11734 // precision on the load followed by another conversion to single precision.
11735 if ((VT == MVT::v4f32) && (V2.getValueType() == MVT::f32) &&
11736 (isa<LoadSDNode>(V2))) {
11737 SDValue BitcastVector = DAG.getBitcast(MVT::v4i32, V1);
11738 SDValue BitcastLoad = DAG.getBitcast(MVT::i32, V2);
11739 SDValue InsVecElt =
11740 DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v4i32, BitcastVector,
11741 BitcastLoad, Op.getOperand(2));
11742 return DAG.getBitcast(MVT::v4f32, InsVecElt);
11743 }
11744 }
11745
11746 if (Subtarget.isISA3_1()) {
11747 if ((VT == MVT::v2i64 || VT == MVT::v2f64) && !Subtarget.isPPC64())
11748 return SDValue();
11749 // On P10, we have legal lowering for constant and variable indices for
11750 // all vectors.
11751 if (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32 ||
11752 VT == MVT::v2i64 || VT == MVT::v4f32 || VT == MVT::v2f64)
11753 return Op;
11754 }
11755
11756 // Before P10, we have legal lowering for constant indices but not for
11757 // variable ones.
11758 if (!C)
11759 return SDValue();
11760
11761 // We can use MTVSRZ + VECINSERT for v8i16 and v16i8 types.
11762 if (VT == MVT::v8i16 || VT == MVT::v16i8) {
11763 SDValue Mtvsrz = DAG.getNode(PPCISD::MTVSRZ, dl, VT, V2);
11764 unsigned BytesInEachElement = VT.getVectorElementType().getSizeInBits() / 8;
11765 unsigned InsertAtElement = C->getZExtValue();
11766 unsigned InsertAtByte = InsertAtElement * BytesInEachElement;
11767 if (Subtarget.isLittleEndian()) {
11768 InsertAtByte = (16 - BytesInEachElement) - InsertAtByte;
11769 }
11770 return DAG.getNode(PPCISD::VECINSERT, dl, VT, V1, Mtvsrz,
11771 DAG.getConstant(InsertAtByte, dl, MVT::i32));
11772 }
11773 return Op;
11774}
11775
11776SDValue PPCTargetLowering::LowerVectorLoad(SDValue Op,
11777 SelectionDAG &DAG) const {
11778 SDLoc dl(Op);
11779 LoadSDNode *LN = cast<LoadSDNode>(Op.getNode());
11780 SDValue LoadChain = LN->getChain();
11781 SDValue BasePtr = LN->getBasePtr();
11782 EVT VT = Op.getValueType();
11783
11784 if (VT != MVT::v256i1 && VT != MVT::v512i1)
11785 return Op;
11786
11787 // Type v256i1 is used for pairs and v512i1 is used for accumulators.
11788 // Here we create 2 or 4 v16i8 loads to load the pair or accumulator value in
11789 // 2 or 4 vsx registers.
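 // Illustrative example (not from the original source): a big-endian v512i1
 // accumulator load becomes four v16i8 loads at byte offsets 0, 16, 32 and
 // 48, their chains joined by a TokenFactor and their values combined with
 // PPCISD::ACC_BUILD; on little-endian targets the loaded registers are fed
 // to ACC_BUILD in reverse order.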
11790 assert((VT != MVT::v512i1 || Subtarget.hasMMA()) &&
11791 "Type unsupported without MMA");
11792 assert((VT != MVT::v256i1 || Subtarget.pairedVectorMemops()) &&
11793 "Type unsupported without paired vector support");
11794 Align Alignment = LN->getAlign();
11795 SmallVector<SDValue, 4> Loads;
11796 SmallVector<SDValue, 4> LoadChains;
11797 unsigned NumVecs = VT.getSizeInBits() / 128;
11798 for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
11799 SDValue Load =
11800 DAG.getLoad(MVT::v16i8, dl, LoadChain, BasePtr,
11801 LN->getPointerInfo().getWithOffset(Idx * 16),
11802 commonAlignment(Alignment, Idx * 16),
11803 LN->getMemOperand()->getFlags(), LN->getAAInfo());
11804 BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
11805 DAG.getConstant(16, dl, BasePtr.getValueType()));
11806 Loads.push_back(Load);
11807 LoadChains.push_back(Load.getValue(1));
11808 }
11809 if (Subtarget.isLittleEndian()) {
11810 std::reverse(Loads.begin(), Loads.end());
11811 std::reverse(LoadChains.begin(), LoadChains.end());
11812 }
11813 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains);
11814 SDValue Value =
11815 DAG.getNode(VT == MVT::v512i1 ? PPCISD::ACC_BUILD : PPCISD::PAIR_BUILD,
11816 dl, VT, Loads);
11817 SDValue RetOps[] = {Value, TF};
11818 return DAG.getMergeValues(RetOps, dl);
11819}
11820
11821SDValue PPCTargetLowering::LowerVectorStore(SDValue Op,
11822 SelectionDAG &DAG) const {
11823 SDLoc dl(Op);
11824 StoreSDNode *SN = cast<StoreSDNode>(Op.getNode());
11825 SDValue StoreChain = SN->getChain();
11826 SDValue BasePtr = SN->getBasePtr();
11827 SDValue Value = SN->getValue();
11828 SDValue Value2 = SN->getValue();
11829 EVT StoreVT = Value.getValueType();
11830
11831 if (StoreVT != MVT::v256i1 && StoreVT != MVT::v512i1)
11832 return Op;
11833
11834 // Type v256i1 is used for pairs and v512i1 is used for accumulators.
11835 // Here we create 2 or 4 v16i8 stores to store the pair or accumulator
11836 // underlying registers individually.
11837 assert((StoreVT != MVT::v512i1 || Subtarget.hasMMA()) &&
11838 "Type unsupported without MMA");
11839 assert((StoreVT != MVT::v256i1 || Subtarget.pairedVectorMemops()) &&
11840 "Type unsupported without paired vector support");
11841 Align Alignment = SN->getAlign();
11842 SmallVector<SDValue, 4> Stores;
11843 unsigned NumVecs = 2;
11844 if (StoreVT == MVT::v512i1) {
11845 if (Subtarget.isISAFuture()) {
11846 EVT ReturnTypes[] = {MVT::v256i1, MVT::v256i1};
11847 MachineSDNode *ExtNode = DAG.getMachineNode(
11848 PPC::DMXXEXTFDMR512, dl, ReturnTypes, Op.getOperand(1));
11849
11850 Value = SDValue(ExtNode, 0);
11851 Value2 = SDValue(ExtNode, 1);
11852 } else
11853 Value = DAG.getNode(PPCISD::XXMFACC, dl, MVT::v512i1, Value);
11854 NumVecs = 4;
11855 }
11856 for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
11857 unsigned VecNum = Subtarget.isLittleEndian() ? NumVecs - 1 - Idx : Idx;
11858 SDValue Elt;
11859 if (Subtarget.isISAFuture()) {
11860 VecNum = Subtarget.isLittleEndian() ? 1 - (Idx % 2) : (Idx % 2);
11861 Elt = DAG.getNode(PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8,
11862 Idx > 1 ? Value2 : Value,
11863 DAG.getConstant(VecNum, dl, getPointerTy(DAG.getDataLayout())));
11864 } else
11865 Elt = DAG.getNode(PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8, Value,
11866 DAG.getConstant(VecNum, dl, getPointerTy(DAG.getDataLayout())));
11867
11868 SDValue Store =
11869 DAG.getStore(StoreChain, dl, Elt, BasePtr,
11870 SN->getPointerInfo().getWithOffset(Idx * 16),
11871 commonAlignment(Alignment, Idx * 16),
11872 SN->getMemOperand()->getFlags(), SN->getAAInfo());
11873 BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
11874 DAG.getConstant(16, dl, BasePtr.getValueType()));
11875 Stores.push_back(Store);
11876 }
11877 SDValue TF = DAG.getTokenFactor(dl, Stores);
11878 return TF;
11879}
11880
11881SDValue PPCTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const {
11882 SDLoc dl(Op);
11883 if (Op.getValueType() == MVT::v4i32) {
11884 SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
11885
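 // Explanatory note (not from the original source): writing each 32-bit
 // lane as a = a_hi * 2^16 + a_lo and b = b_hi * 2^16 + b_lo,
 // a * b mod 2^32 = a_lo * b_lo + ((a_hi * b_lo + a_lo * b_hi) << 16).
 // vmulouh supplies the a_lo * b_lo terms, and vmsumuhm against the
 // halfword-rotated RHS supplies a_hi * b_lo + a_lo * b_hi, which is then
 // shifted left by 16 and added in.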
11886 SDValue Zero = getCanonicalConstSplat(0, 1, MVT::v4i32, DAG, dl);
11887 // +16 as shift amt.
11888 SDValue Neg16 = getCanonicalConstSplat(-16, 4, MVT::v4i32, DAG, dl);
11889 SDValue RHSSwap = // = vrlw RHS, 16
11890 BuildIntrinsicOp(Intrinsic::ppc_altivec_vrlw, RHS, Neg16, DAG, dl);
11891
11892 // Shrinkify inputs to v8i16.
11893 LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, LHS);
11894 RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHS);
11895 RHSSwap = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHSSwap);
11896
11897 // Low parts multiplied together, generating 32-bit results (we ignore the
11898 // top parts).
11899 SDValue LoProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmulouh,
11900 LHS, RHS, DAG, dl, MVT::v4i32);
11901
11902 SDValue HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmsumuhm,
11903 LHS, RHSSwap, Zero, DAG, dl, MVT::v4i32);
11904 // Shift the high parts up 16 bits.
11905 HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, HiProd,
11906 Neg16, DAG, dl);
11907 return DAG.getNode(ISD::ADD, dl, MVT::v4i32, LoProd, HiProd);
11908 } else if (Op.getValueType() == MVT::v16i8) {
11909 SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
11910 bool isLittleEndian = Subtarget.isLittleEndian();
11911
11912 // Multiply the even 8-bit parts, producing 16-bit sums.
11913 SDValue EvenParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuleub,
11914 LHS, RHS, DAG, dl, MVT::v8i16);
11915 EvenParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, EvenParts);
11916
11917 // Multiply the odd 8-bit parts, producing 16-bit sums.
11918 SDValue OddParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuloub,
11919 LHS, RHS, DAG, dl, MVT::v8i16);
11920 OddParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OddParts);
11921
11922 // Merge the results together. Because vmuleub and vmuloub are
11923 // instructions with a big-endian bias, we must reverse the
11924 // element numbering and reverse the meaning of "odd" and "even"
11925 // when generating little endian code.
11926 int Ops[16];
11927 for (unsigned i = 0; i != 8; ++i) {
11928 if (isLittleEndian) {
11929 Ops[i*2 ] = 2*i;
11930 Ops[i*2+1] = 2*i+16;
11931 } else {
11932 Ops[i*2 ] = 2*i+1;
11933 Ops[i*2+1] = 2*i+1+16;
11934 }
11935 }
11936 if (isLittleEndian)
11937 return DAG.getVectorShuffle(MVT::v16i8, dl, OddParts, EvenParts, Ops);
11938 else
11939 return DAG.getVectorShuffle(MVT::v16i8, dl, EvenParts, OddParts, Ops);
11940 } else {
11941 llvm_unreachable("Unknown mul to lower!");
11942 }
11943}
11944
11945SDValue PPCTargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const {
11946 bool IsStrict = Op->isStrictFPOpcode();
11947 if (Op.getOperand(IsStrict ? 1 : 0).getValueType() == MVT::f128 &&
11948 !Subtarget.hasP9Vector())
11949 return SDValue();
11950
11951 return Op;
11952}
11953
11954 // Custom lowering for fpext v2f32 to v2f64
11955SDValue PPCTargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const {
11956
11957 assert(Op.getOpcode() == ISD::FP_EXTEND &&
11958 "Should only be called for ISD::FP_EXTEND");
11959
11960 // FIXME: handle extends from half precision float vectors on P9.
11961 // We only want to custom lower an extend from v2f32 to v2f64.
11962 if (Op.getValueType() != MVT::v2f64 ||
11963 Op.getOperand(0).getValueType() != MVT::v2f32)
11964 return SDValue();
11965
11966 SDLoc dl(Op);
11967 SDValue Op0 = Op.getOperand(0);
11968
11969 switch (Op0.getOpcode()) {
11970 default:
11971 return SDValue();
11972 case ISD::EXTRACT_SUBVECTOR: {
11973 assert(Op0.getNumOperands() == 2 &&
11974 isa<ConstantSDNode>(Op0->getOperand(1)) &&
11975 "Node should have 2 operands with second one being a constant!");
11976
11977 if (Op0.getOperand(0).getValueType() != MVT::v4f32)
11978 return SDValue();
11979
11980 // Custom lower is only done for high or low doubleword.
11981 int Idx = Op0.getConstantOperandVal(1);
11982 if (Idx % 2 != 0)
11983 return SDValue();
11984
11985 // Since input is v4f32, at this point Idx is either 0 or 2.
11986 // Shift to get the doubleword position we want.
11987 int DWord = Idx >> 1;
11988
11989 // High and low word positions are different on little endian.
11990 if (Subtarget.isLittleEndian())
11991 DWord ^= 0x1;
11992
11993 return DAG.getNode(PPCISD::FP_EXTEND_HALF, dl, MVT::v2f64,
11994 Op0.getOperand(0), DAG.getConstant(DWord, dl, MVT::i32));
11995 }
11996 case ISD::FADD:
11997 case ISD::FMUL:
11998 case ISD::FSUB: {
11999 SDValue NewLoad[2];
12000 for (unsigned i = 0, ie = Op0.getNumOperands(); i != ie; ++i) {
12001 // Ensure both inputs are loads.
12002 SDValue LdOp = Op0.getOperand(i);
12003 if (LdOp.getOpcode() != ISD::LOAD)
12004 return SDValue();
12005 // Generate new load node.
12006 LoadSDNode *LD = cast<LoadSDNode>(LdOp);
12007 SDValue LoadOps[] = {LD->getChain(), LD->getBasePtr()};
12008 NewLoad[i] = DAG.getMemIntrinsicNode(
12009 PPCISD::LD_VSX_LH, dl, DAG.getVTList(MVT::v4f32, MVT::Other), LoadOps,
12010 LD->getMemoryVT(), LD->getMemOperand());
12011 }
12012 SDValue NewOp =
12013 DAG.getNode(Op0.getOpcode(), SDLoc(Op0), MVT::v4f32, NewLoad[0],
12014 NewLoad[1], Op0.getNode()->getFlags());
12015 return DAG.getNode(PPCISD::FP_EXTEND_HALF, dl, MVT::v2f64, NewOp,
12016 DAG.getConstant(0, dl, MVT::i32));
12017 }
12018 case ISD::LOAD: {
12019 LoadSDNode *LD = cast<LoadSDNode>(Op0);
12020 SDValue LoadOps[] = {LD->getChain(), LD->getBasePtr()};
12021 SDValue NewLd = DAG.getMemIntrinsicNode(
12022 PPCISD::LD_VSX_LH, dl, DAG.getVTList(MVT::v4f32, MVT::Other), LoadOps,
12023 LD->getMemoryVT(), LD->getMemOperand());
12024 return DAG.getNode(PPCISD::FP_EXTEND_HALF, dl, MVT::v2f64, NewLd,
12025 DAG.getConstant(0, dl, MVT::i32));
12026 }
12027 }
12028 llvm_unreachable("ERROR: Should return for all cases within switch.");
12029}
12030
12031SDValue PPCTargetLowering::LowerUaddo(SDValue Op, SelectionDAG &DAG) const {
12032 // Default to target independent lowering if there is a logical user of the
12033 // carry-bit.
12034 for (SDNode *U : Op->users()) {
12035 if (U->getOpcode() == ISD::SELECT)
12036 return SDValue();
12037 if (ISD::isBitwiseLogicOp(U->getOpcode())) {
12038 for (unsigned i = 0, ie = U->getNumOperands(); i != ie; ++i) {
12039 if (U->getOperand(i).getOpcode() != ISD::UADDO &&
12040 U->getOperand(i).getOpcode() != ISD::MERGE_VALUES)
12041 return SDValue();
12042 }
12043 }
12044 }
12045 SDValue LHS = Op.getOperand(0);
12046 SDValue RHS = Op.getOperand(1);
12047 SDLoc dl(Op);
12048
12049 // Default to target independent lowering for special cases handled there.
12050 if (isOneConstant(RHS) || isAllOnesConstant(RHS))
12051 return SDValue();
12052
12053 EVT VT = Op.getNode()->getValueType(0);
12054
12055 SDValue ADDC;
12056 SDValue Overflow;
12057 SDVTList VTs = Op.getNode()->getVTList();
12058
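 // Explanatory note (not from the original source): ADDC produces the sum
 // and passes the carry out through glue; ADDE with two zero operands then
 // materializes that carry as a 0/1 value of type VT, which is truncated to
 // the overflow type and merged with the sum below.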
12059 ADDC = DAG.getNode(ISD::ADDC, dl, DAG.getVTList(VT, MVT::Glue), LHS, RHS);
12060 Overflow = DAG.getNode(ISD::ADDE, dl, DAG.getVTList(VT, MVT::Glue),
12061 DAG.getConstant(0, dl, VT), DAG.getConstant(0, dl, VT),
12062 ADDC.getValue(1));
12063 SDValue OverflowTrunc =
12064 DAG.getNode(ISD::TRUNCATE, dl, Op.getNode()->getValueType(1), Overflow);
12065 SDValue Res =
12066 DAG.getNode(ISD::MERGE_VALUES, dl, VTs, ADDC.getValue(0), OverflowTrunc);
12067 return Res;
12068}
12069
12070SDValue PPCTargetLowering::LowerSSUBO(SDValue Op, SelectionDAG &DAG) const {
12071
12072 SDLoc dl(Op);
12073 SDValue LHS = Op.getOperand(0);
12074 SDValue RHS = Op.getOperand(1);
12075 EVT VT = Op.getNode()->getValueType(0);
12076
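 // Explanatory note (not from the original source): signed subtraction
 // overflows exactly when the operands have different signs and the result's
 // sign differs from LHS, i.e. when ((RHS ^ LHS) & (Sub ^ LHS)) has its sign
 // bit set; the SRL below moves that sign bit down to bit 0.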
12077 SDValue Sub = DAG.getNode(ISD::SUB, dl, VT, LHS, RHS);
12078
12079 SDValue Xor1 = DAG.getNode(ISD::XOR, dl, VT, RHS, LHS);
12080 SDValue Xor2 = DAG.getNode(ISD::XOR, dl, VT, Sub, LHS);
12081
12082 SDValue And = DAG.getNode(ISD::AND, dl, VT, Xor1, Xor2);
12083
12084 SDValue Overflow =
12085 DAG.getNode(ISD::SRL, dl, VT, And,
12086 DAG.getConstant(VT.getSizeInBits() - 1, dl, MVT::i32));
12087
12088 SDValue OverflowTrunc =
12089 DAG.getNode(ISD::TRUNCATE, dl, Op.getNode()->getValueType(1), Overflow);
12090
12091 return DAG.getMergeValues({Sub, OverflowTrunc}, dl);
12092}
12093
12094/// LowerOperation - Provide custom lowering hooks for some operations.
12095///
12097 switch (Op.getOpcode()) {
12098 default: llvm_unreachable("Wasn't expecting to be able to lower this!");
12099 case ISD::UADDO: return LowerUaddo(Op, DAG);
12100 case ISD::FPOW: return lowerPow(Op, DAG);
12101 case ISD::FSIN: return lowerSin(Op, DAG);
12102 case ISD::FCOS: return lowerCos(Op, DAG);
12103 case ISD::FLOG: return lowerLog(Op, DAG);
12104 case ISD::FLOG10: return lowerLog10(Op, DAG);
12105 case ISD::FEXP: return lowerExp(Op, DAG);
12106 case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
12107 case ISD::BlockAddress: return LowerBlockAddress(Op, DAG);
12108 case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG);
12109 case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
12110 case ISD::JumpTable: return LowerJumpTable(Op, DAG);
12111 case ISD::STRICT_FSETCC:
12112 case ISD::STRICT_FSETCCS:
12113 case ISD::SETCC: return LowerSETCC(Op, DAG);
12114 case ISD::INIT_TRAMPOLINE: return LowerINIT_TRAMPOLINE(Op, DAG);
12115 case ISD::ADJUST_TRAMPOLINE: return LowerADJUST_TRAMPOLINE(Op, DAG);
12116 case ISD::SSUBO:
12117 return LowerSSUBO(Op, DAG);
12118
12119 case ISD::INLINEASM:
12120 case ISD::INLINEASM_BR: return LowerINLINEASM(Op, DAG);
12121 // Variable argument lowering.
12122 case ISD::VASTART: return LowerVASTART(Op, DAG);
12123 case ISD::VAARG: return LowerVAARG(Op, DAG);
12124 case ISD::VACOPY: return LowerVACOPY(Op, DAG);
12125
12126 case ISD::STACKRESTORE: return LowerSTACKRESTORE(Op, DAG);
12127 case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG);
12128 case ISD::GET_DYNAMIC_AREA_OFFSET:
12129 return LowerGET_DYNAMIC_AREA_OFFSET(Op, DAG);
12130
12131 // Exception handling lowering.
12132 case ISD::EH_DWARF_CFA: return LowerEH_DWARF_CFA(Op, DAG);
12133 case ISD::EH_SJLJ_SETJMP: return lowerEH_SJLJ_SETJMP(Op, DAG);
12134 case ISD::EH_SJLJ_LONGJMP: return lowerEH_SJLJ_LONGJMP(Op, DAG);
12135
12136 case ISD::LOAD: return LowerLOAD(Op, DAG);
12137 case ISD::STORE: return LowerSTORE(Op, DAG);
12138 case ISD::TRUNCATE: return LowerTRUNCATE(Op, DAG);
12139 case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
12140 case ISD::STRICT_FP_TO_UINT:
12141 case ISD::STRICT_FP_TO_SINT:
12142 case ISD::FP_TO_UINT:
12143 case ISD::FP_TO_SINT: return LowerFP_TO_INT(Op, DAG, SDLoc(Op));
12144 case ISD::STRICT_UINT_TO_FP:
12145 case ISD::STRICT_SINT_TO_FP:
12146 case ISD::UINT_TO_FP:
12147 case ISD::SINT_TO_FP: return LowerINT_TO_FP(Op, DAG);
12148 case ISD::GET_ROUNDING: return LowerGET_ROUNDING(Op, DAG);
12149 case ISD::SET_ROUNDING:
12150 return LowerSET_ROUNDING(Op, DAG);
12151
12152 // Lower 64-bit shifts.
12153 case ISD::SHL_PARTS: return LowerSHL_PARTS(Op, DAG);
12154 case ISD::SRL_PARTS: return LowerSRL_PARTS(Op, DAG);
12155 case ISD::SRA_PARTS: return LowerSRA_PARTS(Op, DAG);
12156
12157 case ISD::FSHL: return LowerFunnelShift(Op, DAG);
12158 case ISD::FSHR: return LowerFunnelShift(Op, DAG);
12159
12160 // Vector-related lowering.
12161 case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG);
12162 case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG);
12163 case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
12164 case ISD::SCALAR_TO_VECTOR: return LowerSCALAR_TO_VECTOR(Op, DAG);
12165 case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
12166 case ISD::MUL: return LowerMUL(Op, DAG);
12167 case ISD::FP_EXTEND: return LowerFP_EXTEND(Op, DAG);
12168 case ISD::STRICT_FP_ROUND:
12169 case ISD::FP_ROUND:
12170 return LowerFP_ROUND(Op, DAG);
12171 case ISD::ROTL: return LowerROTL(Op, DAG);
12172
12173 // For counter-based loop handling.
12174 case ISD::INTRINSIC_W_CHAIN: return SDValue();
12175
12176 case ISD::BITCAST: return LowerBITCAST(Op, DAG);
12177
12178 // Frame & Return address.
12179 case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG);
12180 case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
12181
12183 return LowerINTRINSIC_VOID(Op, DAG);
12184 case ISD::BSWAP:
12185 return LowerBSWAP(Op, DAG);
12186 case ISD::ATOMIC_CMP_SWAP:
12187 return LowerATOMIC_CMP_SWAP(Op, DAG);
12188 case ISD::ATOMIC_STORE:
12189 return LowerATOMIC_LOAD_STORE(Op, DAG);
12190 case ISD::IS_FPCLASS:
12191 return LowerIS_FPCLASS(Op, DAG);
12192 }
12193}
12194
12195void PPCTargetLowering::ReplaceNodeResults(SDNode *N,
12196                                           SmallVectorImpl<SDValue> &Results,
12197 SelectionDAG &DAG) const {
12198 SDLoc dl(N);
12199 switch (N->getOpcode()) {
12200 default:
12201 llvm_unreachable("Do not know how to custom type legalize this operation!");
12202 case ISD::ATOMIC_LOAD: {
12203 SDValue Res = LowerATOMIC_LOAD_STORE(SDValue(N, 0), DAG);
12204 Results.push_back(Res);
12205 Results.push_back(Res.getValue(1));
12206 break;
12207 }
12208 case ISD::READCYCLECOUNTER: {
12209 SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
12210 SDValue RTB = DAG.getNode(PPCISD::READ_TIME_BASE, dl, VTs, N->getOperand(0));
12211
12212 Results.push_back(
12213 DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, RTB, RTB.getValue(1)));
12214 Results.push_back(RTB.getValue(2));
12215 break;
12216 }
12217 case ISD::INTRINSIC_W_CHAIN: {
12218 if (N->getConstantOperandVal(1) != Intrinsic::loop_decrement)
12219 break;
12220
12221 assert(N->getValueType(0) == MVT::i1 &&
12222 "Unexpected result type for CTR decrement intrinsic");
12223 EVT SVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
12224 N->getValueType(0));
12225 SDVTList VTs = DAG.getVTList(SVT, MVT::Other);
12226 SDValue NewInt = DAG.getNode(N->getOpcode(), dl, VTs, N->getOperand(0),
12227 N->getOperand(1));
12228
12229 Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, NewInt));
12230 Results.push_back(NewInt.getValue(1));
12231 break;
12232 }
12233 case ISD::INTRINSIC_WO_CHAIN: {
12234 switch (N->getConstantOperandVal(0)) {
12235 case Intrinsic::ppc_pack_longdouble:
12236 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::ppcf128,
12237 N->getOperand(2), N->getOperand(1)));
12238 break;
12239 case Intrinsic::ppc_maxfe:
12240 case Intrinsic::ppc_minfe:
12241 case Intrinsic::ppc_fnmsub:
12242 case Intrinsic::ppc_convert_f128_to_ppcf128:
12243 Results.push_back(LowerINTRINSIC_WO_CHAIN(SDValue(N, 0), DAG));
12244 break;
12245 }
12246 break;
12247 }
12248 case ISD::VAARG: {
12249 if (!Subtarget.isSVR4ABI() || Subtarget.isPPC64())
12250 return;
12251
12252 EVT VT = N->getValueType(0);
12253
12254 if (VT == MVT::i64) {
12255 SDValue NewNode = LowerVAARG(SDValue(N, 1), DAG);
12256
12257 Results.push_back(NewNode);
12258 Results.push_back(NewNode.getValue(1));
12259 }
12260 return;
12261 }
12262 case ISD::STRICT_FP_TO_SINT:
12263 case ISD::STRICT_FP_TO_UINT:
12264 case ISD::FP_TO_SINT:
12265 case ISD::FP_TO_UINT: {
12266 // LowerFP_TO_INT() can only handle f32 and f64.
12267 if (N->getOperand(N->isStrictFPOpcode() ? 1 : 0).getValueType() ==
12268 MVT::ppcf128)
12269 return;
12270 SDValue LoweredValue = LowerFP_TO_INT(SDValue(N, 0), DAG, dl);
12271 Results.push_back(LoweredValue);
12272 if (N->isStrictFPOpcode())
12273 Results.push_back(LoweredValue.getValue(1));
12274 return;
12275 }
12276 case ISD::TRUNCATE: {
12277 if (!N->getValueType(0).isVector())
12278 return;
12279 SDValue Lowered = LowerTRUNCATEVector(SDValue(N, 0), DAG);
12280 if (Lowered)
12281 Results.push_back(Lowered);
12282 return;
12283 }
12284 case ISD::SCALAR_TO_VECTOR: {
12285 SDValue Lowered = LowerSCALAR_TO_VECTOR(SDValue(N, 0), DAG);
12286 if (Lowered)
12287 Results.push_back(Lowered);
12288 return;
12289 }
12290 case ISD::FSHL:
12291 case ISD::FSHR:
12292 // Don't handle funnel shifts here.
12293 return;
12294 case ISD::BITCAST:
12295 // Don't handle bitcast here.
12296 return;
12297 case ISD::FP_EXTEND:
12298 SDValue Lowered = LowerFP_EXTEND(SDValue(N, 0), DAG);
12299 if (Lowered)
12300 Results.push_back(Lowered);
12301 return;
12302 }
12303}
12304
12305//===----------------------------------------------------------------------===//
12306// Other Lowering Code
12307//===----------------------------------------------------------------------===//
12308
12309static Instruction *callIntrinsic(IRBuilderBase &Builder, Intrinsic::ID Id) {
12310 return Builder.CreateIntrinsic(Id, {}, {});
12311}
12312
12313// The mappings for emitLeading/TrailingFence is taken from
12314// http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html
12315Instruction *PPCTargetLowering::emitLeadingFence(IRBuilderBase &Builder,
12316 Instruction *Inst,
12317 AtomicOrdering Ord) const {
12318 if (Ord == AtomicOrdering::SequentiallyConsistent)
12319 return callIntrinsic(Builder, Intrinsic::ppc_sync);
12320 if (isReleaseOrStronger(Ord))
12321 return callIntrinsic(Builder, Intrinsic::ppc_lwsync);
12322 return nullptr;
12323}
12324
12325Instruction *PPCTargetLowering::emitTrailingFence(IRBuilderBase &Builder,
12326 Instruction *Inst,
12327 AtomicOrdering Ord) const {
12328 if (Inst->hasAtomicLoad() && isAcquireOrStronger(Ord)) {
12329 // See http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html and
12330 // http://www.rdrop.com/users/paulmck/scalability/paper/N2745r.2011.03.04a.html
12331 // and http://www.cl.cam.ac.uk/~pes20/cppppc/ for justification.
12332 if (isa<LoadInst>(Inst))
12333 return Builder.CreateIntrinsic(Intrinsic::ppc_cfence, {Inst->getType()},
12334 {Inst});
12335 // FIXME: Can use isync for rmw operation.
12336 return callIntrinsic(Builder, Intrinsic::ppc_lwsync);
12337 }
12338 return nullptr;
12339}
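// Illustrative summary (not from the original source): with the mappings
// above, a seq_cst store gets a leading 'sync' and a release store a leading
// 'lwsync'; an acquire load gets a trailing llvm.ppc.cfence (which later
// expands to an isync-based barrier), while acquire read-modify-write
// operations currently fall back to a trailing 'lwsync'.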
12340
12341MachineBasicBlock *
12342PPCTargetLowering::EmitAtomicBinary(MachineInstr &MI, MachineBasicBlock *BB,
12343 unsigned AtomicSize,
12344 unsigned BinOpcode,
12345 unsigned CmpOpcode,
12346 unsigned CmpPred) const {
12347 // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
12348 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
12349
12350 auto LoadMnemonic = PPC::LDARX;
12351 auto StoreMnemonic = PPC::STDCX;
12352 switch (AtomicSize) {
12353 default:
12354 llvm_unreachable("Unexpected size of atomic entity");
12355 case 1:
12356 LoadMnemonic = PPC::LBARX;
12357 StoreMnemonic = PPC::STBCX;
12358 assert(Subtarget.hasPartwordAtomics() && "Call this only with size >=4");
12359 break;
12360 case 2:
12361 LoadMnemonic = PPC::LHARX;
12362 StoreMnemonic = PPC::STHCX;
12363 assert(Subtarget.hasPartwordAtomics() && "Call this only with size >=4");
12364 break;
12365 case 4:
12366 LoadMnemonic = PPC::LWARX;
12367 StoreMnemonic = PPC::STWCX;
12368 break;
12369 case 8:
12370 LoadMnemonic = PPC::LDARX;
12371 StoreMnemonic = PPC::STDCX;
12372 break;
12373 }
12374
12375 const BasicBlock *LLVM_BB = BB->getBasicBlock();
12376 MachineFunction *F = BB->getParent();
12377 MachineFunction::iterator It = ++BB->getIterator();
12378
12379 Register dest = MI.getOperand(0).getReg();
12380 Register ptrA = MI.getOperand(1).getReg();
12381 Register ptrB = MI.getOperand(2).getReg();
12382 Register incr = MI.getOperand(3).getReg();
12383 DebugLoc dl = MI.getDebugLoc();
12384
12385 MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
12386 MachineBasicBlock *loop2MBB =
12387 CmpOpcode ? F->CreateMachineBasicBlock(LLVM_BB) : nullptr;
12388 MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
12389 F->insert(It, loopMBB);
12390 if (CmpOpcode)
12391 F->insert(It, loop2MBB);
12392 F->insert(It, exitMBB);
12393 exitMBB->splice(exitMBB->begin(), BB,
12394 std::next(MachineBasicBlock::iterator(MI)), BB->end());
12395 exitMBB->transferSuccessorsAndUpdatePHIs(BB);
12396
12397 MachineRegisterInfo &RegInfo = F->getRegInfo();
12398 Register TmpReg = (!BinOpcode) ? incr :
12399 RegInfo.createVirtualRegister( AtomicSize == 8 ? &PPC::G8RCRegClass
12400 : &PPC::GPRCRegClass);
12401
12402 // thisMBB:
12403 // ...
12404 // fallthrough --> loopMBB
12405 BB->addSuccessor(loopMBB);
12406
12407 // loopMBB:
12408 // l[wd]arx dest, ptr
12409 // add r0, dest, incr
12410 // st[wd]cx. r0, ptr
12411 // bne- loopMBB
12412 // fallthrough --> exitMBB
12413
12414 // For max/min...
12415 // loopMBB:
12416 // l[wd]arx dest, ptr
12417 // cmpl?[wd] dest, incr
12418 // bgt exitMBB
12419 // loop2MBB:
12420 // st[wd]cx. dest, ptr
12421 // bne- loopMBB
12422 // fallthrough --> exitMBB
12423
12424 BB = loopMBB;
12425 BuildMI(BB, dl, TII->get(LoadMnemonic), dest)
12426 .addReg(ptrA).addReg(ptrB);
12427 if (BinOpcode)
12428 BuildMI(BB, dl, TII->get(BinOpcode), TmpReg).addReg(incr).addReg(dest);
12429 if (CmpOpcode) {
12430 Register CrReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
12431 // Signed comparisons of byte or halfword values must be sign-extended.
12432 if (CmpOpcode == PPC::CMPW && AtomicSize < 4) {
12433 Register ExtReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
12434 BuildMI(BB, dl, TII->get(AtomicSize == 1 ? PPC::EXTSB : PPC::EXTSH),
12435 ExtReg).addReg(dest);
12436 BuildMI(BB, dl, TII->get(CmpOpcode), CrReg).addReg(ExtReg).addReg(incr);
12437 } else
12438 BuildMI(BB, dl, TII->get(CmpOpcode), CrReg).addReg(dest).addReg(incr);
12439
12440 BuildMI(BB, dl, TII->get(PPC::BCC))
12441 .addImm(CmpPred)
12442 .addReg(CrReg)
12443 .addMBB(exitMBB);
12444 BB->addSuccessor(loop2MBB);
12445 BB->addSuccessor(exitMBB);
12446 BB = loop2MBB;
12447 }
12448 BuildMI(BB, dl, TII->get(StoreMnemonic))
12449 .addReg(TmpReg).addReg(ptrA).addReg(ptrB);
12450 BuildMI(BB, dl, TII->get(PPC::BCC))
12451 .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loopMBB);
12452 BB->addSuccessor(loopMBB);
12453 BB->addSuccessor(exitMBB);
12454
12455 // exitMBB:
12456 // ...
12457 BB = exitMBB;
12458 return BB;
12459}
12460
12461static bool isSignExtended(MachineInstr &MI, const PPCInstrInfo *TII) {
12462 switch(MI.getOpcode()) {
12463 default:
12464 return false;
12465 case PPC::COPY:
12466 return TII->isSignExtended(MI.getOperand(1).getReg(),
12467 &MI.getMF()->getRegInfo());
12468 case PPC::LHA:
12469 case PPC::LHA8:
12470 case PPC::LHAU:
12471 case PPC::LHAU8:
12472 case PPC::LHAUX:
12473 case PPC::LHAUX8:
12474 case PPC::LHAX:
12475 case PPC::LHAX8:
12476 case PPC::LWA:
12477 case PPC::LWAUX:
12478 case PPC::LWAX:
12479 case PPC::LWAX_32:
12480 case PPC::LWA_32:
12481 case PPC::PLHA:
12482 case PPC::PLHA8:
12483 case PPC::PLHA8pc:
12484 case PPC::PLHApc:
12485 case PPC::PLWA:
12486 case PPC::PLWA8:
12487 case PPC::PLWA8pc:
12488 case PPC::PLWApc:
12489 case PPC::EXTSB:
12490 case PPC::EXTSB8:
12491 case PPC::EXTSB8_32_64:
12492 case PPC::EXTSB8_rec:
12493 case PPC::EXTSB_rec:
12494 case PPC::EXTSH:
12495 case PPC::EXTSH8:
12496 case PPC::EXTSH8_32_64:
12497 case PPC::EXTSH8_rec:
12498 case PPC::EXTSH_rec:
12499 case PPC::EXTSW:
12500 case PPC::EXTSWSLI:
12501 case PPC::EXTSWSLI_32_64:
12502 case PPC::EXTSWSLI_32_64_rec:
12503 case PPC::EXTSWSLI_rec:
12504 case PPC::EXTSW_32:
12505 case PPC::EXTSW_32_64:
12506 case PPC::EXTSW_32_64_rec:
12507 case PPC::EXTSW_rec:
12508 case PPC::SRAW:
12509 case PPC::SRAWI:
12510 case PPC::SRAWI_rec:
12511 case PPC::SRAW_rec:
12512 return true;
12513 }
12514 return false;
12515}
12516
12517MachineBasicBlock *PPCTargetLowering::EmitPartwordAtomicBinary(
12518    MachineInstr &MI, MachineBasicBlock *BB,
12519 bool is8bit, // operation
12520 unsigned BinOpcode, unsigned CmpOpcode, unsigned CmpPred) const {
12521 // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
12522 const PPCInstrInfo *TII = Subtarget.getInstrInfo();
12523
12524 // If this is a signed comparison and the value being compared is not known
12525 // to be sign extended, sign extend it here.
12526 DebugLoc dl = MI.getDebugLoc();
12527 MachineFunction *F = BB->getParent();
12528 MachineRegisterInfo &RegInfo = F->getRegInfo();
12529 Register incr = MI.getOperand(3).getReg();
12530 bool IsSignExtended =
12531 incr.isVirtual() && isSignExtended(*RegInfo.getVRegDef(incr), TII);
12532
12533 if (CmpOpcode == PPC::CMPW && !IsSignExtended) {
12534 Register ValueReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
12535 BuildMI(*BB, MI, dl, TII->get(is8bit ? PPC::EXTSB : PPC::EXTSH), ValueReg)
12536 .addReg(MI.getOperand(3).getReg());
12537 MI.getOperand(3).setReg(ValueReg);
12538 incr = ValueReg;
12539 }
12540 // If we support part-word atomic mnemonics, just use them
12541 if (Subtarget.hasPartwordAtomics())
12542 return EmitAtomicBinary(MI, BB, is8bit ? 1 : 2, BinOpcode, CmpOpcode,
12543 CmpPred);
12544
12545 // In 64 bit mode we have to use 64 bits for addresses, even though the
12546 // lwarx/stwcx are 32 bits. With the 32-bit atomics we can use address
12547 // registers without caring whether they're 32 or 64, but here we're
12548 // doing actual arithmetic on the addresses.
12549 bool is64bit = Subtarget.isPPC64();
12550 bool isLittleEndian = Subtarget.isLittleEndian();
12551 unsigned ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;
12552
12553 const BasicBlock *LLVM_BB = BB->getBasicBlock();
12554 MachineFunction::iterator It = ++BB->getIterator();
12555
12556 Register dest = MI.getOperand(0).getReg();
12557 Register ptrA = MI.getOperand(1).getReg();
12558 Register ptrB = MI.getOperand(2).getReg();
12559
12560 MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
12561 MachineBasicBlock *loop2MBB =
12562 CmpOpcode ? F->CreateMachineBasicBlock(LLVM_BB) : nullptr;
12563 MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
12564 F->insert(It, loopMBB);
12565 if (CmpOpcode)
12566 F->insert(It, loop2MBB);
12567 F->insert(It, exitMBB);
12568 exitMBB->splice(exitMBB->begin(), BB,
12569 std::next(MachineBasicBlock::iterator(MI)), BB->end());
12570 exitMBB->transferSuccessorsAndUpdatePHIs(BB);
12571
12572 const TargetRegisterClass *RC =
12573 is64bit ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
12574 const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
12575
12576 Register PtrReg = RegInfo.createVirtualRegister(RC);
12577 Register Shift1Reg = RegInfo.createVirtualRegister(GPRC);
12578 Register ShiftReg =
12579 isLittleEndian ? Shift1Reg : RegInfo.createVirtualRegister(GPRC);
12580 Register Incr2Reg = RegInfo.createVirtualRegister(GPRC);
12581 Register MaskReg = RegInfo.createVirtualRegister(GPRC);
12582 Register Mask2Reg = RegInfo.createVirtualRegister(GPRC);
12583 Register Mask3Reg = RegInfo.createVirtualRegister(GPRC);
12584 Register Tmp2Reg = RegInfo.createVirtualRegister(GPRC);
12585 Register Tmp3Reg = RegInfo.createVirtualRegister(GPRC);
12586 Register Tmp4Reg = RegInfo.createVirtualRegister(GPRC);
12587 Register TmpDestReg = RegInfo.createVirtualRegister(GPRC);
12588 Register SrwDestReg = RegInfo.createVirtualRegister(GPRC);
12589 Register Ptr1Reg;
12590 Register TmpReg =
12591 (!BinOpcode) ? Incr2Reg : RegInfo.createVirtualRegister(GPRC);
12592
12593 // thisMBB:
12594 // ...
12595 // fallthrough --> loopMBB
12596 BB->addSuccessor(loopMBB);
12597
12598 // The 4-byte load must be aligned, while a char or short may be
12599 // anywhere in the word. Hence all this nasty bookkeeping code.
12600 // add ptr1, ptrA, ptrB [copy if ptrA==0]
12601 // rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27]
12602 // xori shift, shift1, 24 [16]
12603 // rlwinm ptr, ptr1, 0, 0, 29
12604 // slw incr2, incr, shift
12605 // li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535]
12606 // slw mask, mask2, shift
12607 // loopMBB:
12608 // lwarx tmpDest, ptr
12609 // add tmp, tmpDest, incr2
12610 // andc tmp2, tmpDest, mask
12611 // and tmp3, tmp, mask
12612 // or tmp4, tmp3, tmp2
12613 // stwcx. tmp4, ptr
12614 // bne- loopMBB
12615 // fallthrough --> exitMBB
12616 // srw SrwDest, tmpDest, shift
12617 // rlwinm SrwDest, SrwDest, 0, 24 [16], 31
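// Worked example (illustrative): assuming an 8-bit atomic whose address has
// the low two bits equal to 3, "rlwinm shift1, ptr1, 3, 27, 28" produces
// shift1 = 24. On little-endian that is the final shift, so the byte lives in
// bits 24..31 of the loaded word; on big-endian the extra "xori shift,
// shift1, 24" flips it to 0, matching the byte's position in a big-endian
// word. "rlwinm ptr, ptr1, 0, 0, 29" then clears the low two address bits to
// form the aligned word address required by lwarx/stwcx.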
12618 if (ptrA != ZeroReg) {
12619 Ptr1Reg = RegInfo.createVirtualRegister(RC);
12620 BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)
12621 .addReg(ptrA)
12622 .addReg(ptrB);
12623 } else {
12624 Ptr1Reg = ptrB;
12625 }
12626 // We need to use a 32-bit subregister here to avoid a register-class
12627 // mismatch in 64-bit mode.
12628 BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg)
12629 .addReg(Ptr1Reg, 0, is64bit ? PPC::sub_32 : 0)
12630 .addImm(3)
12631 .addImm(27)
12632 .addImm(is8bit ? 28 : 27);
12633 if (!isLittleEndian)
12634 BuildMI(BB, dl, TII->get(PPC::XORI), ShiftReg)
12635 .addReg(Shift1Reg)
12636 .addImm(is8bit ? 24 : 16);
12637 if (is64bit)
12638 BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg)
12639 .addReg(Ptr1Reg)
12640 .addImm(0)
12641 .addImm(61);
12642 else
12643 BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg)
12644 .addReg(Ptr1Reg)
12645 .addImm(0)
12646 .addImm(0)
12647 .addImm(29);
12648 BuildMI(BB, dl, TII->get(PPC::SLW), Incr2Reg).addReg(incr).addReg(ShiftReg);
12649 if (is8bit)
12650 BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255);
12651 else {
12652 BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0);
12653 BuildMI(BB, dl, TII->get(PPC::ORI), Mask2Reg)
12654 .addReg(Mask3Reg)
12655 .addImm(65535);
12656 }
12657 BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg)
12658 .addReg(Mask2Reg)
12659 .addReg(ShiftReg);
12660
12661 BB = loopMBB;
12662 BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg)
12663 .addReg(ZeroReg)
12664 .addReg(PtrReg);
12665 if (BinOpcode)
12666 BuildMI(BB, dl, TII->get(BinOpcode), TmpReg)
12667 .addReg(Incr2Reg)
12668 .addReg(TmpDestReg);
12669 BuildMI(BB, dl, TII->get(PPC::ANDC), Tmp2Reg)
12670 .addReg(TmpDestReg)
12671 .addReg(MaskReg);
12672 BuildMI(BB, dl, TII->get(PPC::AND), Tmp3Reg).addReg(TmpReg).addReg(MaskReg);
12673 if (CmpOpcode) {
12674 // For unsigned comparisons, we can directly compare the shifted values.
12675 // For signed comparisons we shift and sign extend.
12676 Register SReg = RegInfo.createVirtualRegister(GPRC);
12677 Register CrReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
12678 BuildMI(BB, dl, TII->get(PPC::AND), SReg)
12679 .addReg(TmpDestReg)
12680 .addReg(MaskReg);
12681 unsigned ValueReg = SReg;
12682 unsigned CmpReg = Incr2Reg;
12683 if (CmpOpcode == PPC::CMPW) {
12684 ValueReg = RegInfo.createVirtualRegister(GPRC);
12685 BuildMI(BB, dl, TII->get(PPC::SRW), ValueReg)
12686 .addReg(SReg)
12687 .addReg(ShiftReg);
12688 Register ValueSReg = RegInfo.createVirtualRegister(GPRC);
12689 BuildMI(BB, dl, TII->get(is8bit ? PPC::EXTSB : PPC::EXTSH), ValueSReg)
12690 .addReg(ValueReg);
12691 ValueReg = ValueSReg;
12692 CmpReg = incr;
12693 }
12694 BuildMI(BB, dl, TII->get(CmpOpcode), CrReg).addReg(ValueReg).addReg(CmpReg);
12695 BuildMI(BB, dl, TII->get(PPC::BCC))
12696 .addImm(CmpPred)
12697 .addReg(CrReg)
12698 .addMBB(exitMBB);
12699 BB->addSuccessor(loop2MBB);
12700 BB->addSuccessor(exitMBB);
12701 BB = loop2MBB;
12702 }
12703 BuildMI(BB, dl, TII->get(PPC::OR), Tmp4Reg).addReg(Tmp3Reg).addReg(Tmp2Reg);
12704 BuildMI(BB, dl, TII->get(PPC::STWCX))
12705 .addReg(Tmp4Reg)
12706 .addReg(ZeroReg)
12707 .addReg(PtrReg);
12708 BuildMI(BB, dl, TII->get(PPC::BCC))
12709 .addImm(PPC::PRED_NE)
12710 .addReg(PPC::CR0)
12711 .addMBB(loopMBB);
12712 BB->addSuccessor(loopMBB);
12713 BB->addSuccessor(exitMBB);
12714
12715 // exitMBB:
12716 // ...
12717 BB = exitMBB;
12718 // Since the shift amount is not a constant, we need to clear
12719 // the upper bits with a separate RLWINM.
12720 BuildMI(*BB, BB->begin(), dl, TII->get(PPC::RLWINM), dest)
12721 .addReg(SrwDestReg)
12722 .addImm(0)
12723 .addImm(is8bit ? 24 : 16)
12724 .addImm(31);
12725 BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), SrwDestReg)
12726 .addReg(TmpDestReg)
12727 .addReg(ShiftReg);
12728 return BB;
12729}
12730
12731MachineBasicBlock *
12732PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr &MI,
12733 MachineBasicBlock *MBB) const {
12734 DebugLoc DL = MI.getDebugLoc();
12735 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
12736 const PPCRegisterInfo *TRI = Subtarget.getRegisterInfo();
12737
12738 MachineFunction *MF = MBB->getParent();
12739 MachineRegisterInfo &MRI = MF->getRegInfo();
12740
12741 const BasicBlock *BB = MBB->getBasicBlock();
12742 MachineFunction::iterator I = ++MBB->getIterator();
12743
12744 Register DstReg = MI.getOperand(0).getReg();
12745 const TargetRegisterClass *RC = MRI.getRegClass(DstReg);
12746 assert(TRI->isTypeLegalForClass(*RC, MVT::i32) && "Invalid destination!");
12747 Register mainDstReg = MRI.createVirtualRegister(RC);
12748 Register restoreDstReg = MRI.createVirtualRegister(RC);
12749
12750 MVT PVT = getPointerTy(MF->getDataLayout());
12751 assert((PVT == MVT::i64 || PVT == MVT::i32) &&
12752 "Invalid Pointer Size!");
12753 // For v = setjmp(buf), we generate
12754 //
12755 // thisMBB:
12756 // SjLjSetup mainMBB
12757 // bl mainMBB
12758 // v_restore = 1
12759 // b sinkMBB
12760 //
12761 // mainMBB:
12762 // buf[LabelOffset] = LR
12763 // v_main = 0
12764 //
12765 // sinkMBB:
12766 // v = phi(main, restore)
12767 //
12768
12769 MachineBasicBlock *thisMBB = MBB;
12770 MachineBasicBlock *mainMBB = MF->CreateMachineBasicBlock(BB);
12771 MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(BB);
12772 MF->insert(I, mainMBB);
12773 MF->insert(I, sinkMBB);
12774
12775 MachineInstrBuilder MIB;
12776
12777 // Transfer the remainder of BB and its successor edges to sinkMBB.
12778 sinkMBB->splice(sinkMBB->begin(), MBB,
12779 std::next(MachineBasicBlock::iterator(MI)), MBB->end());
12780 sinkMBB->transferSuccessorsAndUpdatePHIs(MBB);
12781
12782 // Note that the structure of the jmp_buf used here is not compatible
12783 // with that used by libc, and is not designed to be. Specifically, it
12784 // stores only those 'reserved' registers that LLVM does not otherwise
12785 // understand how to spill. Also, by convention, by the time this
12786 // intrinsic is called, Clang has already stored the frame address in the
12787 // first slot of the buffer and stack address in the third. Following the
12788 // X86 target code, we'll store the jump address in the second slot. We also
12789 // need to save the TOC pointer (R2) to handle jumps between shared
12790 // libraries, and that will be stored in the fourth slot. The thread
12791 // identifier (R13) is not affected.
12792
12793 // thisMBB:
12794 const int64_t LabelOffset = 1 * PVT.getStoreSize();
12795 const int64_t TOCOffset = 3 * PVT.getStoreSize();
12796 const int64_t BPOffset = 4 * PVT.getStoreSize();
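// Worked instance (illustrative): on a 64-bit target the pointer store size
// is 8, so LabelOffset = 8, TOCOffset = 24 and BPOffset = 32; i.e. the jump
// address goes into the second slot, the TOC pointer into the fourth and the
// base pointer into the fifth slot of the buffer described above.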
12797
12798 // Prepare the IP in a register.
12799 const TargetRegisterClass *PtrRC = getRegClassFor(PVT);
12800 Register LabelReg = MRI.createVirtualRegister(PtrRC);
12801 Register BufReg = MI.getOperand(1).getReg();
12802
12803 if (Subtarget.is64BitELFABI()) {
12804 setUsesTOCBasePtr(*MBB->getParent());
12805 MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::STD))
12806 .addReg(PPC::X2)
12807 .addImm(TOCOffset)
12808 .addReg(BufReg)
12809 .cloneMemRefs(MI);
12810 }
12811
12812 // Naked functions never have a base pointer, and so we use r1. For all
12813 // other functions, this decision must be deferred until PEI.
12814 unsigned BaseReg;
12815 if (MF->getFunction().hasFnAttribute(Attribute::Naked))
12816 BaseReg = Subtarget.isPPC64() ? PPC::X1 : PPC::R1;
12817 else
12818 BaseReg = Subtarget.isPPC64() ? PPC::BP8 : PPC::BP;
12819
12820 MIB = BuildMI(*thisMBB, MI, DL,
12821 TII->get(Subtarget.isPPC64() ? PPC::STD : PPC::STW))
12822 .addReg(BaseReg)
12823 .addImm(BPOffset)
12824 .addReg(BufReg)
12825 .cloneMemRefs(MI);
12826
12827 // Setup
12828 MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::BCLalways)).addMBB(mainMBB);
12829 MIB.addRegMask(TRI->getNoPreservedMask());
12830
12831 BuildMI(*thisMBB, MI, DL, TII->get(PPC::LI), restoreDstReg).addImm(1);
12832
12833 MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::EH_SjLj_Setup))
12834 .addMBB(mainMBB);
12835 MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::B)).addMBB(sinkMBB);
12836
12837 thisMBB->addSuccessor(mainMBB, BranchProbability::getZero());
12838 thisMBB->addSuccessor(sinkMBB, BranchProbability::getOne());
12839
12840 // mainMBB:
12841 // mainDstReg = 0
12842 MIB =
12843 BuildMI(mainMBB, DL,
12844 TII->get(Subtarget.isPPC64() ? PPC::MFLR8 : PPC::MFLR), LabelReg);
12845
12846 // Store IP
12847 if (Subtarget.isPPC64()) {
12848 MIB = BuildMI(mainMBB, DL, TII->get(PPC::STD))
12849 .addReg(LabelReg)
12850 .addImm(LabelOffset)
12851 .addReg(BufReg);
12852 } else {
12853 MIB = BuildMI(mainMBB, DL, TII->get(PPC::STW))
12854 .addReg(LabelReg)
12855 .addImm(LabelOffset)
12856 .addReg(BufReg);
12857 }
12858 MIB.cloneMemRefs(MI);
12859
12860 BuildMI(mainMBB, DL, TII->get(PPC::LI), mainDstReg).addImm(0);
12861 mainMBB->addSuccessor(sinkMBB);
12862
12863 // sinkMBB:
12864 BuildMI(*sinkMBB, sinkMBB->begin(), DL,
12865 TII->get(PPC::PHI), DstReg)
12866 .addReg(mainDstReg).addMBB(mainMBB)
12867 .addReg(restoreDstReg).addMBB(thisMBB);
12868
12869 MI.eraseFromParent();
12870 return sinkMBB;
12871}
12872
12873MachineBasicBlock *
12874PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr &MI,
12875 MachineBasicBlock *MBB) const {
12876 DebugLoc DL = MI.getDebugLoc();
12877 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
12878
12879 MachineFunction *MF = MBB->getParent();
12880 MachineRegisterInfo &MRI = MF->getRegInfo();
12881
12882 MVT PVT = getPointerTy(MF->getDataLayout());
12883 assert((PVT == MVT::i64 || PVT == MVT::i32) &&
12884 "Invalid Pointer Size!");
12885
12886 const TargetRegisterClass *RC =
12887 (PVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
12888 Register Tmp = MRI.createVirtualRegister(RC);
12889 // Since FP is only updated here but NOT referenced, it's treated as GPR.
12890 unsigned FP = (PVT == MVT::i64) ? PPC::X31 : PPC::R31;
12891 unsigned SP = (PVT == MVT::i64) ? PPC::X1 : PPC::R1;
12892 unsigned BP =
12893 (PVT == MVT::i64)
12894 ? PPC::X30
12895 : (Subtarget.isSVR4ABI() && isPositionIndependent() ? PPC::R29
12896 : PPC::R30);
12897
12897
12898 MachineInstrBuilder MIB;
12899
12900 const int64_t LabelOffset = 1 * PVT.getStoreSize();
12901 const int64_t SPOffset = 2 * PVT.getStoreSize();
12902 const int64_t TOCOffset = 3 * PVT.getStoreSize();
12903 const int64_t BPOffset = 4 * PVT.getStoreSize();
12904
12905 Register BufReg = MI.getOperand(0).getReg();
12906
12907 // Reload FP (the jumped-to function may not have had a
12908 // frame pointer, and if so, then its r31 will be restored
12909 // as necessary).
12910 if (PVT == MVT::i64) {
12911 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), FP)
12912 .addImm(0)
12913 .addReg(BufReg);
12914 } else {
12915 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), FP)
12916 .addImm(0)
12917 .addReg(BufReg);
12918 }
12919 MIB.cloneMemRefs(MI);
12920
12921 // Reload IP
12922 if (PVT == MVT::i64) {
12923 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), Tmp)
12924 .addImm(LabelOffset)
12925 .addReg(BufReg);
12926 } else {
12927 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), Tmp)
12928 .addImm(LabelOffset)
12929 .addReg(BufReg);
12930 }
12931 MIB.cloneMemRefs(MI);
12932
12933 // Reload SP
12934 if (PVT == MVT::i64) {
12935 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), SP)
12936 .addImm(SPOffset)
12937 .addReg(BufReg);
12938 } else {
12939 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), SP)
12940 .addImm(SPOffset)
12941 .addReg(BufReg);
12942 }
12943 MIB.cloneMemRefs(MI);
12944
12945 // Reload BP
12946 if (PVT == MVT::i64) {
12947 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), BP)
12948 .addImm(BPOffset)
12949 .addReg(BufReg);
12950 } else {
12951 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), BP)
12952 .addImm(BPOffset)
12953 .addReg(BufReg);
12954 }
12955 MIB.cloneMemRefs(MI);
12956
12957 // Reload TOC
12958 if (PVT == MVT::i64 && Subtarget.isSVR4ABI()) {
12959 setUsesTOCBasePtr(*MBB->getParent());
12960 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), PPC::X2)
12961 .addImm(TOCOffset)
12962 .addReg(BufReg)
12963 .cloneMemRefs(MI);
12964 }
12965
12966 // Jump
12967 BuildMI(*MBB, MI, DL,
12968 TII->get(PVT == MVT::i64 ? PPC::MTCTR8 : PPC::MTCTR)).addReg(Tmp);
12969 BuildMI(*MBB, MI, DL, TII->get(PVT == MVT::i64 ? PPC::BCTR8 : PPC::BCTR));
12970
12971 MI.eraseFromParent();
12972 return MBB;
12973}
12974
12975bool PPCTargetLowering::hasInlineStackProbe(const MachineFunction &MF) const {
12976 // If the function specifically requests inline stack probes, emit them.
12977 if (MF.getFunction().hasFnAttribute("probe-stack"))
12978 return MF.getFunction().getFnAttribute("probe-stack").getValueAsString() ==
12979 "inline-asm";
12980 return false;
12981}
12982
12983unsigned PPCTargetLowering::getStackProbeSize(const MachineFunction &MF) const {
12984 const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
12985 unsigned StackAlign = TFI->getStackAlignment();
12986 assert(StackAlign >= 1 && isPowerOf2_32(StackAlign) &&
12987 "Unexpected stack alignment");
12988 // The default stack probe size is 4096 if the function has no
12989 // stack-probe-size attribute.
12990 const Function &Fn = MF.getFunction();
12991 unsigned StackProbeSize =
12992 Fn.getFnAttributeAsParsedInteger("stack-probe-size", 4096);
12993 // Round down to the stack alignment.
12994 StackProbeSize &= ~(StackAlign - 1);
12995 return StackProbeSize ? StackProbeSize : StackAlign;
12996}
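// Worked example (illustrative, assuming a 16-byte stack alignment): an
// attribute of "stack-probe-size"=1000 rounds down to 992; no attribute gives
// the default 4096; and a value smaller than the alignment rounds to 0, in
// which case the stack alignment itself is returned.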
12997
12998// Lower dynamic stack allocation with probing. `emitProbedAlloca` is split
12999// into three phases. In the first phase, it uses the pseudo instruction
13000// PREPARE_PROBED_ALLOCA to get the future result of the actual FramePointer
13001// and FinalStackPtr. In the second phase, it generates a loop that probes each
13002// block. Finally, it uses the pseudo instruction DYNAREAOFFSET to get the
13003// future result of MaxCallFrameSize so that it can compute the correct data area pointer.
13004MachineBasicBlock *
13005PPCTargetLowering::emitProbedAlloca(MachineInstr &MI,
13006 MachineBasicBlock *MBB) const {
13007 const bool isPPC64 = Subtarget.isPPC64();
13008 MachineFunction *MF = MBB->getParent();
13009 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
13010 DebugLoc DL = MI.getDebugLoc();
13011 const unsigned ProbeSize = getStackProbeSize(*MF);
13012 const BasicBlock *ProbedBB = MBB->getBasicBlock();
13013 MachineRegisterInfo &MRI = MF->getRegInfo();
13014 // The CFG for stack probing looks like:
13015 // +-----+
13016 // | MBB |
13017 // +--+--+
13018 // |
13019 // +----v----+
13020 // +--->+ TestMBB +---+
13021 // | +----+----+ |
13022 // | | |
13023 // | +-----v----+ |
13024 // +---+ BlockMBB | |
13025 // +----------+ |
13026 // |
13027 // +---------+ |
13028 // | TailMBB +<--+
13029 // +---------+
13030 // In MBB, calculate previous frame pointer and final stack pointer.
13031 // In TestMBB, test if sp is equal to final stack pointer, if so, jump to
13032 // TailMBB. In BlockMBB, update the sp atomically and jump back to TestMBB.
13033 // TailMBB is spliced via \p MI.
13034 MachineBasicBlock *TestMBB = MF->CreateMachineBasicBlock(ProbedBB);
13035 MachineBasicBlock *TailMBB = MF->CreateMachineBasicBlock(ProbedBB);
13036 MachineBasicBlock *BlockMBB = MF->CreateMachineBasicBlock(ProbedBB);
13037
13038 MachineFunction::iterator MBBIter = ++MBB->getIterator();
13039 MF->insert(MBBIter, TestMBB);
13040 MF->insert(MBBIter, BlockMBB);
13041 MF->insert(MBBIter, TailMBB);
13042
13043 const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
13044 const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
13045
13046 Register DstReg = MI.getOperand(0).getReg();
13047 Register NegSizeReg = MI.getOperand(1).getReg();
13048 Register SPReg = isPPC64 ? PPC::X1 : PPC::R1;
13049 Register FinalStackPtr = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
13050 Register FramePointer = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
13051 Register ActualNegSizeReg = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
13052
13053 // Since the value of NegSizeReg might be realigned during prologue/epilogue
13054 // insertion, insert a PREPARE_PROBED_ALLOCA pseudo instruction to get the
13055 // actual FramePointer and NegSize.
13056 unsigned ProbeOpc;
13057 if (!MRI.hasOneNonDBGUse(NegSizeReg))
13058 ProbeOpc =
13059 isPPC64 ? PPC::PREPARE_PROBED_ALLOCA_64 : PPC::PREPARE_PROBED_ALLOCA_32;
13060 else
13061 // By using the PREPARE_PROBED_ALLOCA_NEGSIZE_SAME_REG variants,
13062 // ActualNegSizeReg and NegSizeReg are allocated to the same physical
13063 // register, avoiding a redundant copy when NegSizeReg has only one use
13064 // (the current MI), since that use is replaced by PREPARE_PROBED_ALLOCA here.
13065 ProbeOpc = isPPC64 ? PPC::PREPARE_PROBED_ALLOCA_NEGSIZE_SAME_REG_64
13066 : PPC::PREPARE_PROBED_ALLOCA_NEGSIZE_SAME_REG_32;
13067 BuildMI(*MBB, {MI}, DL, TII->get(ProbeOpc), FramePointer)
13068 .addDef(ActualNegSizeReg)
13069 .addReg(NegSizeReg)
13070 .add(MI.getOperand(2))
13071 .add(MI.getOperand(3));
13072
13073 // Calculate the final stack pointer, which equals SP + ActualNegSize.
13074 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::ADD8 : PPC::ADD4),
13075 FinalStackPtr)
13076 .addReg(SPReg)
13077 .addReg(ActualNegSizeReg);
13078
13079 // Materialize a scratch register for update.
13080 int64_t NegProbeSize = -(int64_t)ProbeSize;
13081 assert(isInt<32>(NegProbeSize) && "Unhandled probe size!");
13082 Register ScratchReg = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
13083 if (!isInt<16>(NegProbeSize)) {
13084 Register TempReg = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
13085 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::LIS8 : PPC::LIS), TempReg)
13086 .addImm(NegProbeSize >> 16);
13087 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::ORI8 : PPC::ORI),
13088 ScratchReg)
13089 .addReg(TempReg)
13090 .addImm(NegProbeSize & 0xFFFF);
13091 } else
13092 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::LI8 : PPC::LI), ScratchReg)
13093 .addImm(NegProbeSize);
13094
13095 {
13096 // Probing leading residual part.
13097 Register Div = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
13098 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::DIVD : PPC::DIVW), Div)
13099 .addReg(ActualNegSizeReg)
13100 .addReg(ScratchReg);
13101 Register Mul = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
13102 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::MULLD : PPC::MULLW), Mul)
13103 .addReg(Div)
13104 .addReg(ScratchReg);
13105 Register NegMod = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
13106 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::SUBF8 : PPC::SUBF), NegMod)
13107 .addReg(Mul)
13108 .addReg(ActualNegSizeReg);
13109 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::STDUX : PPC::STWUX), SPReg)
13110 .addReg(FramePointer)
13111 .addReg(SPReg)
13112 .addReg(NegMod);
13113 }
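// Worked example (illustrative): with ProbeSize = 4096 and ActualNegSizeReg
// materializing as -10000, the divd/mulld/subf sequence above computes
// Div = 2, Mul = -8192 and NegMod = -10000 - (-8192) = -1808, so the leading
// stdux/stwux probes the 1808-byte residual first; the loop below then
// touches the remaining two full 4096-byte blocks.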
13114
13115 {
13116 // The remaining part should be a multiple of ProbeSize.
13117 Register CmpResult = MRI.createVirtualRegister(&PPC::CRRCRegClass);
13118 BuildMI(TestMBB, DL, TII->get(isPPC64 ? PPC::CMPD : PPC::CMPW), CmpResult)
13119 .addReg(SPReg)
13120 .addReg(FinalStackPtr);
13121 BuildMI(TestMBB, DL, TII->get(PPC::BCC))
13122 .addImm(PPC::PRED_EQ)
13123 .addReg(CmpResult)
13124 .addMBB(TailMBB);
13125 TestMBB->addSuccessor(BlockMBB);
13126 TestMBB->addSuccessor(TailMBB);
13127 }
13128
13129 {
13130 // Touch the block.
13131 // |P...|P...|P...
13132 BuildMI(BlockMBB, DL, TII->get(isPPC64 ? PPC::STDUX : PPC::STWUX), SPReg)
13133 .addReg(FramePointer)
13134 .addReg(SPReg)
13135 .addReg(ScratchReg);
13136 BuildMI(BlockMBB, DL, TII->get(PPC::B)).addMBB(TestMBB);
13137 BlockMBB->addSuccessor(TestMBB);
13138 }
13139
13140 // Calculation of MaxCallFrameSize is deferred to prologue/epilogue insertion,
13141 // so use the DYNAREAOFFSET pseudo instruction to get the future result.
13142 Register MaxCallFrameSizeReg =
13143 MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
13144 BuildMI(TailMBB, DL,
13145 TII->get(isPPC64 ? PPC::DYNAREAOFFSET8 : PPC::DYNAREAOFFSET),
13146 MaxCallFrameSizeReg)
13147 .add(MI.getOperand(2))
13148 .add(MI.getOperand(3));
13149 BuildMI(TailMBB, DL, TII->get(isPPC64 ? PPC::ADD8 : PPC::ADD4), DstReg)
13150 .addReg(SPReg)
13151 .addReg(MaxCallFrameSizeReg);
13152
13153 // Splice instructions after MI to TailMBB.
13154 TailMBB->splice(TailMBB->end(), MBB,
13155 std::next(MachineBasicBlock::iterator(MI)), MBB->end());
13156 TailMBB->transferSuccessorsAndUpdatePHIs(MBB);
13157 MBB->addSuccessor(TestMBB);
13158
13159 // Delete the pseudo instruction.
13160 MI.eraseFromParent();
13161
13162 ++NumDynamicAllocaProbed;
13163 return TailMBB;
13164}
13165
13166static bool IsSelectCC(MachineInstr &MI) {
13167 switch (MI.getOpcode()) {
13168 case PPC::SELECT_CC_I4:
13169 case PPC::SELECT_CC_I8:
13170 case PPC::SELECT_CC_F4:
13171 case PPC::SELECT_CC_F8:
13172 case PPC::SELECT_CC_F16:
13173 case PPC::SELECT_CC_VRRC:
13174 case PPC::SELECT_CC_VSFRC:
13175 case PPC::SELECT_CC_VSSRC:
13176 case PPC::SELECT_CC_VSRC:
13177 case PPC::SELECT_CC_SPE4:
13178 case PPC::SELECT_CC_SPE:
13179 return true;
13180 default:
13181 return false;
13182 }
13183}
13184
13185static bool IsSelect(MachineInstr &MI) {
13186 switch (MI.getOpcode()) {
13187 case PPC::SELECT_I4:
13188 case PPC::SELECT_I8:
13189 case PPC::SELECT_F4:
13190 case PPC::SELECT_F8:
13191 case PPC::SELECT_F16:
13192 case PPC::SELECT_SPE:
13193 case PPC::SELECT_SPE4:
13194 case PPC::SELECT_VRRC:
13195 case PPC::SELECT_VSFRC:
13196 case PPC::SELECT_VSSRC:
13197 case PPC::SELECT_VSRC:
13198 return true;
13199 default:
13200 return false;
13201 }
13202}
13203
13204MachineBasicBlock *
13205PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
13206 MachineBasicBlock *BB) const {
13207 if (MI.getOpcode() == TargetOpcode::STACKMAP ||
13208 MI.getOpcode() == TargetOpcode::PATCHPOINT) {
13209 if (Subtarget.is64BitELFABI() &&
13210 MI.getOpcode() == TargetOpcode::PATCHPOINT &&
13211 !Subtarget.isUsingPCRelativeCalls()) {
13212 // Call lowering should have added an r2 operand to indicate a dependence
13213 // on the TOC base pointer value. It can't, however, because there is no
13214 // way to mark the dependence as implicit there, and so the stackmap code
13215 // will confuse it with a regular operand. Instead, add the dependence
13216 // here.
13217 MI.addOperand(MachineOperand::CreateReg(PPC::X2, false, true));
13218 }
13219
13220 return emitPatchPoint(MI, BB);
13221 }
13222
13223 if (MI.getOpcode() == PPC::EH_SjLj_SetJmp32 ||
13224 MI.getOpcode() == PPC::EH_SjLj_SetJmp64) {
13225 return emitEHSjLjSetJmp(MI, BB);
13226 } else if (MI.getOpcode() == PPC::EH_SjLj_LongJmp32 ||
13227 MI.getOpcode() == PPC::EH_SjLj_LongJmp64) {
13228 return emitEHSjLjLongJmp(MI, BB);
13229 }
13230
13231 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
13232
13233 // To "insert" these instructions we actually have to insert their
13234 // control-flow patterns.
13235 const BasicBlock *LLVM_BB = BB->getBasicBlock();
13236 MachineFunction::iterator It = ++BB->getIterator();
13237
13238 MachineFunction *F = BB->getParent();
13239 MachineRegisterInfo &MRI = F->getRegInfo();
13240
13241 if (Subtarget.hasISEL() &&
13242 (MI.getOpcode() == PPC::SELECT_CC_I4 ||
13243 MI.getOpcode() == PPC::SELECT_CC_I8 ||
13244 MI.getOpcode() == PPC::SELECT_I4 || MI.getOpcode() == PPC::SELECT_I8)) {
13245 SmallVector<MachineOperand, 2> Cond;
13246 if (MI.getOpcode() == PPC::SELECT_CC_I4 ||
13247 MI.getOpcode() == PPC::SELECT_CC_I8)
13248 Cond.push_back(MI.getOperand(4));
13249 else
13250 Cond.push_back(MachineOperand::CreateImm(PPC::PRED_BIT_SET));
13251 Cond.push_back(MI.getOperand(1));
13252
13253 DebugLoc dl = MI.getDebugLoc();
13254 TII->insertSelect(*BB, MI, dl, MI.getOperand(0).getReg(), Cond,
13255 MI.getOperand(2).getReg(), MI.getOperand(3).getReg());
13256 } else if (IsSelectCC(MI) || IsSelect(MI)) {
13257 // The incoming instruction knows the destination vreg to set, the
13258 // condition code register to branch on, the true/false values to
13259 // select between, and a branch opcode to use.
13260
13261 // thisMBB:
13262 // ...
13263 // TrueVal = ...
13264 // cmpTY ccX, r1, r2
13265 // bCC sinkMBB
13266 // fallthrough --> copy0MBB
13267 MachineBasicBlock *thisMBB = BB;
13268 MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
13269 MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
13270 DebugLoc dl = MI.getDebugLoc();
13271 F->insert(It, copy0MBB);
13272 F->insert(It, sinkMBB);
13273
13274 // Set the call frame size on entry to the new basic blocks.
13275 // See https://reviews.llvm.org/D156113.
13276 unsigned CallFrameSize = TII->getCallFrameSizeAt(MI);
13277 copy0MBB->setCallFrameSize(CallFrameSize);
13278 sinkMBB->setCallFrameSize(CallFrameSize);
13279
13280 // Transfer the remainder of BB and its successor edges to sinkMBB.
13281 sinkMBB->splice(sinkMBB->begin(), BB,
13282 std::next(MachineBasicBlock::iterator(MI)), BB->end());
13283 sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
13284
13285 // Next, add the true and fallthrough blocks as its successors.
13286 BB->addSuccessor(copy0MBB);
13287 BB->addSuccessor(sinkMBB);
13288
13289 if (IsSelect(MI)) {
13290 BuildMI(BB, dl, TII->get(PPC::BC))
13291 .addReg(MI.getOperand(1).getReg())
13292 .addMBB(sinkMBB);
13293 } else {
13294 unsigned SelectPred = MI.getOperand(4).getImm();
13295 BuildMI(BB, dl, TII->get(PPC::BCC))
13296 .addImm(SelectPred)
13297 .addReg(MI.getOperand(1).getReg())
13298 .addMBB(sinkMBB);
13299 }
13300
13301 // copy0MBB:
13302 // %FalseValue = ...
13303 // # fallthrough to sinkMBB
13304 BB = copy0MBB;
13305
13306 // Update machine-CFG edges
13307 BB->addSuccessor(sinkMBB);
13308
13309 // sinkMBB:
13310 // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
13311 // ...
13312 BB = sinkMBB;
13313 BuildMI(*BB, BB->begin(), dl, TII->get(PPC::PHI), MI.getOperand(0).getReg())
13314 .addReg(MI.getOperand(3).getReg())
13315 .addMBB(copy0MBB)
13316 .addReg(MI.getOperand(2).getReg())
13317 .addMBB(thisMBB);
13318 } else if (MI.getOpcode() == PPC::ReadTB) {
13319 // To read the 64-bit time-base register on a 32-bit target, we read the
13320 // two halves. Should the counter have wrapped while it was being read, we
13321 // need to try again.
13322 // ...
13323 // readLoop:
13324 // mfspr Rx,TBU # load from TBU
13325 // mfspr Ry,TB # load from TB
13326 // mfspr Rz,TBU # load from TBU
13327 // cmpw crX,Rx,Rz # check if 'old'='new'
13328 // bne readLoop # branch if they're not equal
13329 // ...
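// Illustrative note: SPR 269 is TBU (upper 32 bits of the time base) and SPR
// 268 is TB (lower 32 bits), which is why the mfspr immediates below are
// 269/268/269. If the low half wraps between the two reads, the second TBU
// read differs from the first and the cmpw/bne pair retries the sequence.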
13330
13331 MachineBasicBlock *readMBB = F->CreateMachineBasicBlock(LLVM_BB);
13332 MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
13333 DebugLoc dl = MI.getDebugLoc();
13334 F->insert(It, readMBB);
13335 F->insert(It, sinkMBB);
13336
13337 // Transfer the remainder of BB and its successor edges to sinkMBB.
13338 sinkMBB->splice(sinkMBB->begin(), BB,
13339 std::next(MachineBasicBlock::iterator(MI)), BB->end());
13340 sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
13341
13342 BB->addSuccessor(readMBB);
13343 BB = readMBB;
13344
13345 MachineRegisterInfo &RegInfo = F->getRegInfo();
13346 Register ReadAgainReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
13347 Register LoReg = MI.getOperand(0).getReg();
13348 Register HiReg = MI.getOperand(1).getReg();
13349
13350 BuildMI(BB, dl, TII->get(PPC::MFSPR), HiReg).addImm(269);
13351 BuildMI(BB, dl, TII->get(PPC::MFSPR), LoReg).addImm(268);
13352 BuildMI(BB, dl, TII->get(PPC::MFSPR), ReadAgainReg).addImm(269);
13353
13354 Register CmpReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
13355
13356 BuildMI(BB, dl, TII->get(PPC::CMPW), CmpReg)
13357 .addReg(HiReg)
13358 .addReg(ReadAgainReg);
13359 BuildMI(BB, dl, TII->get(PPC::BCC))
13360 .addImm(PPC::PRED_NE)
13361 .addReg(CmpReg)
13362 .addMBB(readMBB);
13363
13364 BB->addSuccessor(readMBB);
13365 BB->addSuccessor(sinkMBB);
13366 } else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I8)
13367 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::ADD4);
13368 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I16)
13369 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::ADD4);
13370 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I32)
13371 BB = EmitAtomicBinary(MI, BB, 4, PPC::ADD4);
13372 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I64)
13373 BB = EmitAtomicBinary(MI, BB, 8, PPC::ADD8);
13374
13375 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I8)
13376 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::AND);
13377 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I16)
13378 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::AND);
13379 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I32)
13380 BB = EmitAtomicBinary(MI, BB, 4, PPC::AND);
13381 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I64)
13382 BB = EmitAtomicBinary(MI, BB, 8, PPC::AND8);
13383
13384 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I8)
13385 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::OR);
13386 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I16)
13387 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::OR);
13388 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I32)
13389 BB = EmitAtomicBinary(MI, BB, 4, PPC::OR);
13390 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I64)
13391 BB = EmitAtomicBinary(MI, BB, 8, PPC::OR8);
13392
13393 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I8)
13394 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::XOR);
13395 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I16)
13396 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::XOR);
13397 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I32)
13398 BB = EmitAtomicBinary(MI, BB, 4, PPC::XOR);
13399 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I64)
13400 BB = EmitAtomicBinary(MI, BB, 8, PPC::XOR8);
13401
13402 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I8)
13403 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::NAND);
13404 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I16)
13405 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::NAND);
13406 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I32)
13407 BB = EmitAtomicBinary(MI, BB, 4, PPC::NAND);
13408 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I64)
13409 BB = EmitAtomicBinary(MI, BB, 8, PPC::NAND8);
13410
13411 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I8)
13412 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::SUBF);
13413 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I16)
13414 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::SUBF);
13415 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I32)
13416 BB = EmitAtomicBinary(MI, BB, 4, PPC::SUBF);
13417 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I64)
13418 BB = EmitAtomicBinary(MI, BB, 8, PPC::SUBF8);
13419
13420 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I8)
13421 BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPW, PPC::PRED_LT);
13422 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I16)
13423 BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPW, PPC::PRED_LT);
13424 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I32)
13425 BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPW, PPC::PRED_LT);
13426 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I64)
13427 BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPD, PPC::PRED_LT);
13428
13429 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I8)
13430 BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPW, PPC::PRED_GT);
13431 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I16)
13432 BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPW, PPC::PRED_GT);
13433 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I32)
13434 BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPW, PPC::PRED_GT);
13435 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I64)
13436 BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPD, PPC::PRED_GT);
13437
13438 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I8)
13439 BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPLW, PPC::PRED_LT);
13440 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I16)
13441 BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPLW, PPC::PRED_LT);
13442 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I32)
13443 BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPLW, PPC::PRED_LT);
13444 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I64)
13445 BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPLD, PPC::PRED_LT);
13446
13447 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I8)
13448 BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPLW, PPC::PRED_GT);
13449 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I16)
13450 BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPLW, PPC::PRED_GT);
13451 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I32)
13452 BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPLW, PPC::PRED_GT);
13453 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I64)
13454 BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPLD, PPC::PRED_GT);
13455
13456 else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I8)
13457 BB = EmitPartwordAtomicBinary(MI, BB, true, 0);
13458 else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I16)
13459 BB = EmitPartwordAtomicBinary(MI, BB, false, 0);
13460 else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I32)
13461 BB = EmitAtomicBinary(MI, BB, 4, 0);
13462 else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I64)
13463 BB = EmitAtomicBinary(MI, BB, 8, 0);
13464 else if (MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I32 ||
13465 MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I64 ||
13466 (Subtarget.hasPartwordAtomics() &&
13467 MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8) ||
13468 (Subtarget.hasPartwordAtomics() &&
13469 MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I16)) {
13470 bool is64bit = MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I64;
13471
13472 auto LoadMnemonic = PPC::LDARX;
13473 auto StoreMnemonic = PPC::STDCX;
13474 switch (MI.getOpcode()) {
13475 default:
13476 llvm_unreachable("Compare and swap of unknown size");
13477 case PPC::ATOMIC_CMP_SWAP_I8:
13478 LoadMnemonic = PPC::LBARX;
13479 StoreMnemonic = PPC::STBCX;
13480 assert(Subtarget.hasPartwordAtomics() && "No support partword atomics.");
13481 break;
13482 case PPC::ATOMIC_CMP_SWAP_I16:
13483 LoadMnemonic = PPC::LHARX;
13484 StoreMnemonic = PPC::STHCX;
13485 assert(Subtarget.hasPartwordAtomics() && "No support partword atomics.");
13486 break;
13487 case PPC::ATOMIC_CMP_SWAP_I32:
13488 LoadMnemonic = PPC::LWARX;
13489 StoreMnemonic = PPC::STWCX;
13490 break;
13491 case PPC::ATOMIC_CMP_SWAP_I64:
13492 LoadMnemonic = PPC::LDARX;
13493 StoreMnemonic = PPC::STDCX;
13494 break;
13495 }
13496 MachineRegisterInfo &RegInfo = F->getRegInfo();
13497 Register dest = MI.getOperand(0).getReg();
13498 Register ptrA = MI.getOperand(1).getReg();
13499 Register ptrB = MI.getOperand(2).getReg();
13500 Register CrReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
13501 Register oldval = MI.getOperand(3).getReg();
13502 Register newval = MI.getOperand(4).getReg();
13503 DebugLoc dl = MI.getDebugLoc();
13504
13505 MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB);
13506 MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB);
13507 MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
13508 F->insert(It, loop1MBB);
13509 F->insert(It, loop2MBB);
13510 F->insert(It, exitMBB);
13511 exitMBB->splice(exitMBB->begin(), BB,
13512 std::next(MachineBasicBlock::iterator(MI)), BB->end());
13513 exitMBB->transferSuccessorsAndUpdatePHIs(BB);
13514
13515 // thisMBB:
13516 // ...
13517 // fallthrough --> loopMBB
13518 BB->addSuccessor(loop1MBB);
13519
13520 // loop1MBB:
13521 // l[bhwd]arx dest, ptr
13522 // cmp[wd] dest, oldval
13523 // bne- exitBB
13524 // loop2MBB:
13525 // st[bhwd]cx. newval, ptr
13526 // bne- loopMBB
13527 // b exitBB
13528 // exitBB:
13529 BB = loop1MBB;
13530 BuildMI(BB, dl, TII->get(LoadMnemonic), dest).addReg(ptrA).addReg(ptrB);
13531 BuildMI(BB, dl, TII->get(is64bit ? PPC::CMPD : PPC::CMPW), CrReg)
13532 .addReg(dest)
13533 .addReg(oldval);
13534 BuildMI(BB, dl, TII->get(PPC::BCC))
13535 .addImm(PPC::PRED_NE)
13536 .addReg(CrReg)
13537 .addMBB(exitMBB);
13538 BB->addSuccessor(loop2MBB);
13539 BB->addSuccessor(exitMBB);
13540
13541 BB = loop2MBB;
13542 BuildMI(BB, dl, TII->get(StoreMnemonic))
13543 .addReg(newval)
13544 .addReg(ptrA)
13545 .addReg(ptrB);
13546 BuildMI(BB, dl, TII->get(PPC::BCC))
13547 .addImm(PPC::PRED_NE)
13548 .addReg(PPC::CR0)
13549 .addMBB(loop1MBB);
13550 BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
13551 BB->addSuccessor(loop1MBB);
13552 BB->addSuccessor(exitMBB);
13553
13554 // exitMBB:
13555 // ...
13556 BB = exitMBB;
13557 } else if (MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8 ||
13558 MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I16) {
13559 // We must use 64-bit registers for addresses when targeting 64-bit,
13560 // since we're actually doing arithmetic on them. Other registers
13561 // can be 32-bit.
13562 bool is64bit = Subtarget.isPPC64();
13563 bool isLittleEndian = Subtarget.isLittleEndian();
13564 bool is8bit = MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8;
13565
13566 Register dest = MI.getOperand(0).getReg();
13567 Register ptrA = MI.getOperand(1).getReg();
13568 Register ptrB = MI.getOperand(2).getReg();
13569 Register oldval = MI.getOperand(3).getReg();
13570 Register newval = MI.getOperand(4).getReg();
13571 DebugLoc dl = MI.getDebugLoc();
13572
13573 MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB);
13574 MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB);
13575 MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
13576 F->insert(It, loop1MBB);
13577 F->insert(It, loop2MBB);
13578 F->insert(It, exitMBB);
13579 exitMBB->splice(exitMBB->begin(), BB,
13580 std::next(MachineBasicBlock::iterator(MI)), BB->end());
13581 exitMBB->transferSuccessorsAndUpdatePHIs(BB);
13582
13583 MachineRegisterInfo &RegInfo = F->getRegInfo();
13584 const TargetRegisterClass *RC =
13585 is64bit ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
13586 const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
13587
13588 Register PtrReg = RegInfo.createVirtualRegister(RC);
13589 Register Shift1Reg = RegInfo.createVirtualRegister(GPRC);
13590 Register ShiftReg =
13591 isLittleEndian ? Shift1Reg : RegInfo.createVirtualRegister(GPRC);
13592 Register NewVal2Reg = RegInfo.createVirtualRegister(GPRC);
13593 Register NewVal3Reg = RegInfo.createVirtualRegister(GPRC);
13594 Register OldVal2Reg = RegInfo.createVirtualRegister(GPRC);
13595 Register OldVal3Reg = RegInfo.createVirtualRegister(GPRC);
13596 Register MaskReg = RegInfo.createVirtualRegister(GPRC);
13597 Register Mask2Reg = RegInfo.createVirtualRegister(GPRC);
13598 Register Mask3Reg = RegInfo.createVirtualRegister(GPRC);
13599 Register Tmp2Reg = RegInfo.createVirtualRegister(GPRC);
13600 Register Tmp4Reg = RegInfo.createVirtualRegister(GPRC);
13601 Register TmpDestReg = RegInfo.createVirtualRegister(GPRC);
13602 Register Ptr1Reg;
13603 Register TmpReg = RegInfo.createVirtualRegister(GPRC);
13604 Register ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;
13605 Register CrReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
13606 // thisMBB:
13607 // ...
13608 // fallthrough --> loopMBB
13609 BB->addSuccessor(loop1MBB);
13610
13611 // The 4-byte load must be aligned, while a char or short may be
13612 // anywhere in the word. Hence all this nasty bookkeeping code.
13613 // add ptr1, ptrA, ptrB [copy if ptrA==0]
13614 // rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27]
13615 // xori shift, shift1, 24 [16]
13616 // rlwinm ptr, ptr1, 0, 0, 29
13617 // slw newval2, newval, shift
13618 // slw oldval2, oldval, shift
13619 // li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535]
13620 // slw mask, mask2, shift
13621 // and newval3, newval2, mask
13622 // and oldval3, oldval2, mask
13623 // loop1MBB:
13624 // lwarx tmpDest, ptr
13625 // and tmp, tmpDest, mask
13626 // cmpw tmp, oldval3
13627 // bne- exitBB
13628 // loop2MBB:
13629 // andc tmp2, tmpDest, mask
13630 // or tmp4, tmp2, newval3
13631 // stwcx. tmp4, ptr
13632 // bne- loop1MBB
13633 // b exitBB
13634 // exitBB:
13635 // srw dest, tmpDest, shift
13636 if (ptrA != ZeroReg) {
13637 Ptr1Reg = RegInfo.createVirtualRegister(RC);
13638 BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)
13639 .addReg(ptrA)
13640 .addReg(ptrB);
13641 } else {
13642 Ptr1Reg = ptrB;
13643 }
13644
13645 // We need to use a 32-bit subregister here to avoid a register-class
13646 // mismatch in 64-bit mode.
13647 BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg)
13648 .addReg(Ptr1Reg, 0, is64bit ? PPC::sub_32 : 0)
13649 .addImm(3)
13650 .addImm(27)
13651 .addImm(is8bit ? 28 : 27);
13652 if (!isLittleEndian)
13653 BuildMI(BB, dl, TII->get(PPC::XORI), ShiftReg)
13654 .addReg(Shift1Reg)
13655 .addImm(is8bit ? 24 : 16);
13656 if (is64bit)
13657 BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg)
13658 .addReg(Ptr1Reg)
13659 .addImm(0)
13660 .addImm(61);
13661 else
13662 BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg)
13663 .addReg(Ptr1Reg)
13664 .addImm(0)
13665 .addImm(0)
13666 .addImm(29);
13667 BuildMI(BB, dl, TII->get(PPC::SLW), NewVal2Reg)
13668 .addReg(newval)
13669 .addReg(ShiftReg);
13670 BuildMI(BB, dl, TII->get(PPC::SLW), OldVal2Reg)
13671 .addReg(oldval)
13672 .addReg(ShiftReg);
13673 if (is8bit)
13674 BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255);
13675 else {
13676 BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0);
13677 BuildMI(BB, dl, TII->get(PPC::ORI), Mask2Reg)
13678 .addReg(Mask3Reg)
13679 .addImm(65535);
13680 }
13681 BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg)
13682 .addReg(Mask2Reg)
13683 .addReg(ShiftReg);
13684 BuildMI(BB, dl, TII->get(PPC::AND), NewVal3Reg)
13685 .addReg(NewVal2Reg)
13686 .addReg(MaskReg);
13687 BuildMI(BB, dl, TII->get(PPC::AND), OldVal3Reg)
13688 .addReg(OldVal2Reg)
13689 .addReg(MaskReg);
13690
13691 BB = loop1MBB;
13692 BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg)
13693 .addReg(ZeroReg)
13694 .addReg(PtrReg);
13695 BuildMI(BB, dl, TII->get(PPC::AND), TmpReg)
13696 .addReg(TmpDestReg)
13697 .addReg(MaskReg);
13698 BuildMI(BB, dl, TII->get(PPC::CMPW), CrReg)
13699 .addReg(TmpReg)
13700 .addReg(OldVal3Reg);
13701 BuildMI(BB, dl, TII->get(PPC::BCC))
13702 .addImm(PPC::PRED_NE)
13703 .addReg(CrReg)
13704 .addMBB(exitMBB);
13705 BB->addSuccessor(loop2MBB);
13706 BB->addSuccessor(exitMBB);
13707
13708 BB = loop2MBB;
13709 BuildMI(BB, dl, TII->get(PPC::ANDC), Tmp2Reg)
13710 .addReg(TmpDestReg)
13711 .addReg(MaskReg);
13712 BuildMI(BB, dl, TII->get(PPC::OR), Tmp4Reg)
13713 .addReg(Tmp2Reg)
13714 .addReg(NewVal3Reg);
13715 BuildMI(BB, dl, TII->get(PPC::STWCX))
13716 .addReg(Tmp4Reg)
13717 .addReg(ZeroReg)
13718 .addReg(PtrReg);
13719 BuildMI(BB, dl, TII->get(PPC::BCC))
13720 .addImm(PPC::PRED_NE)
13721 .addReg(PPC::CR0)
13722 .addMBB(loop1MBB);
13723 BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
13724 BB->addSuccessor(loop1MBB);
13725 BB->addSuccessor(exitMBB);
13726
13727 // exitMBB:
13728 // ...
13729 BB = exitMBB;
13730 BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), dest)
13731 .addReg(TmpReg)
13732 .addReg(ShiftReg);
13733 } else if (MI.getOpcode() == PPC::FADDrtz) {
13734 // This pseudo performs an FADD with rounding mode temporarily forced
13735 // to round-to-zero. We emit this via custom inserter since the FPSCR
13736 // is not modeled at the SelectionDAG level.
13737 Register Dest = MI.getOperand(0).getReg();
13738 Register Src1 = MI.getOperand(1).getReg();
13739 Register Src2 = MI.getOperand(2).getReg();
13740 DebugLoc dl = MI.getDebugLoc();
13741
13742 MachineRegisterInfo &RegInfo = F->getRegInfo();
13743 Register MFFSReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass);
13744
13745 // Save FPSCR value.
13746 BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), MFFSReg);
13747
13748 // Set rounding mode to round-to-zero.
13749 BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB1))
13750 .addImm(31)
13752
13753 BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB0))
13754 .addImm(30)
13756
13757 // Perform addition.
13758 auto MIB = BuildMI(*BB, MI, dl, TII->get(PPC::FADD), Dest)
13759 .addReg(Src1)
13760 .addReg(Src2);
13761 if (MI.getFlag(MachineInstr::NoFPExcept))
13763
13764 // Restore FPSCR value.
13765 BuildMI(*BB, MI, dl, TII->get(PPC::MTFSFb)).addImm(1).addReg(MFFSReg);
13766 } else if (MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT ||
13767 MI.getOpcode() == PPC::ANDI_rec_1_GT_BIT ||
13768 MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT8 ||
13769 MI.getOpcode() == PPC::ANDI_rec_1_GT_BIT8) {
13770 unsigned Opcode = (MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT8 ||
13771 MI.getOpcode() == PPC::ANDI_rec_1_GT_BIT8)
13772 ? PPC::ANDI8_rec
13773 : PPC::ANDI_rec;
13774 bool IsEQ = (MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT ||
13775 MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT8);
13776
13777 MachineRegisterInfo &RegInfo = F->getRegInfo();
13778 Register Dest = RegInfo.createVirtualRegister(
13779 Opcode == PPC::ANDI_rec ? &PPC::GPRCRegClass : &PPC::G8RCRegClass);
13780
13781 DebugLoc Dl = MI.getDebugLoc();
13782 BuildMI(*BB, MI, Dl, TII->get(Opcode), Dest)
13783 .addReg(MI.getOperand(1).getReg())
13784 .addImm(1);
13785 BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::COPY),
13786 MI.getOperand(0).getReg())
13787 .addReg(IsEQ ? PPC::CR0EQ : PPC::CR0GT);
13788 } else if (MI.getOpcode() == PPC::TCHECK_RET) {
13789 DebugLoc Dl = MI.getDebugLoc();
13790 MachineRegisterInfo &RegInfo = F->getRegInfo();
13791 Register CRReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
13792 BuildMI(*BB, MI, Dl, TII->get(PPC::TCHECK), CRReg);
13793 BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::COPY),
13794 MI.getOperand(0).getReg())
13795 .addReg(CRReg);
13796 } else if (MI.getOpcode() == PPC::TBEGIN_RET) {
13797 DebugLoc Dl = MI.getDebugLoc();
13798 unsigned Imm = MI.getOperand(1).getImm();
13799 BuildMI(*BB, MI, Dl, TII->get(PPC::TBEGIN)).addImm(Imm);
13800 BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::COPY),
13801 MI.getOperand(0).getReg())
13802 .addReg(PPC::CR0EQ);
13803 } else if (MI.getOpcode() == PPC::SETRNDi) {
13804 DebugLoc dl = MI.getDebugLoc();
13805 Register OldFPSCRReg = MI.getOperand(0).getReg();
13806
13807 // Save FPSCR value.
13808 if (MRI.use_empty(OldFPSCRReg))
13809 BuildMI(*BB, MI, dl, TII->get(TargetOpcode::IMPLICIT_DEF), OldFPSCRReg);
13810 else
13811 BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), OldFPSCRReg);
13812
13813 // The floating-point rounding mode is in bits 62:63 of the FPSCR, and has
13814 // the following settings:
13815 // 00 Round to nearest
13816 // 01 Round to 0
13817 // 10 Round to +inf
13818 // 11 Round to -inf
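// Worked example (illustrative): SETRNDi with Mode = 1 (round toward zero)
// makes the code below emit MTFSB1 on bit 31 and MTFSB0 on bit 30, i.e.
// RN = 0b01; Mode = 3 sets both bits, selecting round toward -inf.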
13819
13820 // When the operand is an immediate, use its two least significant bits to
13821 // set bits 62:63 of the FPSCR.
13822 unsigned Mode = MI.getOperand(1).getImm();
13823 BuildMI(*BB, MI, dl, TII->get((Mode & 1) ? PPC::MTFSB1 : PPC::MTFSB0))
13824 .addImm(31)
13825 .addReg(PPC::RM, RegState::ImplicitDefine);
13826
13827 BuildMI(*BB, MI, dl, TII->get((Mode & 2) ? PPC::MTFSB1 : PPC::MTFSB0))
13828 .addImm(30)
13829 .addReg(PPC::RM, RegState::ImplicitDefine);
13830 } else if (MI.getOpcode() == PPC::SETRND) {
13831 DebugLoc dl = MI.getDebugLoc();
13832
13833 // Copy register from F8RCRegClass::SrcReg to G8RCRegClass::DestReg
13834 // or copy register from G8RCRegClass::SrcReg to F8RCRegClass::DestReg.
13835 // If the target doesn't have DirectMove, we should use the stack to do the
13836 // conversion, because the target lacks instructions like mtvsrd or mfvsrd
13837 // that could do this conversion directly.
13838 auto copyRegFromG8RCOrF8RC = [&] (unsigned DestReg, unsigned SrcReg) {
13839 if (Subtarget.hasDirectMove()) {
13840 BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), DestReg)
13841 .addReg(SrcReg);
13842 } else {
13843 // Use stack to do the register copy.
13844 unsigned StoreOp = PPC::STD, LoadOp = PPC::LFD;
13845 MachineRegisterInfo &RegInfo = F->getRegInfo();
13846 const TargetRegisterClass *RC = RegInfo.getRegClass(SrcReg);
13847 if (RC == &PPC::F8RCRegClass) {
13848 // Copy register from F8RCRegClass to G8RCRegclass.
13849 assert((RegInfo.getRegClass(DestReg) == &PPC::G8RCRegClass) &&
13850 "Unsupported RegClass.");
13851
13852 StoreOp = PPC::STFD;
13853 LoadOp = PPC::LD;
13854 } else {
13855 // Copy register from G8RCRegClass to F8RCRegclass.
13856 assert((RegInfo.getRegClass(SrcReg) == &PPC::G8RCRegClass) &&
13857 (RegInfo.getRegClass(DestReg) == &PPC::F8RCRegClass) &&
13858 "Unsupported RegClass.");
13859 }
13860
13861 MachineFrameInfo &MFI = F->getFrameInfo();
13862 int FrameIdx = MFI.CreateStackObject(8, Align(8), false);
13863
13864 MachineMemOperand *MMOStore = F->getMachineMemOperand(
13865 MachinePointerInfo::getFixedStack(*F, FrameIdx, 0),
13866 MachineMemOperand::MOStore, MFI.getObjectSize(FrameIdx),
13867 MFI.getObjectAlign(FrameIdx));
13868
13869 // Store the SrcReg into the stack.
13870 BuildMI(*BB, MI, dl, TII->get(StoreOp))
13871 .addReg(SrcReg)
13872 .addImm(0)
13873 .addFrameIndex(FrameIdx)
13874 .addMemOperand(MMOStore);
13875
13876 MachineMemOperand *MMOLoad = F->getMachineMemOperand(
13877 MachinePointerInfo::getFixedStack(*F, FrameIdx, 0),
13878 MachineMemOperand::MOLoad, MFI.getObjectSize(FrameIdx),
13879 MFI.getObjectAlign(FrameIdx));
13880
13881 // Load from the stack where SrcReg is stored, and save to DestReg,
13882 // so we have done the RegClass conversion from RegClass::SrcReg to
13883 // RegClass::DestReg.
13884 BuildMI(*BB, MI, dl, TII->get(LoadOp), DestReg)
13885 .addImm(0)
13886 .addFrameIndex(FrameIdx)
13887 .addMemOperand(MMOLoad);
13888 }
13889 };
13890
13891 Register OldFPSCRReg = MI.getOperand(0).getReg();
13892
13893 // Save FPSCR value.
13894 BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), OldFPSCRReg);
13895
13896 // When the operand is a gprc register, use its two least significant bits
13897 // together with the mtfsf instruction to set bits 62:63 of the FPSCR.
13898 //
13899 // copy OldFPSCRTmpReg, OldFPSCRReg
13900 // (INSERT_SUBREG ExtSrcReg, (IMPLICIT_DEF ImDefReg), SrcOp, 1)
13901 // rldimi NewFPSCRTmpReg, ExtSrcReg, OldFPSCRReg, 0, 62
13902 // copy NewFPSCRReg, NewFPSCRTmpReg
13903 // mtfsf 255, NewFPSCRReg
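// Illustrative reading of the sequence above: with a shift of 0 and a mask
// beginning at bit 62, the rldimi inserts only bits 62:63 of ExtSrcReg into
// the copied FPSCR value, so just the two low-order bits of the operand
// replace the rounding-mode (RN) field.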
13904 MachineOperand SrcOp = MI.getOperand(1);
13905 MachineRegisterInfo &RegInfo = F->getRegInfo();
13906 Register OldFPSCRTmpReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
13907
13908 copyRegFromG8RCOrF8RC(OldFPSCRTmpReg, OldFPSCRReg);
13909
13910 Register ImDefReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
13911 Register ExtSrcReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
13912
13913 // The first operand of INSERT_SUBREG should be a register that has
13914 // subregisters; since we only care about its register class, an
13915 // IMPLICIT_DEF register suffices.
13916 BuildMI(*BB, MI, dl, TII->get(TargetOpcode::IMPLICIT_DEF), ImDefReg);
13917 BuildMI(*BB, MI, dl, TII->get(PPC::INSERT_SUBREG), ExtSrcReg)
13918 .addReg(ImDefReg)
13919 .add(SrcOp)
13920 .addImm(1);
13921
13922 Register NewFPSCRTmpReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
13923 BuildMI(*BB, MI, dl, TII->get(PPC::RLDIMI), NewFPSCRTmpReg)
13924 .addReg(OldFPSCRTmpReg)
13925 .addReg(ExtSrcReg)
13926 .addImm(0)
13927 .addImm(62);
13928
13929 Register NewFPSCRReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass);
13930 copyRegFromG8RCOrF8RC(NewFPSCRReg, NewFPSCRTmpReg);
13931
13932 // The mask 255 means that bits 32:63 of NewFPSCRReg are written to bits
13933 // 32:63 of the FPSCR.
13934 BuildMI(*BB, MI, dl, TII->get(PPC::MTFSF))
13935 .addImm(255)
13936 .addReg(NewFPSCRReg)
13937 .addImm(0)
13938 .addImm(0);
13939 } else if (MI.getOpcode() == PPC::SETFLM) {
13940 DebugLoc Dl = MI.getDebugLoc();
13941
13942 // Result of setflm is previous FPSCR content, so we need to save it first.
13943 Register OldFPSCRReg = MI.getOperand(0).getReg();
13944 if (MRI.use_empty(OldFPSCRReg))
13945 BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::IMPLICIT_DEF), OldFPSCRReg);
13946 else
13947 BuildMI(*BB, MI, Dl, TII->get(PPC::MFFS), OldFPSCRReg);
13948
13949 // Put bits 32:63 into the FPSCR.
13950 Register NewFPSCRReg = MI.getOperand(1).getReg();
13951 BuildMI(*BB, MI, Dl, TII->get(PPC::MTFSF))
13952 .addImm(255)
13953 .addReg(NewFPSCRReg)
13954 .addImm(0)
13955 .addImm(0);
13956 } else if (MI.getOpcode() == PPC::PROBED_ALLOCA_32 ||
13957 MI.getOpcode() == PPC::PROBED_ALLOCA_64) {
13958 return emitProbedAlloca(MI, BB);
13959 } else if (MI.getOpcode() == PPC::SPLIT_QUADWORD) {
13960 DebugLoc DL = MI.getDebugLoc();
13961 Register Src = MI.getOperand(2).getReg();
13962 Register Lo = MI.getOperand(0).getReg();
13963 Register Hi = MI.getOperand(1).getReg();
13964 BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY))
13965 .addDef(Lo)
13966 .addUse(Src, 0, PPC::sub_gp8_x1);
13967 BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY))
13968 .addDef(Hi)
13969 .addUse(Src, 0, PPC::sub_gp8_x0);
13970 } else if (MI.getOpcode() == PPC::LQX_PSEUDO ||
13971 MI.getOpcode() == PPC::STQX_PSEUDO) {
13972 DebugLoc DL = MI.getDebugLoc();
13973 // Ptr is used as the ptr_rc_no_r0 part
13974 // of LQ/STQ's memory operand and holds the sum of RA and RB,
13975 // so it has to be in the g8rc_and_g8rc_nox0 register class.
13976 Register Ptr =
13977 F->getRegInfo().createVirtualRegister(&PPC::G8RC_and_G8RC_NOX0RegClass);
13978 Register Val = MI.getOperand(0).getReg();
13979 Register RA = MI.getOperand(1).getReg();
13980 Register RB = MI.getOperand(2).getReg();
13981 BuildMI(*BB, MI, DL, TII->get(PPC::ADD8), Ptr).addReg(RA).addReg(RB);
13982 BuildMI(*BB, MI, DL,
13983 MI.getOpcode() == PPC::LQX_PSEUDO ? TII->get(PPC::LQ)
13984 : TII->get(PPC::STQ))
13985 .addReg(Val, MI.getOpcode() == PPC::LQX_PSEUDO ? RegState::Define : 0)
13986 .addImm(0)
13987 .addReg(Ptr);
13988 } else {
13989 llvm_unreachable("Unexpected instr type to insert");
13990 }
13991
13992 MI.eraseFromParent(); // The pseudo instruction is gone now.
13993 return BB;
13994}
13995
13996//===----------------------------------------------------------------------===//
13997// Target Optimization Hooks
13998//===----------------------------------------------------------------------===//
13999
14000static int getEstimateRefinementSteps(EVT VT, const PPCSubtarget &Subtarget) {
14001 // For the estimates, convergence is quadratic, so we essentially double the
14002 // number of correct bits after every iteration. For both FRE and FRSQRTE,
14003 // the minimum architected relative accuracy is 2^-5. When hasRecipPrec(),
14004 // this is 2^-14. IEEE float has a 23-bit mantissa and double has 52 bits.
14005 int RefinementSteps = Subtarget.hasRecipPrec() ? 1 : 3;
14006 if (VT.getScalarType() == MVT::f64)
14007 RefinementSteps++;
14008 return RefinementSteps;
14009}
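// Worked instance (illustrative): starting from 2^-5 accuracy, the three
// steps returned for f32 give roughly 5 -> 10 -> 20 -> 40 correct bits,
// covering its 24-bit significand, and the extra step for f64 reaches ~80
// bits. With hasRecipPrec() (2^-14), one step gives ~28 bits for f32 and two
// steps ~56 bits for f64, matching the counts computed above.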
14010
14011SDValue PPCTargetLowering::getSqrtInputTest(SDValue Op, SelectionDAG &DAG,
14012 const DenormalMode &Mode) const {
14013 // We only have VSX Vector Test for software Square Root.
14014 EVT VT = Op.getValueType();
14015 if (!isTypeLegal(MVT::i1) ||
14016 (VT != MVT::f64 &&
14017 ((VT != MVT::v2f64 && VT != MVT::v4f32) || !Subtarget.hasVSX())))
14018 return TargetLowering::getSqrtInputTest(Op, DAG, Mode);
14019
14020 SDLoc DL(Op);
14021 // The output register of FTSQRT is CR field.
14022 SDValue FTSQRT = DAG.getNode(PPCISD::FTSQRT, DL, MVT::i32, Op);
14023 // ftsqrt BF,FRB
14024 // Let e_b be the unbiased exponent of the double-precision
14025 // floating-point operand in register FRB.
14026 // fe_flag is set to 1 if either of the following conditions occurs.
14027 // - The double-precision floating-point operand in register FRB is a zero,
14028 // a NaN, or an infinity, or a negative value.
14029 // - e_b is less than or equal to -970.
14030 // Otherwise fe_flag is set to 0.
14031 // Both VSX and non-VSX versions would set EQ bit in the CR if the number is
14032 // not eligible for iteration. (zero/negative/infinity/nan or unbiased
14033 // exponent is less than -970)
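// Illustrative consequence (an assumption about how the generic combiner
// consumes this hook): for an input such as +0.0 or a denormal, FTSQRT sets
// the EQ bit, the extracted i1 test is true, and the caller falls back to
// getSqrtResultForDenormInput() below rather than refining the estimate.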
14034 SDValue SRIdxVal = DAG.getTargetConstant(PPC::sub_eq, DL, MVT::i32);
14035 return SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, MVT::i1,
14036 FTSQRT, SRIdxVal),
14037 0);
14038}
14039
14040SDValue
14041PPCTargetLowering::getSqrtResultForDenormInput(SDValue Op,
14042 SelectionDAG &DAG) const {
14043 // We only have VSX Vector Square Root.
14044 EVT VT = Op.getValueType();
14045 if (VT != MVT::f64 &&
14046 ((VT != MVT::v2f64 && VT != MVT::v4f32) || !Subtarget.hasVSX()))
14047 return TargetLowering::getSqrtResultForDenormInput(Op, DAG);
14048
14049 return DAG.getNode(PPCISD::FSQRT, SDLoc(Op), VT, Op);
14050}
14051
14052SDValue PPCTargetLowering::getSqrtEstimate(SDValue Operand, SelectionDAG &DAG,
14053 int Enabled, int &RefinementSteps,
14054 bool &UseOneConstNR,
14055 bool Reciprocal) const {
14056 EVT VT = Operand.getValueType();
14057 if ((VT == MVT::f32 && Subtarget.hasFRSQRTES()) ||
14058 (VT == MVT::f64 && Subtarget.hasFRSQRTE()) ||
14059 (VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
14060 (VT == MVT::v2f64 && Subtarget.hasVSX())) {
14061 if (RefinementSteps == ReciprocalEstimate::Unspecified)
14062 RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
14063
14064 // The Newton-Raphson computation with a single constant does not provide
14065 // enough accuracy on some CPUs.
14066 UseOneConstNR = !Subtarget.needsTwoConstNR();
14067 return DAG.getNode(PPCISD::FRSQRTE, SDLoc(Operand), VT, Operand);
14068 }
14069 return SDValue();
14070}
14071
14072SDValue PPCTargetLowering::getRecipEstimate(SDValue Operand, SelectionDAG &DAG,
14073 int Enabled,
14074 int &RefinementSteps) const {
14075 EVT VT = Operand.getValueType();
14076 if ((VT == MVT::f32 && Subtarget.hasFRES()) ||
14077 (VT == MVT::f64 && Subtarget.hasFRE()) ||
14078 (VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
14079 (VT == MVT::v2f64 && Subtarget.hasVSX())) {
14080 if (RefinementSteps == ReciprocalEstimate::Unspecified)
14081 RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
14082 return DAG.getNode(PPCISD::FRE, SDLoc(Operand), VT, Operand);
14083 }
14084 return SDValue();
14085}
14086
14087unsigned PPCTargetLowering::combineRepeatedFPDivisors() const {
14088 // Note: This functionality is used only when unsafe-fp-math is enabled, and
14089 // on cores with reciprocal estimates (which are used when unsafe-fp-math is
14090 // enabled for division), this functionality is redundant with the default
14091 // combiner logic (once the division -> reciprocal/multiply transformation
14092 // has taken place). As a result, this matters more for older cores than for
14093 // newer ones.
14094
14095 // Combine multiple FDIVs with the same divisor into multiple FMULs by the
14096 // reciprocal if there are two or more FDIVs (for embedded cores with only
14097 // one FP pipeline) or three or more FDIVs (for generic OOO cores).
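 // Illustrative example of the transformation this threshold gates:
 //   x/d, y/d, z/d  ==>  r = 1.0/d; x*r, y*r, z*r
 // i.e. repeated divisions by the same value become one reciprocal plus one
 // multiply per original division.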
14098 switch (Subtarget.getCPUDirective()) {
14099 default:
14100 return 3;
14101 case PPC::DIR_440:
14102 case PPC::DIR_A2:
14103 case PPC::DIR_E500:
14104 case PPC::DIR_E500mc:
14105 case PPC::DIR_E5500:
14106 return 2;
14107 }
14108}
14109
14110// isConsecutiveLSLoc needs to work even if all adds have not yet been
14111// collapsed, and so we need to look through chains of them.
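// Illustrative example: for Loc = (add (add %base, 16), 8) this accumulates
// Base = %base and Offset += 24.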
14112 static void getBaseWithConstantOffset(SDValue Loc, SDValue &Base,
14113 int64_t& Offset, SelectionDAG &DAG) {
14114 if (DAG.isBaseWithConstantOffset(Loc)) {
14115 Base = Loc.getOperand(0);
14116 Offset += cast<ConstantSDNode>(Loc.getOperand(1))->getSExtValue();
14117
14118 // The base might itself be a base plus an offset, and if so, accumulate
14119 // that as well.
14120 getBaseWithConstantOffset(Loc.getOperand(0), Base, Offset, DAG);
14121 }
14122}
14123
14124 static bool isConsecutiveLSLoc(SDValue Loc, EVT VT, LSBaseSDNode *Base,
14125 unsigned Bytes, int Dist,
14126 SelectionDAG &DAG) {
14127 if (VT.getSizeInBits() / 8 != Bytes)
14128 return false;
14129
14130 SDValue BaseLoc = Base->getBasePtr();
14131 if (Loc.getOpcode() == ISD::FrameIndex) {
14132 if (BaseLoc.getOpcode() != ISD::FrameIndex)
14133 return false;
14134 MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
14135 int FI = cast<FrameIndexSDNode>(Loc)->getIndex();
14136 int BFI = cast<FrameIndexSDNode>(BaseLoc)->getIndex();
14137 int FS = MFI.getObjectSize(FI);
14138 int BFS = MFI.getObjectSize(BFI);
14139 if (FS != BFS || FS != (int)Bytes) return false;
14140 return MFI.getObjectOffset(FI) == (MFI.getObjectOffset(BFI) + Dist*Bytes);
14141 }
14142
14143 SDValue Base1 = Loc, Base2 = BaseLoc;
14144 int64_t Offset1 = 0, Offset2 = 0;
14145 getBaseWithConstantOffset(Loc, Base1, Offset1, DAG);
14146 getBaseWithConstantOffset(BaseLoc, Base2, Offset2, DAG);
14147 if (Base1 == Base2 && Offset1 == (Offset2 + Dist * Bytes))
14148 return true;
14149
14150 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
14151 const GlobalValue *GV1 = nullptr;
14152 const GlobalValue *GV2 = nullptr;
14153 Offset1 = 0;
14154 Offset2 = 0;
14155 bool isGA1 = TLI.isGAPlusOffset(Loc.getNode(), GV1, Offset1);
14156 bool isGA2 = TLI.isGAPlusOffset(BaseLoc.getNode(), GV2, Offset2);
14157 if (isGA1 && isGA2 && GV1 == GV2)
14158 return Offset1 == (Offset2 + Dist*Bytes);
14159 return false;
14160}
14161
14162// Like SelectionDAG::isConsecutiveLoad, but also works for stores, and does
14163// not enforce equality of the chain operands.
14164 static bool isConsecutiveLS(SDNode *N, LSBaseSDNode *Base,
14165 unsigned Bytes, int Dist,
14166 SelectionDAG &DAG) {
14167 if (LSBaseSDNode *LS = dyn_cast<LSBaseSDNode>(N)) {
14168 EVT VT = LS->getMemoryVT();
14169 SDValue Loc = LS->getBasePtr();
14170 return isConsecutiveLSLoc(Loc, VT, Base, Bytes, Dist, DAG);
14171 }
14172
14173 if (N->getOpcode() == ISD::INTRINSIC_W_CHAIN) {
14174 EVT VT;
14175 switch (N->getConstantOperandVal(1)) {
14176 default: return false;
14177 case Intrinsic::ppc_altivec_lvx:
14178 case Intrinsic::ppc_altivec_lvxl:
14179 case Intrinsic::ppc_vsx_lxvw4x:
14180 case Intrinsic::ppc_vsx_lxvw4x_be:
14181 VT = MVT::v4i32;
14182 break;
14183 case Intrinsic::ppc_vsx_lxvd2x:
14184 case Intrinsic::ppc_vsx_lxvd2x_be:
14185 VT = MVT::v2f64;
14186 break;
14187 case Intrinsic::ppc_altivec_lvebx:
14188 VT = MVT::i8;
14189 break;
14190 case Intrinsic::ppc_altivec_lvehx:
14191 VT = MVT::i16;
14192 break;
14193 case Intrinsic::ppc_altivec_lvewx:
14194 VT = MVT::i32;
14195 break;
14196 }
14197
14198 return isConsecutiveLSLoc(N->getOperand(2), VT, Base, Bytes, Dist, DAG);
14199 }
14200
14201 if (N->getOpcode() == ISD::INTRINSIC_VOID) {
14202 EVT VT;
14203 switch (N->getConstantOperandVal(1)) {
14204 default: return false;
14205 case Intrinsic::ppc_altivec_stvx:
14206 case Intrinsic::ppc_altivec_stvxl:
14207 case Intrinsic::ppc_vsx_stxvw4x:
14208 VT = MVT::v4i32;
14209 break;
14210 case Intrinsic::ppc_vsx_stxvd2x:
14211 VT = MVT::v2f64;
14212 break;
14213 case Intrinsic::ppc_vsx_stxvw4x_be:
14214 VT = MVT::v4i32;
14215 break;
14216 case Intrinsic::ppc_vsx_stxvd2x_be:
14217 VT = MVT::v2f64;
14218 break;
14219 case Intrinsic::ppc_altivec_stvebx:
14220 VT = MVT::i8;
14221 break;
14222 case Intrinsic::ppc_altivec_stvehx:
14223 VT = MVT::i16;
14224 break;
14225 case Intrinsic::ppc_altivec_stvewx:
14226 VT = MVT::i32;
14227 break;
14228 }
14229
14230 return isConsecutiveLSLoc(N->getOperand(3), VT, Base, Bytes, Dist, DAG);
14231 }
14232
14233 return false;
14234}
14235
14236 // Return true if there is a nearby consecutive load to the one provided
14237 // (regardless of alignment). We search up and down the chain, looking through
14238// token factors and other loads (but nothing else). As a result, a true result
14239// indicates that it is safe to create a new consecutive load adjacent to the
14240// load provided.
14241 static bool findConsecutiveLoad(LoadSDNode *LD, SelectionDAG &DAG) {
14242 SDValue Chain = LD->getChain();
14243 EVT VT = LD->getMemoryVT();
14244
14245 SmallSet<SDNode *, 16> LoadRoots;
14246 SmallVector<SDNode *, 8> Queue(1, Chain.getNode());
14247 SmallSet<SDNode *, 16> Visited;
14248
14249 // First, search up the chain, branching to follow all token-factor operands.
14250 // If we find a consecutive load, then we're done, otherwise, record all
14251 // nodes just above the top-level loads and token factors.
14252 while (!Queue.empty()) {
14253 SDNode *ChainNext = Queue.pop_back_val();
14254 if (!Visited.insert(ChainNext).second)
14255 continue;
14256
14257 if (MemSDNode *ChainLD = dyn_cast<MemSDNode>(ChainNext)) {
14258 if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG))
14259 return true;
14260
14261 if (!Visited.count(ChainLD->getChain().getNode()))
14262 Queue.push_back(ChainLD->getChain().getNode());
14263 } else if (ChainNext->getOpcode() == ISD::TokenFactor) {
14264 for (const SDUse &O : ChainNext->ops())
14265 if (!Visited.count(O.getNode()))
14266 Queue.push_back(O.getNode());
14267 } else
14268 LoadRoots.insert(ChainNext);
14269 }
14270
14271 // Second, search down the chain, starting from the top-level nodes recorded
14272 // in the first phase. These top-level nodes are the nodes just above all
14273 // loads and token factors. Starting with their uses, recursively look through
14274 // all loads (just the chain uses) and token factors to find a consecutive
14275 // load.
14276 Visited.clear();
14277 Queue.clear();
14278
14279 for (SDNode *I : LoadRoots) {
14280 Queue.push_back(I);
14281
14282 while (!Queue.empty()) {
14283 SDNode *LoadRoot = Queue.pop_back_val();
14284 if (!Visited.insert(LoadRoot).second)
14285 continue;
14286
14287 if (MemSDNode *ChainLD = dyn_cast<MemSDNode>(LoadRoot))
14288 if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG))
14289 return true;
14290
14291 for (SDNode *U : LoadRoot->users())
14292 if (((isa<MemSDNode>(U) &&
14293 cast<MemSDNode>(U)->getChain().getNode() == LoadRoot) ||
14294 U->getOpcode() == ISD::TokenFactor) &&
14295 !Visited.count(U))
14296 Queue.push_back(U);
14297 }
14298 }
14299
14300 return false;
14301}
14302
14303/// This function is called when we have proved that a SETCC node can be replaced
14304/// by subtraction (and other supporting instructions) so that the result of
14305/// comparison is kept in a GPR instead of CR. This function is purely for
14306/// codegen purposes and has some flags to guide the codegen process.
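/// Illustrative example: for i32 operands x and y compared with SETULT, this
/// produces trunc(((zext x to i64) - (zext y to i64)) >> 63) to i1. Since both
/// operands are zero extended, the subtraction cannot wrap, so the sign bit of
/// the difference is exactly the unsigned less-than result; the Swap and
/// Complement flags adapt the same pattern to the other unsigned predicates.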
14307static SDValue generateEquivalentSub(SDNode *N, int Size, bool Complement,
14308 bool Swap, SDLoc &DL, SelectionDAG &DAG) {
14309 assert(N->getOpcode() == ISD::SETCC && "ISD::SETCC Expected.");
14310
14311 // Zero extend the operands to the largest legal integer. The original
14312 // operands must be of a strictly smaller size.
14313 auto Op0 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(0),
14314 DAG.getConstant(Size, DL, MVT::i32));
14315 auto Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(1),
14316 DAG.getConstant(Size, DL, MVT::i32));
14317
14318 // Swap if needed. Depends on the condition code.
14319 if (Swap)
14320 std::swap(Op0, Op1);
14321
14322 // Subtract extended integers.
14323 auto SubNode = DAG.getNode(ISD::SUB, DL, MVT::i64, Op0, Op1);
14324
14325 // Move the sign bit to the least significant position and zero out the rest.
14326 // Now the least significant bit carries the result of original comparison.
14327 auto Shifted = DAG.getNode(ISD::SRL, DL, MVT::i64, SubNode,
14328 DAG.getConstant(Size - 1, DL, MVT::i32));
14329 auto Final = Shifted;
14330
14331 // Complement the result if needed. Based on the condition code.
14332 if (Complement)
14333 Final = DAG.getNode(ISD::XOR, DL, MVT::i64, Shifted,
14334 DAG.getConstant(1, DL, MVT::i64));
14335
14336 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, Final);
14337}
14338
14339SDValue PPCTargetLowering::ConvertSETCCToSubtract(SDNode *N,
14340 DAGCombinerInfo &DCI) const {
14341 assert(N->getOpcode() == ISD::SETCC && "ISD::SETCC Expected.");
14342
14343 SelectionDAG &DAG = DCI.DAG;
14344 SDLoc DL(N);
14345
14346 // Size of integers being compared has a critical role in the following
14347 // analysis, so we prefer to do this when all types are legal.
14348 if (!DCI.isAfterLegalizeDAG())
14349 return SDValue();
14350
14351 // If all users of SETCC extend its value to a legal integer type
14352 // then we replace SETCC with a subtraction
14353 for (const SDNode *U : N->users())
14354 if (U->getOpcode() != ISD::ZERO_EXTEND)
14355 return SDValue();
14356
14357 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
14358 auto OpSize = N->getOperand(0).getValueSizeInBits();
14359
14361
14360 unsigned Size = DAG.getDataLayout().getLargestLegalIntTypeSizeInBits();
14362 if (OpSize < Size) {
14363 switch (CC) {
14364 default: break;
14365 case ISD::SETULT:
14366 return generateEquivalentSub(N, Size, false, false, DL, DAG);
14367 case ISD::SETULE:
14368 return generateEquivalentSub(N, Size, true, true, DL, DAG);
14369 case ISD::SETUGT:
14370 return generateEquivalentSub(N, Size, false, true, DL, DAG);
14371 case ISD::SETUGE:
14372 return generateEquivalentSub(N, Size, true, false, DL, DAG);
14373 }
14374 }
14375
14376 return SDValue();
14377}
14378
14379SDValue PPCTargetLowering::DAGCombineTruncBoolExt(SDNode *N,
14380 DAGCombinerInfo &DCI) const {
14381 SelectionDAG &DAG = DCI.DAG;
14382 SDLoc dl(N);
14383
14384 assert(Subtarget.useCRBits() && "Expecting to be tracking CR bits");
14385 // If we're tracking CR bits, we need to be careful that we don't have:
14386 // trunc(binary-ops(zext(x), zext(y)))
14387 // or
14388 // trunc(binary-ops(binary-ops(zext(x), zext(y)), ...))
14389 // such that we're unnecessarily moving things into GPRs when it would be
14390 // better to keep them in CR bits.
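 // Illustrative example: trunc(xor(zext(a), zext(b))) with i1 values a and b
 // can simply become xor(a, b) computed directly in CR bits.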
14391
14392 // Note that trunc here can be an actual i1 trunc, or can be the effective
14393 // truncation that comes from a setcc or select_cc.
14394 if (N->getOpcode() == ISD::TRUNCATE &&
14395 N->getValueType(0) != MVT::i1)
14396 return SDValue();
14397
14398 if (N->getOperand(0).getValueType() != MVT::i32 &&
14399 N->getOperand(0).getValueType() != MVT::i64)
14400 return SDValue();
14401
14402 if (N->getOpcode() == ISD::SETCC ||
14403 N->getOpcode() == ISD::SELECT_CC) {
14404 // If we're looking at a comparison, then we need to make sure that the
14405 // high bits (all except for the first) don't affect the result.
14406 ISD::CondCode CC =
14407 cast<CondCodeSDNode>(N->getOperand(
14408 N->getOpcode() == ISD::SETCC ? 2 : 4))->get();
14409 unsigned OpBits = N->getOperand(0).getValueSizeInBits();
14410
14411 if (ISD::isSignedIntSetCC(CC)) {
14412 if (DAG.ComputeNumSignBits(N->getOperand(0)) != OpBits ||
14413 DAG.ComputeNumSignBits(N->getOperand(1)) != OpBits)
14414 return SDValue();
14415 } else if (ISD::isUnsignedIntSetCC(CC)) {
14416 if (!DAG.MaskedValueIsZero(N->getOperand(0),
14417 APInt::getHighBitsSet(OpBits, OpBits-1)) ||
14418 !DAG.MaskedValueIsZero(N->getOperand(1),
14419 APInt::getHighBitsSet(OpBits, OpBits-1)))
14420 return (N->getOpcode() == ISD::SETCC ? ConvertSETCCToSubtract(N, DCI)
14421 : SDValue());
14422 } else {
14423 // This is neither a signed nor an unsigned comparison, just make sure
14424 // that the high bits are equal.
14425 KnownBits Op1Known = DAG.computeKnownBits(N->getOperand(0));
14426 KnownBits Op2Known = DAG.computeKnownBits(N->getOperand(1));
14427
14428 // We don't really care about what is known about the first bit (if
14429 // anything), so pretend that it is known zero for both to ensure they can
14430 // be compared as constants.
14431 Op1Known.Zero.setBit(0); Op1Known.One.clearBit(0);
14432 Op2Known.Zero.setBit(0); Op2Known.One.clearBit(0);
14433
14434 if (!Op1Known.isConstant() || !Op2Known.isConstant() ||
14435 Op1Known.getConstant() != Op2Known.getConstant())
14436 return SDValue();
14437 }
14438 }
14439
14440 // We now know that the higher-order bits are irrelevant, we just need to
14441 // make sure that all of the intermediate operations are bit operations, and
14442 // all inputs are extensions.
14443 if (N->getOperand(0).getOpcode() != ISD::AND &&
14444 N->getOperand(0).getOpcode() != ISD::OR &&
14445 N->getOperand(0).getOpcode() != ISD::XOR &&
14446 N->getOperand(0).getOpcode() != ISD::SELECT &&
14447 N->getOperand(0).getOpcode() != ISD::SELECT_CC &&
14448 N->getOperand(0).getOpcode() != ISD::TRUNCATE &&
14449 N->getOperand(0).getOpcode() != ISD::SIGN_EXTEND &&
14450 N->getOperand(0).getOpcode() != ISD::ZERO_EXTEND &&
14451 N->getOperand(0).getOpcode() != ISD::ANY_EXTEND)
14452 return SDValue();
14453
14454 if ((N->getOpcode() == ISD::SETCC || N->getOpcode() == ISD::SELECT_CC) &&
14455 N->getOperand(1).getOpcode() != ISD::AND &&
14456 N->getOperand(1).getOpcode() != ISD::OR &&
14457 N->getOperand(1).getOpcode() != ISD::XOR &&
14458 N->getOperand(1).getOpcode() != ISD::SELECT &&
14459 N->getOperand(1).getOpcode() != ISD::SELECT_CC &&
14460 N->getOperand(1).getOpcode() != ISD::TRUNCATE &&
14461 N->getOperand(1).getOpcode() != ISD::SIGN_EXTEND &&
14462 N->getOperand(1).getOpcode() != ISD::ZERO_EXTEND &&
14463 N->getOperand(1).getOpcode() != ISD::ANY_EXTEND)
14464 return SDValue();
14465
14466 SmallVector<SDValue, 4> Inputs;
14467 SmallVector<SDValue, 8> BinOps, PromOps;
14468 SmallPtrSet<SDNode *, 16> Visited;
14469
14470 for (unsigned i = 0; i < 2; ++i) {
14471 if (((N->getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
14472 N->getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
14473 N->getOperand(i).getOpcode() == ISD::ANY_EXTEND) &&
14474 N->getOperand(i).getOperand(0).getValueType() == MVT::i1) ||
14475 isa<ConstantSDNode>(N->getOperand(i)))
14476 Inputs.push_back(N->getOperand(i));
14477 else
14478 BinOps.push_back(N->getOperand(i));
14479
14480 if (N->getOpcode() == ISD::TRUNCATE)
14481 break;
14482 }
14483
14484 // Visit all inputs, collect all binary operations (and, or, xor and
14485 // select) that are all fed by extensions.
14486 while (!BinOps.empty()) {
14487 SDValue BinOp = BinOps.pop_back_val();
14488
14489 if (!Visited.insert(BinOp.getNode()).second)
14490 continue;
14491
14492 PromOps.push_back(BinOp);
14493
14494 for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) {
14495 // The condition of the select is not promoted.
14496 if (BinOp.getOpcode() == ISD::SELECT && i == 0)
14497 continue;
14498 if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3)
14499 continue;
14500
14501 if (((BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
14502 BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
14503 BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) &&
14504 BinOp.getOperand(i).getOperand(0).getValueType() == MVT::i1) ||
14505 isa<ConstantSDNode>(BinOp.getOperand(i))) {
14506 Inputs.push_back(BinOp.getOperand(i));
14507 } else if (BinOp.getOperand(i).getOpcode() == ISD::AND ||
14508 BinOp.getOperand(i).getOpcode() == ISD::OR ||
14509 BinOp.getOperand(i).getOpcode() == ISD::XOR ||
14510 BinOp.getOperand(i).getOpcode() == ISD::SELECT ||
14511 BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC ||
14512 BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE ||
14513 BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
14514 BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
14515 BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) {
14516 BinOps.push_back(BinOp.getOperand(i));
14517 } else {
14518 // We have an input that is not an extension or another binary
14519 // operation; we'll abort this transformation.
14520 return SDValue();
14521 }
14522 }
14523 }
14524
14525 // Make sure that this is a self-contained cluster of operations (which
14526 // is not quite the same thing as saying that everything has only one
14527 // use).
14528 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
14529 if (isa<ConstantSDNode>(Inputs[i]))
14530 continue;
14531
14532 for (const SDNode *User : Inputs[i].getNode()->users()) {
14533 if (User != N && !Visited.count(User))
14534 return SDValue();
14535
14536 // Make sure that we're not going to promote the non-output-value
14537 // operand(s) or SELECT or SELECT_CC.
14538 // FIXME: Although we could sometimes handle this, and it does occur in
14539 // practice that one of the condition inputs to the select is also one of
14540 // the outputs, we currently can't deal with this.
14541 if (User->getOpcode() == ISD::SELECT) {
14542 if (User->getOperand(0) == Inputs[i])
14543 return SDValue();
14544 } else if (User->getOpcode() == ISD::SELECT_CC) {
14545 if (User->getOperand(0) == Inputs[i] ||
14546 User->getOperand(1) == Inputs[i])
14547 return SDValue();
14548 }
14549 }
14550 }
14551
14552 for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) {
14553 for (const SDNode *User : PromOps[i].getNode()->users()) {
14554 if (User != N && !Visited.count(User))
14555 return SDValue();
14556
14557 // Make sure that we're not going to promote the non-output-value
14558 // operand(s) or SELECT or SELECT_CC.
14559 // FIXME: Although we could sometimes handle this, and it does occur in
14560 // practice that one of the condition inputs to the select is also one of
14561 // the outputs, we currently can't deal with this.
14562 if (User->getOpcode() == ISD::SELECT) {
14563 if (User->getOperand(0) == PromOps[i])
14564 return SDValue();
14565 } else if (User->getOpcode() == ISD::SELECT_CC) {
14566 if (User->getOperand(0) == PromOps[i] ||
14567 User->getOperand(1) == PromOps[i])
14568 return SDValue();
14569 }
14570 }
14571 }
14572
14573 // Replace all inputs with the extension operand.
14574 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
14575 // Constants may have users outside the cluster of to-be-promoted nodes,
14576 // and so we need to replace those as we do the promotions.
14577 if (isa<ConstantSDNode>(Inputs[i]))
14578 continue;
14579 else
14580 DAG.ReplaceAllUsesOfValueWith(Inputs[i], Inputs[i].getOperand(0));
14581 }
14582
14583 std::list<HandleSDNode> PromOpHandles;
14584 for (auto &PromOp : PromOps)
14585 PromOpHandles.emplace_back(PromOp);
14586
14587 // Replace all operations (these are all the same, but have a different
14588 // (i1) return type). DAG.getNode will validate that the types of
14589 // a binary operator match, so go through the list in reverse so that
14590 // we've likely promoted both operands first. Any intermediate truncations or
14591 // extensions disappear.
14592 while (!PromOpHandles.empty()) {
14593 SDValue PromOp = PromOpHandles.back().getValue();
14594 PromOpHandles.pop_back();
14595
14596 if (PromOp.getOpcode() == ISD::TRUNCATE ||
14597 PromOp.getOpcode() == ISD::SIGN_EXTEND ||
14598 PromOp.getOpcode() == ISD::ZERO_EXTEND ||
14599 PromOp.getOpcode() == ISD::ANY_EXTEND) {
14600 if (!isa<ConstantSDNode>(PromOp.getOperand(0)) &&
14601 PromOp.getOperand(0).getValueType() != MVT::i1) {
14602 // The operand is not yet ready (see comment below).
14603 PromOpHandles.emplace_front(PromOp);
14604 continue;
14605 }
14606
14607 SDValue RepValue = PromOp.getOperand(0);
14608 if (isa<ConstantSDNode>(RepValue))
14609 RepValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, RepValue);
14610
14611 DAG.ReplaceAllUsesOfValueWith(PromOp, RepValue);
14612 continue;
14613 }
14614
14615 unsigned C;
14616 switch (PromOp.getOpcode()) {
14617 default: C = 0; break;
14618 case ISD::SELECT: C = 1; break;
14619 case ISD::SELECT_CC: C = 2; break;
14620 }
14621
14622 if ((!isa<ConstantSDNode>(PromOp.getOperand(C)) &&
14623 PromOp.getOperand(C).getValueType() != MVT::i1) ||
14624 (!isa<ConstantSDNode>(PromOp.getOperand(C+1)) &&
14625 PromOp.getOperand(C+1).getValueType() != MVT::i1)) {
14626 // The to-be-promoted operands of this node have not yet been
14627 // promoted (this should be rare because we're going through the
14628 // list backward, but if one of the operands has several users in
14629 // this cluster of to-be-promoted nodes, it is possible).
14630 PromOpHandles.emplace_front(PromOp);
14631 continue;
14632 }
14633
14634 SmallVector<SDValue, 3> Ops(PromOp.getNode()->ops());
14635
14636 // If there are any constant inputs, make sure they're replaced now.
14637 for (unsigned i = 0; i < 2; ++i)
14638 if (isa<ConstantSDNode>(Ops[C+i]))
14639 Ops[C+i] = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, Ops[C+i]);
14640
14641 DAG.ReplaceAllUsesOfValueWith(PromOp,
14642 DAG.getNode(PromOp.getOpcode(), dl, MVT::i1, Ops));
14643 }
14644
14645 // Now we're left with the initial truncation itself.
14646 if (N->getOpcode() == ISD::TRUNCATE)
14647 return N->getOperand(0);
14648
14649 // Otherwise, this is a comparison. The operands to be compared have just
14650 // changed type (to i1), but everything else is the same.
14651 return SDValue(N, 0);
14652}
14653
14654SDValue PPCTargetLowering::DAGCombineExtBoolTrunc(SDNode *N,
14655 DAGCombinerInfo &DCI) const {
14656 SelectionDAG &DAG = DCI.DAG;
14657 SDLoc dl(N);
14658
14659 // If we're tracking CR bits, we need to be careful that we don't have:
14660 // zext(binary-ops(trunc(x), trunc(y)))
14661 // or
14662 // zext(binary-ops(binary-ops(trunc(x), trunc(y)), ...))
14663 // such that we're unnecessarily moving things into CR bits that can more
14664 // efficiently stay in GPRs. Note that if we're not certain that the high
14665 // bits are set as required by the final extension, we still may need to do
14666 // some masking to get the proper behavior.
14667
14668 // This same functionality is important on PPC64 when dealing with
14669 // 32-to-64-bit extensions; these occur often when 32-bit values are used as
14670 // the return values of functions. Because it is so similar, it is handled
14671 // here as well.
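 // Illustrative example (PPC64): zext(and(trunc(x), trunc(y))) from i32 to
 // i64 can become and(x, y) performed directly on the i64 values, possibly
 // followed by a mask (or a shift pair for sign extension) when the high bits
 // are not already known to have the required value.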
14672
14673 if (N->getValueType(0) != MVT::i32 &&
14674 N->getValueType(0) != MVT::i64)
14675 return SDValue();
14676
14677 if (!((N->getOperand(0).getValueType() == MVT::i1 && Subtarget.useCRBits()) ||
14678 (N->getOperand(0).getValueType() == MVT::i32 && Subtarget.isPPC64())))
14679 return SDValue();
14680
14681 if (N->getOperand(0).getOpcode() != ISD::AND &&
14682 N->getOperand(0).getOpcode() != ISD::OR &&
14683 N->getOperand(0).getOpcode() != ISD::XOR &&
14684 N->getOperand(0).getOpcode() != ISD::SELECT &&
14685 N->getOperand(0).getOpcode() != ISD::SELECT_CC)
14686 return SDValue();
14687
14688 SmallVector<SDValue, 4> Inputs;
14689 SmallVector<SDValue, 8> BinOps(1, N->getOperand(0)), PromOps;
14690 SmallPtrSet<SDNode *, 16> Visited;
14691
14692 // Visit all inputs, collect all binary operations (and, or, xor and
14693 // select) that are all fed by truncations.
14694 while (!BinOps.empty()) {
14695 SDValue BinOp = BinOps.pop_back_val();
14696
14697 if (!Visited.insert(BinOp.getNode()).second)
14698 continue;
14699
14700 PromOps.push_back(BinOp);
14701
14702 for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) {
14703 // The condition of the select is not promoted.
14704 if (BinOp.getOpcode() == ISD::SELECT && i == 0)
14705 continue;
14706 if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3)
14707 continue;
14708
14709 if (BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE ||
14710 isa<ConstantSDNode>(BinOp.getOperand(i))) {
14711 Inputs.push_back(BinOp.getOperand(i));
14712 } else if (BinOp.getOperand(i).getOpcode() == ISD::AND ||
14713 BinOp.getOperand(i).getOpcode() == ISD::OR ||
14714 BinOp.getOperand(i).getOpcode() == ISD::XOR ||
14715 BinOp.getOperand(i).getOpcode() == ISD::SELECT ||
14716 BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC) {
14717 BinOps.push_back(BinOp.getOperand(i));
14718 } else {
14719 // We have an input that is not a truncation or another binary
14720 // operation; we'll abort this transformation.
14721 return SDValue();
14722 }
14723 }
14724 }
14725
14726 // The operands of a select that must be truncated when the select is
14727 // promoted because the operand is actually part of the to-be-promoted set.
14728 DenseMap<SDNode *, EVT> SelectTruncOp[2];
14729
14730 // Make sure that this is a self-contained cluster of operations (which
14731 // is not quite the same thing as saying that everything has only one
14732 // use).
14733 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
14734 if (isa<ConstantSDNode>(Inputs[i]))
14735 continue;
14736
14737 for (SDNode *User : Inputs[i].getNode()->users()) {
14738 if (User != N && !Visited.count(User))
14739 return SDValue();
14740
14741 // If we're going to promote the non-output-value operand(s) or SELECT or
14742 // SELECT_CC, record them for truncation.
14743 if (User->getOpcode() == ISD::SELECT) {
14744 if (User->getOperand(0) == Inputs[i])
14745 SelectTruncOp[0].insert(std::make_pair(User,
14746 User->getOperand(0).getValueType()));
14747 } else if (User->getOpcode() == ISD::SELECT_CC) {
14748 if (User->getOperand(0) == Inputs[i])
14749 SelectTruncOp[0].insert(std::make_pair(User,
14750 User->getOperand(0).getValueType()));
14751 if (User->getOperand(1) == Inputs[i])
14752 SelectTruncOp[1].insert(std::make_pair(User,
14753 User->getOperand(1).getValueType()));
14754 }
14755 }
14756 }
14757
14758 for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) {
14759 for (SDNode *User : PromOps[i].getNode()->users()) {
14760 if (User != N && !Visited.count(User))
14761 return SDValue();
14762
14763 // If we're going to promote the non-output-value operand(s) or SELECT or
14764 // SELECT_CC, record them for truncation.
14765 if (User->getOpcode() == ISD::SELECT) {
14766 if (User->getOperand(0) == PromOps[i])
14767 SelectTruncOp[0].insert(std::make_pair(User,
14768 User->getOperand(0).getValueType()));
14769 } else if (User->getOpcode() == ISD::SELECT_CC) {
14770 if (User->getOperand(0) == PromOps[i])
14771 SelectTruncOp[0].insert(std::make_pair(User,
14772 User->getOperand(0).getValueType()));
14773 if (User->getOperand(1) == PromOps[i])
14774 SelectTruncOp[1].insert(std::make_pair(User,
14775 User->getOperand(1).getValueType()));
14776 }
14777 }
14778 }
14779
14780 unsigned PromBits = N->getOperand(0).getValueSizeInBits();
14781 bool ReallyNeedsExt = false;
14782 if (N->getOpcode() != ISD::ANY_EXTEND) {
14783 // If all of the inputs are not already sign/zero extended, then
14784 // we'll still need to do that at the end.
14785 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
14786 if (isa<ConstantSDNode>(Inputs[i]))
14787 continue;
14788
14789 unsigned OpBits =
14790 Inputs[i].getOperand(0).getValueSizeInBits();
14791 assert(PromBits < OpBits && "Truncation not to a smaller bit count?");
14792
14793 if ((N->getOpcode() == ISD::ZERO_EXTEND &&
14794 !DAG.MaskedValueIsZero(Inputs[i].getOperand(0),
14795 APInt::getHighBitsSet(OpBits,
14796 OpBits-PromBits))) ||
14797 (N->getOpcode() == ISD::SIGN_EXTEND &&
14798 DAG.ComputeNumSignBits(Inputs[i].getOperand(0)) <
14799 (OpBits-(PromBits-1)))) {
14800 ReallyNeedsExt = true;
14801 break;
14802 }
14803 }
14804 }
14805
14806 // Replace all inputs, either with the truncation operand, or a
14807 // truncation or extension to the final output type.
14808 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
14809 // Constant inputs need to be replaced with the to-be-promoted nodes that
14810 // use them because they might have users outside of the cluster of
14811 // promoted nodes.
14812 if (isa<ConstantSDNode>(Inputs[i]))
14813 continue;
14814
14815 SDValue InSrc = Inputs[i].getOperand(0);
14816 if (Inputs[i].getValueType() == N->getValueType(0))
14817 DAG.ReplaceAllUsesOfValueWith(Inputs[i], InSrc);
14818 else if (N->getOpcode() == ISD::SIGN_EXTEND)
14819 DAG.ReplaceAllUsesOfValueWith(Inputs[i],
14820 DAG.getSExtOrTrunc(InSrc, dl, N->getValueType(0)));
14821 else if (N->getOpcode() == ISD::ZERO_EXTEND)
14822 DAG.ReplaceAllUsesOfValueWith(Inputs[i],
14823 DAG.getZExtOrTrunc(InSrc, dl, N->getValueType(0)));
14824 else
14825 DAG.ReplaceAllUsesOfValueWith(Inputs[i],
14826 DAG.getAnyExtOrTrunc(InSrc, dl, N->getValueType(0)));
14827 }
14828
14829 std::list<HandleSDNode> PromOpHandles;
14830 for (auto &PromOp : PromOps)
14831 PromOpHandles.emplace_back(PromOp);
14832
14833 // Replace all operations (these are all the same, but have a different
14834 // (promoted) return type). DAG.getNode will validate that the types of
14835 // a binary operator match, so go through the list in reverse so that
14836 // we've likely promoted both operands first.
14837 while (!PromOpHandles.empty()) {
14838 SDValue PromOp = PromOpHandles.back().getValue();
14839 PromOpHandles.pop_back();
14840
14841 unsigned C;
14842 switch (PromOp.getOpcode()) {
14843 default: C = 0; break;
14844 case ISD::SELECT: C = 1; break;
14845 case ISD::SELECT_CC: C = 2; break;
14846 }
14847
14848 if ((!isa<ConstantSDNode>(PromOp.getOperand(C)) &&
14849 PromOp.getOperand(C).getValueType() != N->getValueType(0)) ||
14850 (!isa<ConstantSDNode>(PromOp.getOperand(C+1)) &&
14851 PromOp.getOperand(C+1).getValueType() != N->getValueType(0))) {
14852 // The to-be-promoted operands of this node have not yet been
14853 // promoted (this should be rare because we're going through the
14854 // list backward, but if one of the operands has several users in
14855 // this cluster of to-be-promoted nodes, it is possible).
14856 PromOpHandles.emplace_front(PromOp);
14857 continue;
14858 }
14859
14860 // For SELECT and SELECT_CC nodes, we do a similar check for any
14861 // to-be-promoted comparison inputs.
14862 if (PromOp.getOpcode() == ISD::SELECT ||
14863 PromOp.getOpcode() == ISD::SELECT_CC) {
14864 if ((SelectTruncOp[0].count(PromOp.getNode()) &&
14865 PromOp.getOperand(0).getValueType() != N->getValueType(0)) ||
14866 (SelectTruncOp[1].count(PromOp.getNode()) &&
14867 PromOp.getOperand(1).getValueType() != N->getValueType(0))) {
14868 PromOpHandles.emplace_front(PromOp);
14869 continue;
14870 }
14871 }
14872
14873 SmallVector<SDValue, 3> Ops(PromOp.getNode()->op_begin(),
14874 PromOp.getNode()->op_end());
14875
14876 // If this node has constant inputs, then they'll need to be promoted here.
14877 for (unsigned i = 0; i < 2; ++i) {
14878 if (!isa<ConstantSDNode>(Ops[C+i]))
14879 continue;
14880 if (Ops[C+i].getValueType() == N->getValueType(0))
14881 continue;
14882
14883 if (N->getOpcode() == ISD::SIGN_EXTEND)
14884 Ops[C+i] = DAG.getSExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
14885 else if (N->getOpcode() == ISD::ZERO_EXTEND)
14886 Ops[C+i] = DAG.getZExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
14887 else
14888 Ops[C+i] = DAG.getAnyExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
14889 }
14890
14891 // If we've promoted the comparison inputs of a SELECT or SELECT_CC,
14892 // truncate them again to the original value type.
14893 if (PromOp.getOpcode() == ISD::SELECT ||
14894 PromOp.getOpcode() == ISD::SELECT_CC) {
14895 auto SI0 = SelectTruncOp[0].find(PromOp.getNode());
14896 if (SI0 != SelectTruncOp[0].end())
14897 Ops[0] = DAG.getNode(ISD::TRUNCATE, dl, SI0->second, Ops[0]);
14898 auto SI1 = SelectTruncOp[1].find(PromOp.getNode());
14899 if (SI1 != SelectTruncOp[1].end())
14900 Ops[1] = DAG.getNode(ISD::TRUNCATE, dl, SI1->second, Ops[1]);
14901 }
14902
14903 DAG.ReplaceAllUsesOfValueWith(PromOp,
14904 DAG.getNode(PromOp.getOpcode(), dl, N->getValueType(0), Ops));
14905 }
14906
14907 // Now we're left with the initial extension itself.
14908 if (!ReallyNeedsExt)
14909 return N->getOperand(0);
14910
14911 // To zero extend, just mask off everything except for the first bit (in the
14912 // i1 case).
14913 if (N->getOpcode() == ISD::ZERO_EXTEND)
14914 return DAG.getNode(ISD::AND, dl, N->getValueType(0), N->getOperand(0),
14915 DAG.getConstant(APInt::getLowBitsSet(
14916 N->getValueSizeInBits(0), PromBits),
14917 dl, N->getValueType(0)));
14918
14919 assert(N->getOpcode() == ISD::SIGN_EXTEND &&
14920 "Invalid extension type");
14921 EVT ShiftAmountTy = getShiftAmountTy(N->getValueType(0), DAG.getDataLayout());
14922 SDValue ShiftCst =
14923 DAG.getConstant(N->getValueSizeInBits(0) - PromBits, dl, ShiftAmountTy);
14924 return DAG.getNode(
14925 ISD::SRA, dl, N->getValueType(0),
14926 DAG.getNode(ISD::SHL, dl, N->getValueType(0), N->getOperand(0), ShiftCst),
14927 ShiftCst);
14928}
14929
14930SDValue PPCTargetLowering::combineSetCC(SDNode *N,
14931 DAGCombinerInfo &DCI) const {
14932 assert(N->getOpcode() == ISD::SETCC &&
14933 "Should be called with a SETCC node");
14934
14935 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
14936 if (CC == ISD::SETNE || CC == ISD::SETEQ) {
14937 SDValue LHS = N->getOperand(0);
14938 SDValue RHS = N->getOperand(1);
14939
14940 // If there is a '0 - y' pattern, canonicalize the pattern to the RHS.
14941 if (LHS.getOpcode() == ISD::SUB && isNullConstant(LHS.getOperand(0)) &&
14942 LHS.hasOneUse())
14943 std::swap(LHS, RHS);
14944
14945 // x == 0-y --> x+y == 0
14946 // x != 0-y --> x+y != 0
14947 if (RHS.getOpcode() == ISD::SUB && isNullConstant(RHS.getOperand(0)) &&
14948 RHS.hasOneUse()) {
14949 SDLoc DL(N);
14950 SelectionDAG &DAG = DCI.DAG;
14951 EVT VT = N->getValueType(0);
14952 EVT OpVT = LHS.getValueType();
14953 SDValue Add = DAG.getNode(ISD::ADD, DL, OpVT, LHS, RHS.getOperand(1));
14954 return DAG.getSetCC(DL, VT, Add, DAG.getConstant(0, DL, OpVT), CC);
14955 }
14956 }
14957
14958 return DAGCombineTruncBoolExt(N, DCI);
14959}
14960
14961// Is this an extending load from an f32 to an f64?
14962static bool isFPExtLoad(SDValue Op) {
14963 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Op.getNode()))
14964 return LD->getExtensionType() == ISD::EXTLOAD &&
14965 Op.getValueType() == MVT::f64;
14966 return false;
14967}
14968
14969/// Reduces the number of fp-to-int conversion when building a vector.
14970///
14971/// If this vector is built out of floating to integer conversions,
14972/// transform it to a vector built out of floating point values followed by a
14973/// single floating to integer conversion of the vector.
14974/// Namely (build_vector (fptosi $A), (fptosi $B), ...)
14975/// becomes (fptosi (build_vector ($A, $B, ...)))
14976SDValue PPCTargetLowering::
14977combineElementTruncationToVectorTruncation(SDNode *N,
14978 DAGCombinerInfo &DCI) const {
14979 assert(N->getOpcode() == ISD::BUILD_VECTOR &&
14980 "Should be called with a BUILD_VECTOR node");
14981
14982 SelectionDAG &DAG = DCI.DAG;
14983 SDLoc dl(N);
14984
14985 SDValue FirstInput = N->getOperand(0);
14986 assert(FirstInput.getOpcode() == PPCISD::MFVSR &&
14987 "The input operand must be an fp-to-int conversion.");
14988
14989 // This combine happens after legalization so the fp_to_[su]i nodes are
14990 // already converted to PPCISD nodes.
14991 unsigned FirstConversion = FirstInput.getOperand(0).getOpcode();
14992 if (FirstConversion == PPCISD::FCTIDZ ||
14993 FirstConversion == PPCISD::FCTIDUZ ||
14994 FirstConversion == PPCISD::FCTIWZ ||
14995 FirstConversion == PPCISD::FCTIWUZ) {
14996 bool IsSplat = true;
14997 bool Is32Bit = FirstConversion == PPCISD::FCTIWZ ||
14998 FirstConversion == PPCISD::FCTIWUZ;
14999 EVT SrcVT = FirstInput.getOperand(0).getValueType();
15000 SmallVector<SDValue, 4> Ops;
15001 EVT TargetVT = N->getValueType(0);
15002 for (int i = 0, e = N->getNumOperands(); i < e; ++i) {
15003 SDValue NextOp = N->getOperand(i);
15004 if (NextOp.getOpcode() != PPCISD::MFVSR)
15005 return SDValue();
15006 unsigned NextConversion = NextOp.getOperand(0).getOpcode();
15007 if (NextConversion != FirstConversion)
15008 return SDValue();
15009 // If we are converting to 32-bit integers, we need to add an FP_ROUND.
15010 // This is not valid if the input was originally double precision. It is
15011 // also not profitable to do unless this is an extending load in which
15012 // case doing this combine will allow us to combine consecutive loads.
15013 if (Is32Bit && !isFPExtLoad(NextOp.getOperand(0).getOperand(0)))
15014 return SDValue();
15015 if (N->getOperand(i) != FirstInput)
15016 IsSplat = false;
15017 }
15018
15019 // If this is a splat, we leave it as-is since there will be only a single
15020 // fp-to-int conversion followed by a splat of the integer. This is better
15021 // for 32-bit and smaller ints and neutral for 64-bit ints.
15022 if (IsSplat)
15023 return SDValue();
15024
15025 // Now that we know we have the right type of node, get its operands
15026 for (int i = 0, e = N->getNumOperands(); i < e; ++i) {
15027 SDValue In = N->getOperand(i).getOperand(0);
15028 if (Is32Bit) {
15029 // For 32-bit values, we need to add an FP_ROUND node (if we made it
15030 // here, we know that all inputs are extending loads so this is safe).
15031 if (In.isUndef())
15032 Ops.push_back(DAG.getUNDEF(SrcVT));
15033 else {
15034 SDValue Trunc =
15035 DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, In.getOperand(0),
15036 DAG.getIntPtrConstant(1, dl, /*isTarget=*/true));
15037 Ops.push_back(Trunc);
15038 }
15039 } else
15040 Ops.push_back(In.isUndef() ? DAG.getUNDEF(SrcVT) : In.getOperand(0));
15041 }
15042
15043 unsigned Opcode;
15044 if (FirstConversion == PPCISD::FCTIDZ ||
15045 FirstConversion == PPCISD::FCTIWZ)
15046 Opcode = ISD::FP_TO_SINT;
15047 else
15048 Opcode = ISD::FP_TO_UINT;
15049
15050 EVT NewVT = TargetVT == MVT::v2i64 ? MVT::v2f64 : MVT::v4f32;
15051 SDValue BV = DAG.getBuildVector(NewVT, dl, Ops);
15052 return DAG.getNode(Opcode, dl, TargetVT, BV);
15053 }
15054 return SDValue();
15055}
15056
15057/// Reduce the number of loads when building a vector.
15058///
15059/// Building a vector out of multiple loads can be converted to a load
15060/// of the vector type if the loads are consecutive. If the loads are
15061/// consecutive but in descending order, a shuffle is added at the end
15062/// to reorder the vector.
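/// Illustrative example:
///   (v4i32 build_vector (load p), (load p+4), (load p+8), (load p+12))
/// becomes a single (v4i32 load p); if the loads instead appear in descending
/// address order, the wide load is followed by a reversing shuffle.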
15063 static SDValue combineBVOfConsecutiveLoads(SDNode *N, SelectionDAG &DAG) {
15064 assert(N->getOpcode() == ISD::BUILD_VECTOR &&
15065 "Should be called with a BUILD_VECTOR node");
15066
15067 SDLoc dl(N);
15068
15069 // Return early for non-byte-sized types, as they can't be consecutive.
15070 if (!N->getValueType(0).getVectorElementType().isByteSized())
15071 return SDValue();
15072
15073 bool InputsAreConsecutiveLoads = true;
15074 bool InputsAreReverseConsecutive = true;
15075 unsigned ElemSize = N->getValueType(0).getScalarType().getStoreSize();
15076 SDValue FirstInput = N->getOperand(0);
15077 bool IsRoundOfExtLoad = false;
15078 LoadSDNode *FirstLoad = nullptr;
15079
15080 if (FirstInput.getOpcode() == ISD::FP_ROUND &&
15081 FirstInput.getOperand(0).getOpcode() == ISD::LOAD) {
15082 FirstLoad = cast<LoadSDNode>(FirstInput.getOperand(0));
15083 IsRoundOfExtLoad = FirstLoad->getExtensionType() == ISD::EXTLOAD;
15084 }
15085 // Not a build vector of (possibly fp_rounded) loads.
15086 if ((!IsRoundOfExtLoad && FirstInput.getOpcode() != ISD::LOAD) ||
15087 N->getNumOperands() == 1)
15088 return SDValue();
15089
15090 if (!IsRoundOfExtLoad)
15091 FirstLoad = cast<LoadSDNode>(FirstInput);
15092
15093 SmallVector<LoadSDNode *, 4> InputLoads;
15094 InputLoads.push_back(FirstLoad);
15095 for (int i = 1, e = N->getNumOperands(); i < e; ++i) {
15096 // If any inputs are fp_round(extload), they all must be.
15097 if (IsRoundOfExtLoad && N->getOperand(i).getOpcode() != ISD::FP_ROUND)
15098 return SDValue();
15099
15100 SDValue NextInput = IsRoundOfExtLoad ? N->getOperand(i).getOperand(0) :
15101 N->getOperand(i);
15102 if (NextInput.getOpcode() != ISD::LOAD)
15103 return SDValue();
15104
15105 SDValue PreviousInput =
15106 IsRoundOfExtLoad ? N->getOperand(i-1).getOperand(0) : N->getOperand(i-1);
15107 LoadSDNode *LD1 = cast<LoadSDNode>(PreviousInput);
15108 LoadSDNode *LD2 = cast<LoadSDNode>(NextInput);
15109
15110 // If any inputs are fp_round(extload), they all must be.
15111 if (IsRoundOfExtLoad && LD2->getExtensionType() != ISD::EXTLOAD)
15112 return SDValue();
15113
15114 // We only care about regular loads. The PPC-specific load intrinsics
15115 // will not lead to a merge opportunity.
15116 if (!DAG.areNonVolatileConsecutiveLoads(LD2, LD1, ElemSize, 1))
15117 InputsAreConsecutiveLoads = false;
15118 if (!DAG.areNonVolatileConsecutiveLoads(LD1, LD2, ElemSize, 1))
15119 InputsAreReverseConsecutive = false;
15120
15121 // Exit early if the loads are neither consecutive nor reverse consecutive.
15122 if (!InputsAreConsecutiveLoads && !InputsAreReverseConsecutive)
15123 return SDValue();
15124 InputLoads.push_back(LD2);
15125 }
15126
15127 assert(!(InputsAreConsecutiveLoads && InputsAreReverseConsecutive) &&
15128 "The loads cannot be both consecutive and reverse consecutive.");
15129
15130 SDValue WideLoad;
15131 SDValue ReturnSDVal;
15132 if (InputsAreConsecutiveLoads) {
15133 assert(FirstLoad && "Input needs to be a LoadSDNode.");
15134 WideLoad = DAG.getLoad(N->getValueType(0), dl, FirstLoad->getChain(),
15135 FirstLoad->getBasePtr(), FirstLoad->getPointerInfo(),
15136 FirstLoad->getAlign());
15137 ReturnSDVal = WideLoad;
15138 } else if (InputsAreReverseConsecutive) {
15139 LoadSDNode *LastLoad = InputLoads.back();
15140 assert(LastLoad && "Input needs to be a LoadSDNode.");
15141 WideLoad = DAG.getLoad(N->getValueType(0), dl, LastLoad->getChain(),
15142 LastLoad->getBasePtr(), LastLoad->getPointerInfo(),
15143 LastLoad->getAlign());
15144 SmallVector<int, 16> Ops;
15145 for (int i = N->getNumOperands() - 1; i >= 0; i--)
15146 Ops.push_back(i);
15147
15148 ReturnSDVal = DAG.getVectorShuffle(N->getValueType(0), dl, WideLoad,
15149 DAG.getUNDEF(N->getValueType(0)), Ops);
15150 } else
15151 return SDValue();
15152
15153 for (auto *LD : InputLoads)
15154 DAG.makeEquivalentMemoryOrdering(LD, WideLoad);
15155 return ReturnSDVal;
15156}
15157
15158// This function adds the required vector_shuffle needed to get
15159// the elements of the vector extract in the correct position
15160// as specified by the CorrectElems encoding.
15161 static SDValue addShuffleForVecExtend(SDNode *N, SelectionDAG &DAG,
15162 SDValue Input, uint64_t Elems,
15163 uint64_t CorrectElems) {
15164 SDLoc dl(N);
15165
15166 unsigned NumElems = Input.getValueType().getVectorNumElements();
15167 SmallVector<int, 16> ShuffleMask(NumElems, -1);
15168
15169 // Knowing the element indices being extracted from the original
15170 // vector and the order in which they're being inserted, just put
15171 // them at element indices required for the instruction.
15172 for (unsigned i = 0; i < N->getNumOperands(); i++) {
15173 if (DAG.getDataLayout().isLittleEndian())
15174 ShuffleMask[CorrectElems & 0xF] = Elems & 0xF;
15175 else
15176 ShuffleMask[(CorrectElems & 0xF0) >> 4] = (Elems & 0xF0) >> 4;
15177 CorrectElems = CorrectElems >> 8;
15178 Elems = Elems >> 8;
15179 }
15180
15181 SDValue Shuffle =
15182 DAG.getVectorShuffle(Input.getValueType(), dl, Input,
15183 DAG.getUNDEF(Input.getValueType()), ShuffleMask);
15184
15185 EVT VT = N->getValueType(0);
15186 SDValue Conv = DAG.getBitcast(VT, Shuffle);
15187
15188 EVT ExtVT = EVT::getVectorVT(*DAG.getContext(),
15189 Input.getValueType().getVectorElementType(),
15190 VT.getVectorNumElements());
15191 return DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, VT, Conv,
15192 DAG.getValueType(ExtVT));
15193}
15194
15195// Look for build vector patterns where input operands come from sign
15196// extended vector_extract elements of specific indices. If the correct indices
15197// aren't used, add a vector shuffle to fix up the indices and create
15198// SIGN_EXTEND_INREG node which selects the vector sign extend instructions
15199// during instruction selection.
15200 static SDValue combineBVOfVecSExt(SDNode *N, SelectionDAG &DAG) {
15201 // This array encodes the indices that the vector sign extend instructions
15202 // extract from when extending from one type to another for both BE and LE.
15203 // The right nibble of each byte corresponds to the LE indices,
15204 // and the left nibble of each byte corresponds to the BE indices.
15205 // For example: 0x3074B8FC byte->word
15206 // For LE: the allowed indices are: 0x0,0x4,0x8,0xC
15207 // For BE: the allowed indices are: 0x3,0x7,0xB,0xF
15208 // For example: 0x000070F8 byte->double word
15209 // For LE: the allowed indices are: 0x0,0x8
15210 // For BE: the allowed indices are: 0x7,0xF
15211 uint64_t TargetElems[] = {
15212 0x3074B8FC, // b->w
15213 0x000070F8, // b->d
15214 0x10325476, // h->w
15215 0x00003074, // h->d
15216 0x00001032, // w->d
15217 };
15218
15219 uint64_t Elems = 0;
15220 int Index;
15221 SDValue Input;
15222
15223 auto isSExtOfVecExtract = [&](SDValue Op) -> bool {
15224 if (!Op)
15225 return false;
15226 if (Op.getOpcode() != ISD::SIGN_EXTEND &&
15227 Op.getOpcode() != ISD::SIGN_EXTEND_INREG)
15228 return false;
15229
15230 // A SIGN_EXTEND_INREG might be fed by an ANY_EXTEND to produce a value
15231 // of the right width.
15232 SDValue Extract = Op.getOperand(0);
15233 if (Extract.getOpcode() == ISD::ANY_EXTEND)
15234 Extract = Extract.getOperand(0);
15235 if (Extract.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
15236 return false;
15237
15238 ConstantSDNode *ExtOp = dyn_cast<ConstantSDNode>(Extract.getOperand(1));
15239 if (!ExtOp)
15240 return false;
15241
15242 Index = ExtOp->getZExtValue();
15243 if (Input && Input != Extract.getOperand(0))
15244 return false;
15245
15246 if (!Input)
15247 Input = Extract.getOperand(0);
15248
15249 Elems = Elems << 8;
15250 Index = DAG.getDataLayout().isLittleEndian() ? Index : Index << 4;
15251 Elems |= Index;
15252
15253 return true;
15254 };
15255
15256 // If the build vector operands aren't sign extended vector extracts
15257 // of the same input vector, then return.
15258 for (unsigned i = 0; i < N->getNumOperands(); i++) {
15259 if (!isSExtOfVecExtract(N->getOperand(i))) {
15260 return SDValue();
15261 }
15262 }
15263
15264 // If the vector extract indices are not correct, add the appropriate
15265 // vector_shuffle.
15266 int TgtElemArrayIdx;
15267 int InputSize = Input.getValueType().getScalarSizeInBits();
15268 int OutputSize = N->getValueType(0).getScalarSizeInBits();
15269 if (InputSize + OutputSize == 40)
15270 TgtElemArrayIdx = 0;
15271 else if (InputSize + OutputSize == 72)
15272 TgtElemArrayIdx = 1;
15273 else if (InputSize + OutputSize == 48)
15274 TgtElemArrayIdx = 2;
15275 else if (InputSize + OutputSize == 80)
15276 TgtElemArrayIdx = 3;
15277 else if (InputSize + OutputSize == 96)
15278 TgtElemArrayIdx = 4;
15279 else
15280 return SDValue();
15281
15282 uint64_t CorrectElems = TargetElems[TgtElemArrayIdx];
15283 CorrectElems = DAG.getDataLayout().isLittleEndian()
15284 ? CorrectElems & 0x0F0F0F0F0F0F0F0F
15285 : CorrectElems & 0xF0F0F0F0F0F0F0F0;
15286 if (Elems != CorrectElems) {
15287 return addShuffleForVecExtend(N, DAG, Input, Elems, CorrectElems);
15288 }
15289
15290 // Regular lowering will catch cases where a shuffle is not needed.
15291 return SDValue();
15292}
15293
15294// Look for the pattern of a load from a narrow width to i128, feeding
15295// into a BUILD_VECTOR of v1i128. Replace this sequence with a PPCISD node
15296// (LXVRZX). This node represents a zero extending load that will be matched
15297// to the Load VSX Vector Rightmost instructions.
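// Illustrative example:
//   (v1i128 build_vector (i128 zextload i64 from p))
// becomes (PPCISD::LXVRZX chain, p, 64), which is later matched to one of the
// Load VSX Vector Rightmost instructions (the doubleword form in this case).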
15298 static SDValue combineBVZEXTLOAD(SDNode *N, SelectionDAG &DAG) {
15299 SDLoc DL(N);
15300
15301 // This combine is only eligible for a BUILD_VECTOR of v1i128.
15302 if (N->getValueType(0) != MVT::v1i128)
15303 return SDValue();
15304
15305 SDValue Operand = N->getOperand(0);
15306 // Proceed with the transformation if the operand to the BUILD_VECTOR
15307 // is a load instruction.
15308 if (Operand.getOpcode() != ISD::LOAD)
15309 return SDValue();
15310
15311 auto *LD = cast<LoadSDNode>(Operand);
15312 EVT MemoryType = LD->getMemoryVT();
15313
15314 // This transformation is only valid if we are loading either a byte,
15315 // halfword, word, or doubleword.
15316 bool ValidLDType = MemoryType == MVT::i8 || MemoryType == MVT::i16 ||
15317 MemoryType == MVT::i32 || MemoryType == MVT::i64;
15318
15319 // Ensure that the load from the narrow width is being zero extended to i128.
15320 if (!ValidLDType ||
15321 (LD->getExtensionType() != ISD::ZEXTLOAD &&
15322 LD->getExtensionType() != ISD::EXTLOAD))
15323 return SDValue();
15324
15325 SDValue LoadOps[] = {
15326 LD->getChain(), LD->getBasePtr(),
15327 DAG.getIntPtrConstant(MemoryType.getScalarSizeInBits(), DL)};
15328
15329 return DAG.getMemIntrinsicNode(PPCISD::LXVRZX, DL,
15330 DAG.getVTList(MVT::v1i128, MVT::Other),
15331 LoadOps, MemoryType, LD->getMemOperand());
15332}
15333
15334SDValue PPCTargetLowering::DAGCombineBuildVector(SDNode *N,
15335 DAGCombinerInfo &DCI) const {
15336 assert(N->getOpcode() == ISD::BUILD_VECTOR &&
15337 "Should be called with a BUILD_VECTOR node");
15338
15339 SelectionDAG &DAG = DCI.DAG;
15340 SDLoc dl(N);
15341
15342 if (!Subtarget.hasVSX())
15343 return SDValue();
15344
15345 // The target independent DAG combiner will leave a build_vector of
15346 // float-to-int conversions intact. We can generate MUCH better code for
15347 // a float-to-int conversion of a vector of floats.
15348 SDValue FirstInput = N->getOperand(0);
15349 if (FirstInput.getOpcode() == PPCISD::MFVSR) {
15350 SDValue Reduced = combineElementTruncationToVectorTruncation(N, DCI);
15351 if (Reduced)
15352 return Reduced;
15353 }
15354
15355 // If we're building a vector out of consecutive loads, just load that
15356 // vector type.
15357 SDValue Reduced = combineBVOfConsecutiveLoads(N, DAG);
15358 if (Reduced)
15359 return Reduced;
15360
15361 // If we're building a vector out of extended elements from another vector
15362 // we have P9 vector integer extend instructions. The code assumes legal
15363 // input types (i.e. it can't handle things like v4i16) so do not run before
15364 // legalization.
15365 if (Subtarget.hasP9Altivec() && !DCI.isBeforeLegalize()) {
15366 Reduced = combineBVOfVecSExt(N, DAG);
15367 if (Reduced)
15368 return Reduced;
15369 }
15370
15371 // On Power10, the Load VSX Vector Rightmost instructions can be utilized
15372 // if this is a BUILD_VECTOR of v1i128, and if the operand to the BUILD_VECTOR
15373 // is a load from <valid narrow width> to i128.
15374 if (Subtarget.isISA3_1()) {
15375 SDValue BVOfZLoad = combineBVZEXTLOAD(N, DAG);
15376 if (BVOfZLoad)
15377 return BVOfZLoad;
15378 }
15379
15380 if (N->getValueType(0) != MVT::v2f64)
15381 return SDValue();
15382
15383 // Looking for:
15384 // (build_vector ([su]int_to_fp (extractelt 0)), [su]int_to_fp (extractelt 1))
15385 if (FirstInput.getOpcode() != ISD::SINT_TO_FP &&
15386 FirstInput.getOpcode() != ISD::UINT_TO_FP)
15387 return SDValue();
15388 if (N->getOperand(1).getOpcode() != ISD::SINT_TO_FP &&
15389 N->getOperand(1).getOpcode() != ISD::UINT_TO_FP)
15390 return SDValue();
15391 if (FirstInput.getOpcode() != N->getOperand(1).getOpcode())
15392 return SDValue();
15393
15394 SDValue Ext1 = FirstInput.getOperand(0);
15395 SDValue Ext2 = N->getOperand(1).getOperand(0);
15396 if(Ext1.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
15397 Ext2.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
15398 return SDValue();
15399
15400 ConstantSDNode *Ext1Op = dyn_cast<ConstantSDNode>(Ext1.getOperand(1));
15401 ConstantSDNode *Ext2Op = dyn_cast<ConstantSDNode>(Ext2.getOperand(1));
15402 if (!Ext1Op || !Ext2Op)
15403 return SDValue();
15404 if (Ext1.getOperand(0).getValueType() != MVT::v4i32 ||
15405 Ext1.getOperand(0) != Ext2.getOperand(0))
15406 return SDValue();
15407
15408 int FirstElem = Ext1Op->getZExtValue();
15409 int SecondElem = Ext2Op->getZExtValue();
15410 int SubvecIdx;
15411 if (FirstElem == 0 && SecondElem == 1)
15412 SubvecIdx = Subtarget.isLittleEndian() ? 1 : 0;
15413 else if (FirstElem == 2 && SecondElem == 3)
15414 SubvecIdx = Subtarget.isLittleEndian() ? 0 : 1;
15415 else
15416 return SDValue();
15417
15418 SDValue SrcVec = Ext1.getOperand(0);
15419 auto NodeType = (N->getOperand(1).getOpcode() == ISD::SINT_TO_FP) ?
15420 PPCISD::SINT_VEC_TO_FP : PPCISD::UINT_VEC_TO_FP;
15421 return DAG.getNode(NodeType, dl, MVT::v2f64,
15422 SrcVec, DAG.getIntPtrConstant(SubvecIdx, dl));
15423}
15424
15425SDValue PPCTargetLowering::combineFPToIntToFP(SDNode *N,
15426 DAGCombinerInfo &DCI) const {
15427 assert((N->getOpcode() == ISD::SINT_TO_FP ||
15428 N->getOpcode() == ISD::UINT_TO_FP) &&
15429 "Need an int -> FP conversion node here");
15430
15431 if (useSoftFloat() || !Subtarget.has64BitSupport())
15432 return SDValue();
15433
15434 SelectionDAG &DAG = DCI.DAG;
15435 SDLoc dl(N);
15436 SDValue Op(N, 0);
15437
15438 // Don't handle ppc_fp128 here or conversions that are out-of-range capable
15439 // from the hardware.
15440 if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
15441 return SDValue();
15442 if (!Op.getOperand(0).getValueType().isSimple())
15443 return SDValue();
15444 if (Op.getOperand(0).getValueType().getSimpleVT() <= MVT(MVT::i1) ||
15445 Op.getOperand(0).getValueType().getSimpleVT() > MVT(MVT::i64))
15446 return SDValue();
15447
15448 SDValue FirstOperand(Op.getOperand(0));
15449 bool SubWordLoad = FirstOperand.getOpcode() == ISD::LOAD &&
15450 (FirstOperand.getValueType() == MVT::i8 ||
15451 FirstOperand.getValueType() == MVT::i16);
15452 if (Subtarget.hasP9Vector() && Subtarget.hasP9Altivec() && SubWordLoad) {
15453 bool Signed = N->getOpcode() == ISD::SINT_TO_FP;
15454 bool DstDouble = Op.getValueType() == MVT::f64;
15455 unsigned ConvOp = Signed ?
15456 (DstDouble ? PPCISD::FCFID : PPCISD::FCFIDS) :
15457 (DstDouble ? PPCISD::FCFIDU : PPCISD::FCFIDUS);
15458 SDValue WidthConst =
15459 DAG.getIntPtrConstant(FirstOperand.getValueType() == MVT::i8 ? 1 : 2,
15460 dl, false);
15461 LoadSDNode *LDN = cast<LoadSDNode>(FirstOperand.getNode());
15462 SDValue Ops[] = { LDN->getChain(), LDN->getBasePtr(), WidthConst };
15463 SDValue Ld = DAG.getMemIntrinsicNode(PPCISD::LXSIZX, dl,
15464 DAG.getVTList(MVT::f64, MVT::Other),
15465 Ops, MVT::i8, LDN->getMemOperand());
15466 DAG.makeEquivalentMemoryOrdering(LDN, Ld);
15467
15468 // For signed conversion, we need to sign-extend the value in the VSR
15469 if (Signed) {
15470 SDValue ExtOps[] = { Ld, WidthConst };
15471 SDValue Ext = DAG.getNode(PPCISD::VEXTS, dl, MVT::f64, ExtOps);
15472 return DAG.getNode(ConvOp, dl, DstDouble ? MVT::f64 : MVT::f32, Ext);
15473 } else
15474 return DAG.getNode(ConvOp, dl, DstDouble ? MVT::f64 : MVT::f32, Ld);
15475 }
15476
15477
15478 // For i32 intermediate values, unfortunately, the conversion functions
15479 // leave the upper 32 bits of the value undefined. Within the set of
15480 // scalar instructions, we have no method for zero- or sign-extending the
15481 // value. Thus, we cannot handle i32 intermediate values here.
15482 if (Op.getOperand(0).getValueType() == MVT::i32)
15483 return SDValue();
15484
15485 assert((Op.getOpcode() == ISD::SINT_TO_FP || Subtarget.hasFPCVT()) &&
15486 "UINT_TO_FP is supported only with FPCVT");
15487
15488 // If we have FCFIDS, then use it when converting to single-precision.
15489 // Otherwise, convert to double-precision and then round.
15490 unsigned FCFOp = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
15491 ? (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDUS
15492 : PPCISD::FCFIDS)
15493 : (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDU
15494 : PPCISD::FCFID);
15495 MVT FCFTy = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
15496 ? MVT::f32
15497 : MVT::f64;
15498
15499 // If we're converting from a float to an int and back to a float again,
15500 // then we don't need the store/load pair at all.
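 // Illustrative example: (sint_to_fp i64:(fp_to_sint f64 x)) to f64 becomes
 // fcfid(fctidz(x)), keeping the value in floating-point registers with no
 // store/load round trip through memory.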
15501 if ((Op.getOperand(0).getOpcode() == ISD::FP_TO_UINT &&
15502 Subtarget.hasFPCVT()) ||
15503 (Op.getOperand(0).getOpcode() == ISD::FP_TO_SINT)) {
15504 SDValue Src = Op.getOperand(0).getOperand(0);
15505 if (Src.getValueType() == MVT::f32) {
15506 Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);
15507 DCI.AddToWorklist(Src.getNode());
15508 } else if (Src.getValueType() != MVT::f64) {
15509 // Make sure that we don't pick up a ppc_fp128 source value.
15510 return SDValue();
15511 }
15512
15513 unsigned FCTOp =
15514 Op.getOperand(0).getOpcode() == ISD::FP_TO_SINT ? PPCISD::FCTIDZ :
15515 PPCISD::FCTIDUZ;
15516
15517 SDValue Tmp = DAG.getNode(FCTOp, dl, MVT::f64, Src);
15518 SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Tmp);
15519
15520 if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) {
15521 FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP,
15522 DAG.getIntPtrConstant(0, dl, /*isTarget=*/true));
15523 DCI.AddToWorklist(FP.getNode());
15524 }
15525
15526 return FP;
15527 }
15528
15529 return SDValue();
15530}
15531
15532// expandVSXLoadForLE - Convert VSX loads (which may be intrinsics for
15533// builtins) into loads with swaps.
15534 SDValue PPCTargetLowering::expandVSXLoadForLE(SDNode *N,
15535 DAGCombinerInfo &DCI) const {
15536 // Delay VSX load for LE combine until after LegalizeOps to prioritize other
15537 // load combines.
15538 if (DCI.isBeforeLegalizeOps())
15539 return SDValue();
15540
15541 SelectionDAG &DAG = DCI.DAG;
15542 SDLoc dl(N);
15543 SDValue Chain;
15544 SDValue Base;
15545 MachineMemOperand *MMO;
15546
15547 switch (N->getOpcode()) {
15548 default:
15549 llvm_unreachable("Unexpected opcode for little endian VSX load");
15550 case ISD::LOAD: {
15551 LoadSDNode *LD = cast<LoadSDNode>(N);
15552 Chain = LD->getChain();
15553 Base = LD->getBasePtr();
15554 MMO = LD->getMemOperand();
15555 // If the MMO suggests this isn't a load of a full vector, leave
15556 // things alone. For a built-in, we have to make the change for
15557 // correctness, so if there is a size problem that will be a bug.
15558 if (!MMO->getSize().hasValue() || MMO->getSize().getValue() < 16)
15559 return SDValue();
15560 break;
15561 }
15562 case ISD::INTRINSIC_W_CHAIN: {
15563 MemIntrinsicSDNode *Intrin = cast<MemIntrinsicSDNode>(N);
15564 Chain = Intrin->getChain();
15565 // Similarly to the store case below, Intrin->getBasePtr() doesn't get
15566 // us what we want. Get operand 2 instead.
15567 Base = Intrin->getOperand(2);
15568 MMO = Intrin->getMemOperand();
15569 break;
15570 }
15571 }
15572
15573 MVT VecTy = N->getValueType(0).getSimpleVT();
15574
15575 SDValue LoadOps[] = { Chain, Base };
15576 SDValue Load = DAG.getMemIntrinsicNode(PPCISD::LXVD2X, dl,
15577 DAG.getVTList(MVT::v2f64, MVT::Other),
15578 LoadOps, MVT::v2f64, MMO);
15579
15580 DCI.AddToWorklist(Load.getNode());
15581 Chain = Load.getValue(1);
15582 SDValue Swap = DAG.getNode(
15583 PPCISD::XXSWAPD, dl, DAG.getVTList(MVT::v2f64, MVT::Other), Chain, Load);
15584 DCI.AddToWorklist(Swap.getNode());
15585
15586 // Add a bitcast if the resulting load type doesn't match v2f64.
15587 if (VecTy != MVT::v2f64) {
15588 SDValue N = DAG.getNode(ISD::BITCAST, dl, VecTy, Swap);
15589 DCI.AddToWorklist(N.getNode());
15590 // Package {bitcast value, swap's chain} to match Load's shape.
15591 return DAG.getNode(ISD::MERGE_VALUES, dl, DAG.getVTList(VecTy, MVT::Other),
15592 N, Swap.getValue(1));
15593 }
15594
15595 return Swap;
15596}
15597
15598// expandVSXStoreForLE - Convert VSX stores (which may be intrinsics for
15599// builtins) into stores with swaps.
15600 SDValue PPCTargetLowering::expandVSXStoreForLE(SDNode *N,
15601 DAGCombinerInfo &DCI) const {
15602 // Delay VSX store for LE combine until after LegalizeOps to prioritize other
15603 // store combines.
15604 if (DCI.isBeforeLegalizeOps())
15605 return SDValue();
15606
15607 SelectionDAG &DAG = DCI.DAG;
15608 SDLoc dl(N);
15609 SDValue Chain;
15610 SDValue Base;
15611 unsigned SrcOpnd;
15612 MachineMemOperand *MMO;
15613
15614 switch (N->getOpcode()) {
15615 default:
15616 llvm_unreachable("Unexpected opcode for little endian VSX store");
15617 case ISD::STORE: {
15618 StoreSDNode *ST = cast<StoreSDNode>(N);
15619 Chain = ST->getChain();
15620 Base = ST->getBasePtr();
15621 MMO = ST->getMemOperand();
15622 SrcOpnd = 1;
15623 // If the MMO suggests this isn't a store of a full vector, leave
15624 // things alone. For a built-in, we have to make the change for
15625 // correctness, so if there is a size problem that will be a bug.
15626 if (!MMO->getSize().hasValue() || MMO->getSize().getValue() < 16)
15627 return SDValue();
15628 break;
15629 }
15630 case ISD::INTRINSIC_VOID: {
15631 MemIntrinsicSDNode *Intrin = cast<MemIntrinsicSDNode>(N);
15632 Chain = Intrin->getChain();
15633 // Intrin->getBasePtr() oddly does not get what we want.
15634 Base = Intrin->getOperand(3);
15635 MMO = Intrin->getMemOperand();
15636 SrcOpnd = 2;
15637 break;
15638 }
15639 }
15640
15641 SDValue Src = N->getOperand(SrcOpnd);
15642 MVT VecTy = Src.getValueType().getSimpleVT();
15643
15644 // All stores are done as v2f64 with a possible bitcast.
15645 if (VecTy != MVT::v2f64) {
15646 Src = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, Src);
15647 DCI.AddToWorklist(Src.getNode());
15648 }
15649
15650 SDValue Swap = DAG.getNode(PPCISD::XXSWAPD, dl,
15651 DAG.getVTList(MVT::v2f64, MVT::Other), Chain, Src);
15652 DCI.AddToWorklist(Swap.getNode());
15653 Chain = Swap.getValue(1);
15654 SDValue StoreOps[] = { Chain, Swap, Base };
15655 SDValue Store = DAG.getMemIntrinsicNode(PPCISD::STXVD2X, dl,
15656 DAG.getVTList(MVT::Other),
15657 StoreOps, VecTy, MMO);
15658 DCI.AddToWorklist(Store.getNode());
15659 return Store;
15660}
15661
15662// Handle DAG combine for STORE (FP_TO_INT F).
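// For example, (store (fp_to_sint f64 %f), %ptr) can be emitted as a single
// FP-to-integer conversion kept in a VSR followed by a direct integer store
// from that register (stfiwx/stxsiwx style), avoiding a round trip through a
// GPR; the code below builds the corresponding memory intrinsic node.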
15663SDValue PPCTargetLowering::combineStoreFPToInt(SDNode *N,
15664 DAGCombinerInfo &DCI) const {
15665 SelectionDAG &DAG = DCI.DAG;
15666 SDLoc dl(N);
15667 unsigned Opcode = N->getOperand(1).getOpcode();
15668 (void)Opcode;
15669 bool Strict = N->getOperand(1)->isStrictFPOpcode();
15670
15671 assert((Opcode == ISD::FP_TO_SINT || Opcode == ISD::FP_TO_UINT ||
15672 Opcode == ISD::STRICT_FP_TO_SINT || Opcode == ISD::STRICT_FP_TO_UINT)
15673 && "Not a FP_TO_INT Instruction!");
15674
15675 SDValue Val = N->getOperand(1).getOperand(Strict ? 1 : 0);
15676 EVT Op1VT = N->getOperand(1).getValueType();
15677 EVT ResVT = Val.getValueType();
15678
15679 if (!Subtarget.hasVSX() || !Subtarget.hasFPCVT() || !isTypeLegal(ResVT))
15680 return SDValue();
15681
15682 // Only perform combine for conversion to i64/i32 or power9 i16/i8.
15683 bool ValidTypeForStoreFltAsInt =
15684 (Op1VT == MVT::i32 || (Op1VT == MVT::i64 && Subtarget.isPPC64()) ||
15685 (Subtarget.hasP9Vector() && (Op1VT == MVT::i16 || Op1VT == MVT::i8)));
15686
15687 // TODO: Lower conversion from f128 on all VSX targets
15688 if (ResVT == MVT::ppcf128 || (ResVT == MVT::f128 && !Subtarget.hasP9Vector()))
15689 return SDValue();
15690
15691 if ((Op1VT != MVT::i64 && !Subtarget.hasP8Vector()) ||
15692 cast<StoreSDNode>(N)->isTruncatingStore() || !ValidTypeForStoreFltAsInt)
15693 return SDValue();
15694
15695 Val = convertFPToInt(N->getOperand(1), DAG, Subtarget);
15696
15697 // Set number of bytes being converted.
15698 unsigned ByteSize = Op1VT.getScalarSizeInBits() / 8;
15699 SDValue Ops[] = {N->getOperand(0), Val, N->getOperand(2),
15700 DAG.getIntPtrConstant(ByteSize, dl, false),
15701 DAG.getValueType(Op1VT)};
15702
15703 Val = DAG.getMemIntrinsicNode(PPCISD::ST_VSR_SCAL_INT, dl,
15704 DAG.getVTList(MVT::Other), Ops,
15705 cast<StoreSDNode>(N)->getMemoryVT(),
15706 cast<StoreSDNode>(N)->getMemOperand());
15707
15708 return Val;
15709}
15710
15711static bool isAlternatingShuffMask(const ArrayRef<int> &Mask, int NumElts) {
15712 // Check that the source of the element keeps flipping
15713 // (i.e. Mask[i] < NumElts -> Mask[i+1] >= NumElts).
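// For example, with NumElts = 4 the mask <0, 5, 2, 7> alternates between the
// two source vectors and is accepted, while <0, 1, 6, 7> is rejected because
// two consecutive elements come from the same source vector.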
15714 bool PrevElemFromFirstVec = Mask[0] < NumElts;
15715 for (int i = 1, e = Mask.size(); i < e; i++) {
15716 if (PrevElemFromFirstVec && Mask[i] < NumElts)
15717 return false;
15718 if (!PrevElemFromFirstVec && Mask[i] >= NumElts)
15719 return false;
15720 PrevElemFromFirstVec = !PrevElemFromFirstVec;
15721 }
15722 return true;
15723}
15724
15725static bool isSplatBV(SDValue Op) {
15726 if (Op.getOpcode() != ISD::BUILD_VECTOR)
15727 return false;
15728 SDValue FirstOp;
15729
15730 // Find first non-undef input.
15731 for (int i = 0, e = Op.getNumOperands(); i < e; i++) {
15732 FirstOp = Op.getOperand(i);
15733 if (!FirstOp.isUndef())
15734 break;
15735 }
15736
15737 // All inputs are undef or the same as the first non-undef input.
15738 for (int i = 1, e = Op.getNumOperands(); i < e; i++)
15739 if (Op.getOperand(i) != FirstOp && !Op.getOperand(i).isUndef())
15740 return false;
15741 return true;
15742}
15743
15744 static SDValue isScalarToVec(SDValue Op) {
15745 if (Op.getOpcode() == ISD::SCALAR_TO_VECTOR)
15746 return Op;
15747 if (Op.getOpcode() != ISD::BITCAST)
15748 return SDValue();
15749 Op = Op.getOperand(0);
15750 if (Op.getOpcode() == ISD::SCALAR_TO_VECTOR)
15751 return Op;
15752 return SDValue();
15753}
15754
15755// Fix up the shuffle mask to account for the fact that the result of
15756// scalar_to_vector is not in lane zero. This just takes all values in
15757// the ranges specified by the min/max indices and adds the number of
15758// elements required to ensure each element comes from the respective
15759// position in the valid lane.
15760// On little endian, that's just the corresponding element in the other
15761// half of the vector. On big endian, it is in the same half but right
15762// justified rather than left justified in that half.
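// For example, on little endian with HalfVec = 4, a mask entry of 0 that
// selects from a permuted LHS is rewritten to 4, i.e. the corresponding
// element in the other half of the vector where the scalar value actually
// resides.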
15763 static void fixupShuffleMaskForPermutedSToV(
15764 SmallVectorImpl<int> &ShuffV, int LHSFirstElt, int LHSLastElt,
15765 int RHSFirstElt, int RHSLastElt, int HalfVec, unsigned LHSNumValidElts,
15766 unsigned RHSNumValidElts, const PPCSubtarget &Subtarget) {
15767 int LHSEltFixup =
15768 Subtarget.isLittleEndian() ? HalfVec : HalfVec - LHSNumValidElts;
15769 int RHSEltFixup =
15770 Subtarget.isLittleEndian() ? HalfVec : HalfVec - RHSNumValidElts;
15771 for (int I = 0, E = ShuffV.size(); I < E; ++I) {
15772 int Idx = ShuffV[I];
15773 if (Idx >= LHSFirstElt && Idx <= LHSLastElt)
15774 ShuffV[I] += LHSEltFixup;
15775 else if (Idx >= RHSFirstElt && Idx <= RHSLastElt)
15776 ShuffV[I] += RHSEltFixup;
15777 }
15778}
15779
15780// Replace a SCALAR_TO_VECTOR with a SCALAR_TO_VECTOR_PERMUTED except if
15781// the original is:
15782// (<n x Ty> (scalar_to_vector (Ty (extract_elt <n x Ty> %a, C))))
15783// In such a case, just change the shuffle mask to extract the element
15784// from the permuted index.
15785 static SDValue getSToVPermuted(SDValue OrigSToV, SelectionDAG &DAG,
15786 const PPCSubtarget &Subtarget) {
15787 SDLoc dl(OrigSToV);
15788 EVT VT = OrigSToV.getValueType();
15789 assert(OrigSToV.getOpcode() == ISD::SCALAR_TO_VECTOR &&
15790 "Expecting a SCALAR_TO_VECTOR here");
15791 SDValue Input = OrigSToV.getOperand(0);
15792
15793 if (Input.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
15794 ConstantSDNode *Idx = dyn_cast<ConstantSDNode>(Input.getOperand(1));
15795 SDValue OrigVector = Input.getOperand(0);
15796
15797 // Can't handle non-const element indices or different vector types
15798 // for the input to the extract and the output of the scalar_to_vector.
15799 if (Idx && VT == OrigVector.getValueType()) {
15800 unsigned NumElts = VT.getVectorNumElements();
15801 assert(
15802 NumElts > 1 &&
15803 "Cannot produce a permuted scalar_to_vector for one element vector");
15804 SmallVector<int, 16> NewMask(NumElts, -1);
15805 unsigned ResultInElt = NumElts / 2;
15806 ResultInElt -= Subtarget.isLittleEndian() ? 0 : 1;
15807 NewMask[ResultInElt] = Idx->getZExtValue();
15808 return DAG.getVectorShuffle(VT, dl, OrigVector, OrigVector, NewMask);
15809 }
15810 }
15811 return DAG.getNode(PPCISD::SCALAR_TO_VECTOR_PERMUTED, dl, VT,
15812 OrigSToV.getOperand(0));
15813}
15814
15815 static bool isShuffleMaskInRange(const SmallVectorImpl<int> &ShuffV,
15816 int HalfVec, int LHSLastElementDefined,
15817 int RHSLastElementDefined) {
15818 for (int Index : ShuffV) {
15819 if (Index < 0) // Skip explicitly undefined mask indices.
15820 continue;
15821 // Handle first input vector of the vector_shuffle.
15822 if ((LHSLastElementDefined >= 0) && (Index < HalfVec) &&
15823 (Index > LHSLastElementDefined))
15824 return false;
15825 // Handle second input vector of the vector_shuffle.
15826 if ((RHSLastElementDefined >= 0) &&
15827 (Index > HalfVec + RHSLastElementDefined))
15828 return false;
15829 }
15830 return true;
15831}
15832
15833 static SDValue generateSToVPermutedForVecShuffle(
15834 int ScalarSize, uint64_t ShuffleEltWidth, unsigned &NumValidElts,
15835 int FirstElt, int &LastElt, SDValue VecShuffOperand, SDValue SToVNode,
15836 SelectionDAG &DAG, const PPCSubtarget &Subtarget) {
15837 EVT VecShuffOperandType = VecShuffOperand.getValueType();
15838 // Set up the values for the shuffle vector fixup.
15839 NumValidElts = ScalarSize / VecShuffOperandType.getScalarSizeInBits();
15840 // The last element depends on if the input comes from the LHS or RHS.
15841 //
15842 // For example:
15843 // (shuff (s_to_v i32), (bitcast (s_to_v i64), v4i32), ...)
15844 //
15845 // For the LHS: The last element that comes from the LHS is actually 0, not 3
15846 // because elements 1 and higher of a scalar_to_vector are undefined.
15847 // For the RHS: The last element that comes from the RHS is actually 5, not 7
15848 // because elements 1 and higher of a scalar_to_vector are undefined.
15849 // It is also not 4 because the original scalar_to_vector is wider and
15850 // actually contains two i32 elements.
15851 LastElt = (uint64_t)ScalarSize > ShuffleEltWidth
15852 ? ScalarSize / ShuffleEltWidth - 1 + FirstElt
15853 : FirstElt;
15854 SDValue SToVPermuted = getSToVPermuted(SToVNode, DAG, Subtarget);
15855 if (SToVPermuted.getValueType() != VecShuffOperandType)
15856 SToVPermuted = DAG.getBitcast(VecShuffOperandType, SToVPermuted);
15857 return SToVPermuted;
15858}
15859
15860// On little endian subtargets, combine shuffles such as:
15861// vector_shuffle<16,1,17,3,18,5,19,7,20,9,21,11,22,13,23,15>, <zero>, %b
15862// into:
15863// vector_shuffle<16,0,17,1,18,2,19,3,20,4,21,5,22,6,23,7>, <zero>, %b
15864// because the latter can be matched to a single instruction merge.
15865// Furthermore, SCALAR_TO_VECTOR on little endian always involves a permute
15866// to put the value into element zero. Adjust the shuffle mask so that the
15867// vector can remain in permuted form (to prevent a swap prior to a shuffle).
15868// On big endian targets, this is still useful for SCALAR_TO_VECTOR
15869// nodes with elements smaller than doubleword because all the ways
15870// of getting scalar data into a vector register put the value in the
15871// rightmost element of the left half of the vector.
15872SDValue PPCTargetLowering::combineVectorShuffle(ShuffleVectorSDNode *SVN,
15873 SelectionDAG &DAG) const {
15874 SDValue LHS = SVN->getOperand(0);
15875 SDValue RHS = SVN->getOperand(1);
15876 auto Mask = SVN->getMask();
15877 int NumElts = LHS.getValueType().getVectorNumElements();
15878 SDValue Res(SVN, 0);
15879 SDLoc dl(SVN);
15880 bool IsLittleEndian = Subtarget.isLittleEndian();
15881
15882 // On big endian targets this is only useful for subtargets with direct moves.
15883 // On little endian targets it would be useful for all subtargets with VSX.
15884 // However, adding special handling for LE subtargets without direct moves
15885 // would be wasted effort since the minimum arch for LE is ISA 2.07 (Power8)
15886 // which includes direct moves.
15887 if (!Subtarget.hasDirectMove())
15888 return Res;
15889
15890 // If this is not a shuffle of a shuffle and the first element comes from
15891 // the second vector, canonicalize to the commuted form. This will make it
15892 // more likely to match one of the single instruction patterns.
15893 if (Mask[0] >= NumElts && LHS.getOpcode() != ISD::VECTOR_SHUFFLE &&
15894 RHS.getOpcode() != ISD::VECTOR_SHUFFLE) {
15895 std::swap(LHS, RHS);
15896 Res = DAG.getCommutedVectorShuffle(*SVN);
15897 Mask = cast<ShuffleVectorSDNode>(Res)->getMask();
15898 }
15899
15900 // Adjust the shuffle mask if either input vector comes from a
15901 // SCALAR_TO_VECTOR and keep the respective input vector in permuted
15902 // form (to prevent the need for a swap).
15903 SmallVector<int, 16> ShuffV(Mask);
15904 SDValue SToVLHS = isScalarToVec(LHS);
15905 SDValue SToVRHS = isScalarToVec(RHS);
15906 if (SToVLHS || SToVRHS) {
15907 EVT VT = SVN->getValueType(0);
15908 uint64_t ShuffleEltWidth = VT.getVectorElementType().getSizeInBits();
15909 int ShuffleNumElts = ShuffV.size();
15910 int HalfVec = ShuffleNumElts / 2;
15911 // The width of the "valid lane" (i.e. the lane that contains the value that
15912 // is vectorized) needs to be expressed in terms of the number of elements
15913 // of the shuffle. It is thereby the ratio of the values before and after
15914 // any bitcast, which will be set later on if the LHS or RHS are
15915 // SCALAR_TO_VECTOR nodes.
15916 unsigned LHSNumValidElts = HalfVec;
15917 unsigned RHSNumValidElts = HalfVec;
15918
15919 // Initially assume that neither input is permuted. These will be adjusted
15920 // accordingly if either input is. Note that -1 means that all elements
15921 // are undefined.
15922 int LHSFirstElt = 0;
15923 int RHSFirstElt = ShuffleNumElts;
15924 int LHSLastElt = -1;
15925 int RHSLastElt = -1;
15926
15927 // Get the permuted scalar to vector nodes for the source(s) that come from
15928 // ISD::SCALAR_TO_VECTOR.
15929 // On big endian systems, this only makes sense for element sizes smaller
15930 // than 64 bits since for 64-bit elements, all instructions already put
15931 // the value into element zero. Since scalar size of LHS and RHS may differ
15932 // after isScalarToVec, this should be checked using their own sizes.
15933 int LHSScalarSize = 0;
15934 int RHSScalarSize = 0;
15935 if (SToVLHS) {
15936 LHSScalarSize = SToVLHS.getValueType().getScalarSizeInBits();
15937 if (!IsLittleEndian && LHSScalarSize >= 64)
15938 return Res;
15939 }
15940 if (SToVRHS) {
15941 RHSScalarSize = SToVRHS.getValueType().getScalarSizeInBits();
15942 if (!IsLittleEndian && RHSScalarSize >= 64)
15943 return Res;
15944 }
15945 if (LHSScalarSize != 0)
15946 LHS = generateSToVPermutedForVecShuffle(
15947 LHSScalarSize, ShuffleEltWidth, LHSNumValidElts, LHSFirstElt,
15948 LHSLastElt, LHS, SToVLHS, DAG, Subtarget);
15949 if (RHSScalarSize != 0)
15950 RHS = generateSToVPermutedForVecShuffle(
15951 RHSScalarSize, ShuffleEltWidth, RHSNumValidElts, RHSFirstElt,
15952 RHSLastElt, RHS, SToVRHS, DAG, Subtarget);
15953
15954 if (!isShuffleMaskInRange(ShuffV, HalfVec, LHSLastElt, RHSLastElt))
15955 return Res;
15956
15957 // Fix up the shuffle mask to reflect where the desired element actually is.
15958 // The minimum and maximum indices that correspond to element zero for both
15959 // the LHS and RHS are computed and will control which shuffle mask entries
15960 // are to be changed. For example, if the RHS is permuted, any shuffle mask
15961 // entries in the range [RHSFirstElt,RHSLastElt] will be adjusted.
15962 fixupShuffleMaskForPermutedSToV(
15963 ShuffV, LHSFirstElt, LHSLastElt, RHSFirstElt, RHSLastElt, HalfVec,
15964 LHSNumValidElts, RHSNumValidElts, Subtarget);
15965 Res = DAG.getVectorShuffle(SVN->getValueType(0), dl, LHS, RHS, ShuffV);
15966
15967 // We may have simplified away the shuffle. We won't be able to do anything
15968 // further with it here.
15969 if (!isa<ShuffleVectorSDNode>(Res))
15970 return Res;
15971 Mask = cast<ShuffleVectorSDNode>(Res)->getMask();
15972 }
15973
15974 SDValue TheSplat = IsLittleEndian ? RHS : LHS;
15975 // The common case after we commuted the shuffle is that the RHS is a splat
15976 // and we have elements coming in from the splat at indices that are not
15977 // conducive to using a merge.
15978 // Example:
15979 // vector_shuffle<0,17,1,19,2,21,3,23,4,25,5,27,6,29,7,31> t1, <zero>
15980 if (!isSplatBV(TheSplat))
15981 return Res;
15982
15983 // We are looking for a mask such that all even elements are from
15984 // one vector and all odd elements from the other.
15985 if (!isAlternatingShuffMask(Mask, NumElts))
15986 return Res;
15987
15988 // Adjust the mask so we are pulling in the same index from the splat
15989 // as the index from the interesting vector in consecutive elements.
15990 if (IsLittleEndian) {
15991 // Example (even elements from first vector):
15992 // vector_shuffle<0,16,1,17,2,18,3,19,4,20,5,21,6,22,7,23> t1, <zero>
15993 if (Mask[0] < NumElts)
15994 for (int i = 1, e = Mask.size(); i < e; i += 2) {
15995 if (ShuffV[i] < 0)
15996 continue;
15997 // If element from non-splat is undef, pick first element from splat.
15998 ShuffV[i] = (ShuffV[i - 1] >= 0 ? ShuffV[i - 1] : 0) + NumElts;
15999 }
16000 // Example (odd elements from first vector):
16001 // vector_shuffle<16,0,17,1,18,2,19,3,20,4,21,5,22,6,23,7> t1, <zero>
16002 else
16003 for (int i = 0, e = Mask.size(); i < e; i += 2) {
16004 if (ShuffV[i] < 0)
16005 continue;
16006 // If element from non-splat is undef, pick first element from splat.
16007 ShuffV[i] = (ShuffV[i + 1] >= 0 ? ShuffV[i + 1] : 0) + NumElts;
16008 }
16009 } else {
16010 // Example (even elements from first vector):
16011 // vector_shuffle<0,16,1,17,2,18,3,19,4,20,5,21,6,22,7,23> <zero>, t1
16012 if (Mask[0] < NumElts)
16013 for (int i = 0, e = Mask.size(); i < e; i += 2) {
16014 if (ShuffV[i] < 0)
16015 continue;
16016 // If element from non-splat is undef, pick first element from splat.
16017 ShuffV[i] = ShuffV[i + 1] >= 0 ? ShuffV[i + 1] - NumElts : 0;
16018 }
16019 // Example (odd elements from first vector):
16020 // vector_shuffle<16,0,17,1,18,2,19,3,20,4,21,5,22,6,23,7> <zero>, t1
16021 else
16022 for (int i = 1, e = Mask.size(); i < e; i += 2) {
16023 if (ShuffV[i] < 0)
16024 continue;
16025 // If element from non-splat is undef, pick first element from splat.
16026 ShuffV[i] = ShuffV[i - 1] >= 0 ? ShuffV[i - 1] - NumElts : 0;
16027 }
16028 }
16029
16030 // If the RHS has undefs, we need to remove them since we may have created
16031 // a shuffle that adds those instead of the splat value.
16032 SDValue SplatVal =
16033 cast<BuildVectorSDNode>(TheSplat.getNode())->getSplatValue();
16034 TheSplat = DAG.getSplatBuildVector(TheSplat.getValueType(), dl, SplatVal);
16035
16036 if (IsLittleEndian)
16037 RHS = TheSplat;
16038 else
16039 LHS = TheSplat;
16040 return DAG.getVectorShuffle(SVN->getValueType(0), dl, LHS, RHS, ShuffV);
16041}
16042
16043SDValue PPCTargetLowering::combineVReverseMemOP(ShuffleVectorSDNode *SVN,
16044 LSBaseSDNode *LSBase,
16045 DAGCombinerInfo &DCI) const {
16046 assert((ISD::isNormalLoad(LSBase) || ISD::isNormalStore(LSBase)) &&
16047 "Not a reverse memop pattern!");
16048
16049 auto IsElementReverse = [](const ShuffleVectorSDNode *SVN) -> bool {
16050 auto Mask = SVN->getMask();
16051 int i = 0;
16052 auto I = Mask.rbegin();
16053 auto E = Mask.rend();
16054
16055 for (; I != E; ++I) {
16056 if (*I != i)
16057 return false;
16058 i++;
16059 }
16060 return true;
16061 };
16062
16063 SelectionDAG &DAG = DCI.DAG;
16064 EVT VT = SVN->getValueType(0);
16065
16066 if (!isTypeLegal(VT) || !Subtarget.isLittleEndian() || !Subtarget.hasVSX())
16067 return SDValue();
16068
16069 // Before P9, the PPCVSXSwapRemoval pass is used to fix up the element order.
16070 // See comment in PPCVSXSwapRemoval.cpp.
16071 // This combine conflicts with that optimization, so we don't do it here.
16072 if (!Subtarget.hasP9Vector())
16073 return SDValue();
16074
16075 if (!IsElementReverse(SVN))
16076 return SDValue();
16077
16078 if (LSBase->getOpcode() == ISD::LOAD) {
16080 // If result 0 of the load has any user other than the shufflevector
16081 // instruction, it is not profitable to replace the shufflevector with
16082 // a reverse load.
16082 for (SDUse &Use : LSBase->uses())
16083 if (Use.getResNo() == 0 &&
16084 Use.getUser()->getOpcode() != ISD::VECTOR_SHUFFLE)
16085 return SDValue();
16086
16087 SDLoc dl(LSBase);
16088 SDValue LoadOps[] = {LSBase->getChain(), LSBase->getBasePtr()};
16089 return DAG.getMemIntrinsicNode(
16090 PPCISD::LOAD_VEC_BE, dl, DAG.getVTList(VT, MVT::Other), LoadOps,
16091 LSBase->getMemoryVT(), LSBase->getMemOperand());
16092 }
16093
16094 if (LSBase->getOpcode() == ISD::STORE) {
16095 // If there are other uses of the shuffle, the swap cannot be avoided.
16096 // Forcing the use of an X-Form (since swapped stores only have
16097 // X-Forms) without removing the swap is unprofitable.
16098 if (!SVN->hasOneUse())
16099 return SDValue();
16100
16101 SDLoc dl(LSBase);
16102 SDValue StoreOps[] = {LSBase->getChain(), SVN->getOperand(0),
16103 LSBase->getBasePtr()};
16104 return DAG.getMemIntrinsicNode(
16105 PPCISD::STORE_VEC_BE, dl, DAG.getVTList(MVT::Other), StoreOps,
16106 LSBase->getMemoryVT(), LSBase->getMemOperand());
16107 }
16108
16109 llvm_unreachable("Expected a load or store node here");
16110}
16111
16112static bool isStoreConditional(SDValue Intrin, unsigned &StoreWidth) {
16113 unsigned IntrinsicID = Intrin.getConstantOperandVal(1);
16114 if (IntrinsicID == Intrinsic::ppc_stdcx)
16115 StoreWidth = 8;
16116 else if (IntrinsicID == Intrinsic::ppc_stwcx)
16117 StoreWidth = 4;
16118 else if (IntrinsicID == Intrinsic::ppc_sthcx)
16119 StoreWidth = 2;
16120 else if (IntrinsicID == Intrinsic::ppc_stbcx)
16121 StoreWidth = 1;
16122 else
16123 return false;
16124 return true;
16125}
16126
16127 SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
16128 DAGCombinerInfo &DCI) const {
16129 SelectionDAG &DAG = DCI.DAG;
16130 SDLoc dl(N);
16131 switch (N->getOpcode()) {
16132 default: break;
16133 case ISD::ADD:
16134 return combineADD(N, DCI);
16135 case ISD::AND: {
16136 // We don't want (and (zext (shift...)), C) if C fits in the width of the
16137 // original input as that will prevent us from selecting optimal rotates.
16138 // This only matters if the input to the extend is i32 widened to i64.
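// For example, (and (zext (srl i32:%x, 3)):i64, 255) is rewritten below as
// (zext (and (srl i32:%x, 3), 255)) so the mask stays in 32 bits and can
// still fold into a rotate-and-mask (rlwinm-style) instruction.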
16139 SDValue Op1 = N->getOperand(0);
16140 SDValue Op2 = N->getOperand(1);
16141 if ((Op1.getOpcode() != ISD::ZERO_EXTEND &&
16142 Op1.getOpcode() != ISD::ANY_EXTEND) ||
16143 !isa<ConstantSDNode>(Op2) || N->getValueType(0) != MVT::i64 ||
16144 Op1.getOperand(0).getValueType() != MVT::i32)
16145 break;
16146 SDValue NarrowOp = Op1.getOperand(0);
16147 if (NarrowOp.getOpcode() != ISD::SHL && NarrowOp.getOpcode() != ISD::SRL &&
16148 NarrowOp.getOpcode() != ISD::ROTL && NarrowOp.getOpcode() != ISD::ROTR)
16149 break;
16150
16151 uint64_t Imm = Op2->getAsZExtVal();
16152 // Make sure that the constant is narrow enough to fit in the narrow type.
16153 if (!isUInt<32>(Imm))
16154 break;
16155 SDValue ConstOp = DAG.getConstant(Imm, dl, MVT::i32);
16156 SDValue NarrowAnd = DAG.getNode(ISD::AND, dl, MVT::i32, NarrowOp, ConstOp);
16157 return DAG.getZExtOrTrunc(NarrowAnd, dl, N->getValueType(0));
16158 }
16159 case ISD::SHL:
16160 return combineSHL(N, DCI);
16161 case ISD::SRA:
16162 return combineSRA(N, DCI);
16163 case ISD::SRL:
16164 return combineSRL(N, DCI);
16165 case ISD::MUL:
16166 return combineMUL(N, DCI);
16167 case ISD::FMA:
16168 case PPCISD::FNMSUB:
16169 return combineFMALike(N, DCI);
16170 case PPCISD::SHL:
16171 if (isNullConstant(N->getOperand(0))) // 0 << V -> 0.
16172 return N->getOperand(0);
16173 break;
16174 case PPCISD::SRL:
16175 if (isNullConstant(N->getOperand(0))) // 0 >>u V -> 0.
16176 return N->getOperand(0);
16177 break;
16178 case PPCISD::SRA:
16179 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) {
16180 if (C->isZero() || // 0 >>s V -> 0.
16181 C->isAllOnes()) // -1 >>s V -> -1.
16182 return N->getOperand(0);
16183 }
16184 break;
16185 case ISD::SIGN_EXTEND:
16186 case ISD::ZERO_EXTEND:
16187 case ISD::ANY_EXTEND:
16188 return DAGCombineExtBoolTrunc(N, DCI);
16189 case ISD::TRUNCATE:
16190 return combineTRUNCATE(N, DCI);
16191 case ISD::SETCC:
16192 if (SDValue CSCC = combineSetCC(N, DCI))
16193 return CSCC;
16194 [[fallthrough]];
16195 case ISD::SELECT_CC:
16196 return DAGCombineTruncBoolExt(N, DCI);
16197 case ISD::SINT_TO_FP:
16198 case ISD::UINT_TO_FP:
16199 return combineFPToIntToFP(N, DCI);
16200 case ISD::VECTOR_SHUFFLE:
16201 if (ISD::isNormalLoad(N->getOperand(0).getNode())) {
16202 LSBaseSDNode* LSBase = cast<LSBaseSDNode>(N->getOperand(0));
16203 return combineVReverseMemOP(cast<ShuffleVectorSDNode>(N), LSBase, DCI);
16204 }
16205 return combineVectorShuffle(cast<ShuffleVectorSDNode>(N), DCI.DAG);
16206 case ISD::STORE: {
16207
16208 EVT Op1VT = N->getOperand(1).getValueType();
16209 unsigned Opcode = N->getOperand(1).getOpcode();
16210
16211 if (Opcode == ISD::FP_TO_SINT || Opcode == ISD::FP_TO_UINT ||
16212 Opcode == ISD::STRICT_FP_TO_SINT || Opcode == ISD::STRICT_FP_TO_UINT) {
16213 SDValue Val = combineStoreFPToInt(N, DCI);
16214 if (Val)
16215 return Val;
16216 }
16217
16218 if (Opcode == ISD::VECTOR_SHUFFLE && ISD::isNormalStore(N)) {
16219 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N->getOperand(1));
16220 SDValue Val= combineVReverseMemOP(SVN, cast<LSBaseSDNode>(N), DCI);
16221 if (Val)
16222 return Val;
16223 }
16224
16225 // Turn STORE (BSWAP) -> sthbrx/stwbrx.
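// For example, (store (bswap i32:%x), ptr) becomes a PPCISD::STBRX memory
// intrinsic that selects to stwbrx, storing %x byte-reversed without a
// separate swap in a GPR.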
16226 if (cast<StoreSDNode>(N)->isUnindexed() && Opcode == ISD::BSWAP &&
16227 N->getOperand(1).getNode()->hasOneUse() &&
16228 (Op1VT == MVT::i32 || Op1VT == MVT::i16 ||
16229 (Subtarget.hasLDBRX() && Subtarget.isPPC64() && Op1VT == MVT::i64))) {
16230
16231 // STBRX can only handle simple types, and it makes no sense to store
16232 // fewer than two bytes in byte-reversed order.
16233 EVT mVT = cast<StoreSDNode>(N)->getMemoryVT();
16234 if (mVT.isExtended() || mVT.getSizeInBits() < 16)
16235 break;
16236
16237 SDValue BSwapOp = N->getOperand(1).getOperand(0);
16238 // Do an any-extend to 32-bits if this is a half-word input.
16239 if (BSwapOp.getValueType() == MVT::i16)
16240 BSwapOp = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, BSwapOp);
16241
16242 // If the type of the BSWAP operand is wider than the stored memory width,
16243 // it needs to be shifted right before the STBRX.
16244 if (Op1VT.bitsGT(mVT)) {
16245 int Shift = Op1VT.getSizeInBits() - mVT.getSizeInBits();
16246 BSwapOp = DAG.getNode(ISD::SRL, dl, Op1VT, BSwapOp,
16247 DAG.getConstant(Shift, dl, MVT::i32));
16248 // Need to truncate if this is a bswap of i64 stored as i32/i16.
16249 if (Op1VT == MVT::i64)
16250 BSwapOp = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, BSwapOp);
16251 }
16252
16253 SDValue Ops[] = {
16254 N->getOperand(0), BSwapOp, N->getOperand(2), DAG.getValueType(mVT)
16255 };
16256 return
16257 DAG.getMemIntrinsicNode(PPCISD::STBRX, dl, DAG.getVTList(MVT::Other),
16258 Ops, cast<StoreSDNode>(N)->getMemoryVT(),
16259 cast<StoreSDNode>(N)->getMemOperand());
16260 }
16261
16262 // STORE Constant:i32<0> -> STORE<trunc to i32> Constant:i64<0>
16263 // This can increase the chance of CSE for constant materialization.
16264 if (Subtarget.isPPC64() && !DCI.isBeforeLegalize() &&
16265 isa<ConstantSDNode>(N->getOperand(1)) && Op1VT == MVT::i32) {
16266 // Need to sign-extend to 64 bits to handle negative values.
16267 EVT MemVT = cast<StoreSDNode>(N)->getMemoryVT();
16268 uint64_t Val64 = SignExtend64(N->getConstantOperandVal(1),
16269 MemVT.getSizeInBits());
16270 SDValue Const64 = DAG.getConstant(Val64, dl, MVT::i64);
16271
16272 // DAG.getTruncStore() can't be used here because it doesn't accept
16273 // the general (base + offset) addressing mode.
16274 // So we use UpdateNodeOperands and setTruncatingStore instead.
16275 DAG.UpdateNodeOperands(N, N->getOperand(0), Const64, N->getOperand(2),
16276 N->getOperand(3));
16277 cast<StoreSDNode>(N)->setTruncatingStore(true);
16278 return SDValue(N, 0);
16279 }
16280
16281 // For little endian, VSX stores require generating xxswapd/stxvd2x.
16282 // Not needed on ISA 3.0 based CPUs since we have a non-permuting store.
16283 if (Op1VT.isSimple()) {
16284 MVT StoreVT = Op1VT.getSimpleVT();
16285 if (Subtarget.needsSwapsForVSXMemOps() &&
16286 (StoreVT == MVT::v2f64 || StoreVT == MVT::v2i64 ||
16287 StoreVT == MVT::v4f32 || StoreVT == MVT::v4i32))
16288 return expandVSXStoreForLE(N, DCI);
16289 }
16290 break;
16291 }
16292 case ISD::LOAD: {
16293 LoadSDNode *LD = cast<LoadSDNode>(N);
16294 EVT VT = LD->getValueType(0);
16295
16296 // For little endian, VSX loads require generating lxvd2x/xxswapd.
16297 // Not needed on ISA 3.0 based CPUs since we have a non-permuting load.
16298 if (VT.isSimple()) {
16299 MVT LoadVT = VT.getSimpleVT();
16300 if (Subtarget.needsSwapsForVSXMemOps() &&
16301 (LoadVT == MVT::v2f64 || LoadVT == MVT::v2i64 ||
16302 LoadVT == MVT::v4f32 || LoadVT == MVT::v4i32))
16303 return expandVSXLoadForLE(N, DCI);
16304 }
16305
16306 // We sometimes end up with a 64-bit integer load, from which we extract
16307 // two single-precision floating-point numbers. This happens with
16308 // std::complex<float>, and other similar structures, because of the way we
16309 // canonicalize structure copies. However, if we lack direct moves,
16310 // then the final bitcasts from the extracted integer values to the
16311 // floating-point numbers turn into store/load pairs. Even with direct moves,
16312 // just loading the two floating-point numbers is likely better.
16313 auto ReplaceTwoFloatLoad = [&]() {
16314 if (VT != MVT::i64)
16315 return false;
16316
16317 if (LD->getExtensionType() != ISD::NON_EXTLOAD ||
16318 LD->isVolatile())
16319 return false;
16320
16321 // We're looking for a sequence like this:
16322 // t13: i64,ch = load<LD8[%ref.tmp]> t0, t6, undef:i64
16323 // t16: i64 = srl t13, Constant:i32<32>
16324 // t17: i32 = truncate t16
16325 // t18: f32 = bitcast t17
16326 // t19: i32 = truncate t13
16327 // t20: f32 = bitcast t19
16328
16329 if (!LD->hasNUsesOfValue(2, 0))
16330 return false;
16331
16332 auto UI = LD->user_begin();
16333 while (UI.getUse().getResNo() != 0) ++UI;
16334 SDNode *Trunc = *UI++;
16335 while (UI.getUse().getResNo() != 0) ++UI;
16336 SDNode *RightShift = *UI;
16337 if (Trunc->getOpcode() != ISD::TRUNCATE)
16338 std::swap(Trunc, RightShift);
16339
16340 if (Trunc->getOpcode() != ISD::TRUNCATE ||
16341 Trunc->getValueType(0) != MVT::i32 ||
16342 !Trunc->hasOneUse())
16343 return false;
16344 if (RightShift->getOpcode() != ISD::SRL ||
16345 !isa<ConstantSDNode>(RightShift->getOperand(1)) ||
16346 RightShift->getConstantOperandVal(1) != 32 ||
16347 !RightShift->hasOneUse())
16348 return false;
16349
16350 SDNode *Trunc2 = *RightShift->user_begin();
16351 if (Trunc2->getOpcode() != ISD::TRUNCATE ||
16352 Trunc2->getValueType(0) != MVT::i32 ||
16353 !Trunc2->hasOneUse())
16354 return false;
16355
16356 SDNode *Bitcast = *Trunc->user_begin();
16357 SDNode *Bitcast2 = *Trunc2->user_begin();
16358
16359 if (Bitcast->getOpcode() != ISD::BITCAST ||
16360 Bitcast->getValueType(0) != MVT::f32)
16361 return false;
16362 if (Bitcast2->getOpcode() != ISD::BITCAST ||
16363 Bitcast2->getValueType(0) != MVT::f32)
16364 return false;
16365
16366 if (Subtarget.isLittleEndian())
16367 std::swap(Bitcast, Bitcast2);
16368
16369 // Bitcast has the second float (in memory-layout order) and Bitcast2
16370 // has the first one.
16371
16372 SDValue BasePtr = LD->getBasePtr();
16373 if (LD->isIndexed()) {
16374 assert(LD->getAddressingMode() == ISD::PRE_INC &&
16375 "Non-pre-inc AM on PPC?");
16376 BasePtr =
16377 DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
16378 LD->getOffset());
16379 }
16380
16381 auto MMOFlags =
16382 LD->getMemOperand()->getFlags() & ~MachineMemOperand::MOVolatile;
16383 SDValue FloatLoad = DAG.getLoad(MVT::f32, dl, LD->getChain(), BasePtr,
16384 LD->getPointerInfo(), LD->getAlign(),
16385 MMOFlags, LD->getAAInfo());
16386 SDValue AddPtr =
16387 DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(),
16388 BasePtr, DAG.getIntPtrConstant(4, dl));
16389 SDValue FloatLoad2 = DAG.getLoad(
16390 MVT::f32, dl, SDValue(FloatLoad.getNode(), 1), AddPtr,
16391 LD->getPointerInfo().getWithOffset(4),
16392 commonAlignment(LD->getAlign(), 4), MMOFlags, LD->getAAInfo());
16393
16394 if (LD->isIndexed()) {
16395 // Note that DAGCombine should re-form any pre-increment load(s) from
16396 // what is produced here if that makes sense.
16397 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), BasePtr);
16398 }
16399
16400 DCI.CombineTo(Bitcast2, FloatLoad);
16401 DCI.CombineTo(Bitcast, FloatLoad2);
16402
16403 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, LD->isIndexed() ? 2 : 1),
16404 SDValue(FloatLoad2.getNode(), 1));
16405 return true;
16406 };
16407
16408 if (ReplaceTwoFloatLoad())
16409 return SDValue(N, 0);
16410
16411 EVT MemVT = LD->getMemoryVT();
16412 Type *Ty = MemVT.getTypeForEVT(*DAG.getContext());
16413 Align ABIAlignment = DAG.getDataLayout().getABITypeAlign(Ty);
16414 if (LD->isUnindexed() && VT.isVector() &&
16415 ((Subtarget.hasAltivec() && ISD::isNON_EXTLoad(N) &&
16416 // P8 and later hardware should just use LOAD.
16417 !Subtarget.hasP8Vector() &&
16418 (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32 ||
16419 VT == MVT::v4f32))) &&
16420 LD->getAlign() < ABIAlignment) {
16421 // This is a type-legal unaligned Altivec load.
16422 SDValue Chain = LD->getChain();
16423 SDValue Ptr = LD->getBasePtr();
16424 bool isLittleEndian = Subtarget.isLittleEndian();
16425
16426 // This implements the loading of unaligned vectors as described in
16427 // the venerable Apple Velocity Engine overview. Specifically:
16428 // https://developer.apple.com/hardwaredrivers/ve/alignment.html
16429 // https://developer.apple.com/hardwaredrivers/ve/code_optimization.html
16430 //
16431 // The general idea is to expand a sequence of one or more unaligned
16432 // loads into an alignment-based permutation-control instruction (lvsl
16433 // or lvsr), a series of regular vector loads (which always truncate
16434 // their input address to an aligned address), and a series of
16435 // permutations. The results of these permutations are the requested
16436 // loaded values. The trick is that the last "extra" load is not taken
16437 // from the address you might suspect (sizeof(vector) bytes after the
16438 // last requested load), but rather sizeof(vector) - 1 bytes after the
16439 // last requested vector. The point of this is to avoid a page fault if
16440 // the base address happened to be aligned. This works because if the
16441 // base address is aligned, then adding less than a full vector length
16442 // will cause the last vector in the sequence to be (re)loaded.
16443 // Otherwise, the next vector will be fetched as you might suspect was
16444 // necessary.
16445
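// As an illustrative sketch of the expansion built below: an unaligned
// big-endian load of a v4i32 at address P becomes roughly
//   vperm (lvx P), (lvx P+15), (lvsl P)
// where the second load sits sizeof(vector)-1 bytes past the first (or a
// full 16 bytes when a consecutive load exists), and little endian reverses
// the vperm operands and uses lvsr instead of lvsl.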
16446 // We might be able to reuse the permutation generation from
16447 // a different base address offset from this one by an aligned amount.
16448 // The INTRINSIC_WO_CHAIN DAG combine will attempt to perform this
16449 // optimization later.
16450 Intrinsic::ID Intr, IntrLD, IntrPerm;
16451 MVT PermCntlTy, PermTy, LDTy;
16452 Intr = isLittleEndian ? Intrinsic::ppc_altivec_lvsr
16453 : Intrinsic::ppc_altivec_lvsl;
16454 IntrLD = Intrinsic::ppc_altivec_lvx;
16455 IntrPerm = Intrinsic::ppc_altivec_vperm;
16456 PermCntlTy = MVT::v16i8;
16457 PermTy = MVT::v4i32;
16458 LDTy = MVT::v4i32;
16459
16460 SDValue PermCntl = BuildIntrinsicOp(Intr, Ptr, DAG, dl, PermCntlTy);
16461
16462 // Create the new MMO for the new base load. It is like the original MMO,
16463 // but represents an area in memory almost twice the vector size centered
16464 // on the original address. If the address is unaligned, we might start
16465 // reading up to (sizeof(vector)-1) bytes below the address of the
16466 // original unaligned load.
16467 MachineFunction &MF = DAG.getMachineFunction();
16468 MachineMemOperand *BaseMMO =
16469 MF.getMachineMemOperand(LD->getMemOperand(),
16470 -(int64_t)MemVT.getStoreSize()+1,
16471 2*MemVT.getStoreSize()-1);
16472
16473 // Create the new base load.
16474 SDValue LDXIntID =
16475 DAG.getTargetConstant(IntrLD, dl, getPointerTy(MF.getDataLayout()));
16476 SDValue BaseLoadOps[] = { Chain, LDXIntID, Ptr };
16477 SDValue BaseLoad =
16478 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl,
16479 DAG.getVTList(PermTy, MVT::Other),
16480 BaseLoadOps, LDTy, BaseMMO);
16481
16482 // Note that the value of IncOffset (which is provided to the next
16483 // load's pointer info offset value, and thus used to calculate the
16484 // alignment), and the value of IncValue (which is actually used to
16485 // increment the pointer value) are different! This is because we
16486 // require the next load to appear to be aligned, even though it
16487 // is actually offset from the base pointer by a lesser amount.
16488 int IncOffset = VT.getSizeInBits() / 8;
16489 int IncValue = IncOffset;
16490
16491 // Walk (both up and down) the chain looking for another load at the real
16492 // (aligned) offset (the alignment of the other load does not matter in
16493 // this case). If found, then do not use the offset reduction trick, as
16494 // that will prevent the loads from being later combined (as they would
16495 // otherwise be duplicates).
16496 if (!findConsecutiveLoad(LD, DAG))
16497 --IncValue;
16498
16499 SDValue Increment =
16500 DAG.getConstant(IncValue, dl, getPointerTy(MF.getDataLayout()));
16501 Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment);
16502
16503 MachineMemOperand *ExtraMMO =
16504 MF.getMachineMemOperand(LD->getMemOperand(),
16505 1, 2*MemVT.getStoreSize()-1);
16506 SDValue ExtraLoadOps[] = { Chain, LDXIntID, Ptr };
16507 SDValue ExtraLoad =
16508 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl,
16509 DAG.getVTList(PermTy, MVT::Other),
16510 ExtraLoadOps, LDTy, ExtraMMO);
16511
16512 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
16513 BaseLoad.getValue(1), ExtraLoad.getValue(1));
16514
16515 // Because vperm has a big-endian bias, we must reverse the order
16516 // of the input vectors and complement the permute control vector
16517 // when generating little endian code. We have already handled the
16518 // latter by using lvsr instead of lvsl, so just reverse BaseLoad
16519 // and ExtraLoad here.
16520 SDValue Perm;
16521 if (isLittleEndian)
16522 Perm = BuildIntrinsicOp(IntrPerm,
16523 ExtraLoad, BaseLoad, PermCntl, DAG, dl);
16524 else
16525 Perm = BuildIntrinsicOp(IntrPerm,
16526 BaseLoad, ExtraLoad, PermCntl, DAG, dl);
16527
16528 if (VT != PermTy)
16529 Perm = Subtarget.hasAltivec()
16530 ? DAG.getNode(ISD::BITCAST, dl, VT, Perm)
16531 : DAG.getNode(ISD::FP_ROUND, dl, VT, Perm,
16532 DAG.getTargetConstant(1, dl, MVT::i64));
16533 // second argument is 1 because this rounding
16534 // is always exact.
16535
16536 // The output of the permutation is our loaded result, the TokenFactor is
16537 // our new chain.
16538 DCI.CombineTo(N, Perm, TF);
16539 return SDValue(N, 0);
16540 }
16541 }
16542 break;
16543 case ISD::INTRINSIC_WO_CHAIN: {
16544 bool isLittleEndian = Subtarget.isLittleEndian();
16545 unsigned IID = N->getConstantOperandVal(0);
16546 Intrinsic::ID Intr = (isLittleEndian ? Intrinsic::ppc_altivec_lvsr
16547 : Intrinsic::ppc_altivec_lvsl);
16548 if (IID == Intr && N->getOperand(1)->getOpcode() == ISD::ADD) {
16549 SDValue Add = N->getOperand(1);
16550
16551 int Bits = 4 /* 16 byte alignment */;
16552
16553 if (DAG.MaskedValueIsZero(Add->getOperand(1),
16554 APInt::getAllOnes(Bits /* alignment */)
16555 .zext(Add.getScalarValueSizeInBits()))) {
16556 SDNode *BasePtr = Add->getOperand(0).getNode();
16557 for (SDNode *U : BasePtr->users()) {
16558 if (U->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
16559 U->getConstantOperandVal(0) == IID) {
16560 // We've found another LVSL/LVSR, and this address is an aligned
16561 // multiple of that one. The results will be the same, so use the
16562 // one we've just found instead.
16563
16564 return SDValue(U, 0);
16565 }
16566 }
16567 }
16568
16569 if (isa<ConstantSDNode>(Add->getOperand(1))) {
16570 SDNode *BasePtr = Add->getOperand(0).getNode();
16571 for (SDNode *U : BasePtr->users()) {
16572 if (U->getOpcode() == ISD::ADD &&
16573 isa<ConstantSDNode>(U->getOperand(1)) &&
16574 (Add->getConstantOperandVal(1) - U->getConstantOperandVal(1)) %
16575 (1ULL << Bits) ==
16576 0) {
16577 SDNode *OtherAdd = U;
16578 for (SDNode *V : OtherAdd->users()) {
16579 if (V->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
16580 V->getConstantOperandVal(0) == IID) {
16581 return SDValue(V, 0);
16582 }
16583 }
16584 }
16585 }
16586 }
16587 }
16588
16589 // Combine vmaxsw/h/b(a, a's negation) to abs(a)
16590 // Expose the vabsduw/h/b opportunity for downstream passes.
16591 if (!DCI.isAfterLegalizeDAG() && Subtarget.hasP9Altivec() &&
16592 (IID == Intrinsic::ppc_altivec_vmaxsw ||
16593 IID == Intrinsic::ppc_altivec_vmaxsh ||
16594 IID == Intrinsic::ppc_altivec_vmaxsb)) {
16595 SDValue V1 = N->getOperand(1);
16596 SDValue V2 = N->getOperand(2);
16597 if ((V1.getSimpleValueType() == MVT::v4i32 ||
16598 V1.getSimpleValueType() == MVT::v8i16 ||
16599 V1.getSimpleValueType() == MVT::v16i8) &&
16600 V1.getSimpleValueType() == V2.getSimpleValueType()) {
16601 // (0-a, a)
16602 if (V1.getOpcode() == ISD::SUB &&
16603 ISD::isBuildVectorAllZeros(V1.getOperand(0).getNode()) &&
16604 V1.getOperand(1) == V2) {
16605 return DAG.getNode(ISD::ABS, dl, V2.getValueType(), V2);
16606 }
16607 // (a, 0-a)
16608 if (V2.getOpcode() == ISD::SUB &&
16609 ISD::isBuildVectorAllZeros(V2.getOperand(0).getNode()) &&
16610 V2.getOperand(1) == V1) {
16611 return DAG.getNode(ISD::ABS, dl, V1.getValueType(), V1);
16612 }
16613 // (x-y, y-x)
16614 if (V1.getOpcode() == ISD::SUB && V2.getOpcode() == ISD::SUB &&
16615 V1.getOperand(0) == V2.getOperand(1) &&
16616 V1.getOperand(1) == V2.getOperand(0)) {
16617 return DAG.getNode(ISD::ABS, dl, V1.getValueType(), V1);
16618 }
16619 }
16620 }
16621 }
16622
16623 break;
16624 case ISD::INTRINSIC_W_CHAIN:
16625 switch (N->getConstantOperandVal(1)) {
16626 default:
16627 break;
16628 case Intrinsic::ppc_altivec_vsum4sbs:
16629 case Intrinsic::ppc_altivec_vsum4shs:
16630 case Intrinsic::ppc_altivec_vsum4ubs: {
16631 // These sum-across intrinsics only have a chain due to the side effect
16632 // that they may set the SAT bit. If we know the SAT bit will not be set
16633 // for some inputs, we can replace any uses of their chain with the
16634 // input chain.
16635 if (BuildVectorSDNode *BVN =
16636 dyn_cast<BuildVectorSDNode>(N->getOperand(3))) {
16637 APInt APSplatBits, APSplatUndef;
16638 unsigned SplatBitSize;
16639 bool HasAnyUndefs;
16640 bool BVNIsConstantSplat = BVN->isConstantSplat(
16641 APSplatBits, APSplatUndef, SplatBitSize, HasAnyUndefs, 0,
16642 !Subtarget.isLittleEndian());
16643 // If the constant splat vector is 0, the SAT bit will not be set.
16644 if (BVNIsConstantSplat && APSplatBits == 0)
16645 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), N->getOperand(0));
16646 }
16647 return SDValue();
16648 }
16649 case Intrinsic::ppc_vsx_lxvw4x:
16650 case Intrinsic::ppc_vsx_lxvd2x:
16651 // For little endian, VSX loads require generating lxvd2x/xxswapd.
16652 // Not needed on ISA 3.0 based CPUs since we have a non-permuting load.
16653 if (Subtarget.needsSwapsForVSXMemOps())
16654 return expandVSXLoadForLE(N, DCI);
16655 break;
16656 }
16657 break;
16658 case ISD::INTRINSIC_VOID:
16659 // For little endian, VSX stores require generating xxswapd/stxvd2x.
16660 // Not needed on ISA 3.0 based CPUs since we have a non-permuting store.
16661 if (Subtarget.needsSwapsForVSXMemOps()) {
16662 switch (N->getConstantOperandVal(1)) {
16663 default:
16664 break;
16665 case Intrinsic::ppc_vsx_stxvw4x:
16666 case Intrinsic::ppc_vsx_stxvd2x:
16667 return expandVSXStoreForLE(N, DCI);
16668 }
16669 }
16670 break;
16671 case ISD::BSWAP: {
16672 // Turn BSWAP (LOAD) -> lhbrx/lwbrx.
16673 // For subtargets without LDBRX, we can still do better than the default
16674 // expansion even for 64-bit BSWAP (LOAD).
16675 bool Is64BitBswapOn64BitTgt =
16676 Subtarget.isPPC64() && N->getValueType(0) == MVT::i64;
16677 bool IsSingleUseNormalLd = ISD::isNormalLoad(N->getOperand(0).getNode()) &&
16678 N->getOperand(0).hasOneUse();
16679 if (IsSingleUseNormalLd &&
16680 (N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i16 ||
16681 (Subtarget.hasLDBRX() && Is64BitBswapOn64BitTgt))) {
16682 SDValue Load = N->getOperand(0);
16683 LoadSDNode *LD = cast<LoadSDNode>(Load);
16684 // Create the byte-swapping load.
16685 SDValue Ops[] = {
16686 LD->getChain(), // Chain
16687 LD->getBasePtr(), // Ptr
16688 DAG.getValueType(N->getValueType(0)) // VT
16689 };
16690 SDValue BSLoad =
16691 DAG.getMemIntrinsicNode(PPCISD::LBRX, dl,
16692 DAG.getVTList(N->getValueType(0) == MVT::i64 ?
16693 MVT::i64 : MVT::i32, MVT::Other),
16694 Ops, LD->getMemoryVT(), LD->getMemOperand());
16695
16696 // If this is an i16 load, insert the truncate.
16697 SDValue ResVal = BSLoad;
16698 if (N->getValueType(0) == MVT::i16)
16699 ResVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i16, BSLoad);
16700
16701 // First, combine the bswap away. This makes the value produced by the
16702 // load dead.
16703 DCI.CombineTo(N, ResVal);
16704
16705 // Next, combine the load away; we give it a bogus result value but a real
16706 // chain result. The result value is dead because the bswap is dead.
16707 DCI.CombineTo(Load.getNode(), ResVal, BSLoad.getValue(1));
16708
16709 // Return N so it doesn't get rechecked!
16710 return SDValue(N, 0);
16711 }
16712 // Convert this to two 32-bit bswap loads and a BUILD_PAIR. Do this only
16713 // before legalization so that the BUILD_PAIR is handled correctly.
16714 if (!DCI.isBeforeLegalize() || !Is64BitBswapOn64BitTgt ||
16715 !IsSingleUseNormalLd)
16716 return SDValue();
16717 LoadSDNode *LD = cast<LoadSDNode>(N->getOperand(0));
16718
16719 // Can't split volatile or atomic loads.
16720 if (!LD->isSimple())
16721 return SDValue();
16722 SDValue BasePtr = LD->getBasePtr();
16723 SDValue Lo = DAG.getLoad(MVT::i32, dl, LD->getChain(), BasePtr,
16724 LD->getPointerInfo(), LD->getAlign());
16725 Lo = DAG.getNode(ISD::BSWAP, dl, MVT::i32, Lo);
16726 BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
16727 DAG.getIntPtrConstant(4, dl));
16728 MachineMemOperand *NewMMO = DAG.getMachineFunction().getMachineMemOperand(
16729 LD->getMemOperand(), 4, 4);
16730 SDValue Hi = DAG.getLoad(MVT::i32, dl, LD->getChain(), BasePtr, NewMMO);
16731 Hi = DAG.getNode(ISD::BSWAP, dl, MVT::i32, Hi);
16732 SDValue Res;
16733 if (Subtarget.isLittleEndian())
16734 Res = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Hi, Lo);
16735 else
16736 Res = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
16737 SDValue TF =
16738 DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
16739 Hi.getOperand(0).getValue(1), Lo.getOperand(0).getValue(1));
16740 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), TF);
16741 return Res;
16742 }
16743 case PPCISD::VCMP:
16744 // If a VCMP_rec node already exists with exactly the same operands as this
16745 // node, use its result instead of this node (VCMP_rec computes both a CR6
16746 // and a normal output).
16747 //
16748 if (!N->getOperand(0).hasOneUse() &&
16749 !N->getOperand(1).hasOneUse() &&
16750 !N->getOperand(2).hasOneUse()) {
16751
16752 // Scan all of the users of the LHS, looking for VCMP_rec's that match.
16753 SDNode *VCMPrecNode = nullptr;
16754
16755 SDNode *LHSN = N->getOperand(0).getNode();
16756 for (SDNode *User : LHSN->users())
16757 if (User->getOpcode() == PPCISD::VCMP_rec &&
16758 User->getOperand(1) == N->getOperand(1) &&
16759 User->getOperand(2) == N->getOperand(2) &&
16760 User->getOperand(0) == N->getOperand(0)) {
16761 VCMPrecNode = User;
16762 break;
16763 }
16764
16765 // If there is no VCMP_rec node, or if the flag value has a single use,
16766 // don't transform this.
16767 if (!VCMPrecNode || VCMPrecNode->hasNUsesOfValue(0, 1))
16768 break;
16769
16770 // Look at the (necessarily single) use of the flag value. If it has a
16771 // chain, this transformation is more complex. Note that multiple things
16772 // could use the value result, which we should ignore.
16773 SDNode *FlagUser = nullptr;
16774 for (SDNode::use_iterator UI = VCMPrecNode->use_begin();
16775 FlagUser == nullptr; ++UI) {
16776 assert(UI != VCMPrecNode->use_end() && "Didn't find user!");
16777 SDNode *User = UI->getUser();
16778 for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) {
16779 if (User->getOperand(i) == SDValue(VCMPrecNode, 1)) {
16780 FlagUser = User;
16781 break;
16782 }
16783 }
16784 }
16785
16786 // If the user is a MFOCRF instruction, we know this is safe.
16787 // Otherwise we give up for right now.
16788 if (FlagUser->getOpcode() == PPCISD::MFOCRF)
16789 return SDValue(VCMPrecNode, 0);
16790 }
16791 break;
16792 case ISD::BR_CC: {
16793 // If this is a branch on an altivec predicate comparison, lower this so
16794 // that we don't have to do a MFOCRF: instead, branch directly on CR6. This
16795 // lowering is done pre-legalize, because the legalizer lowers the predicate
16796 // compare down to code that is difficult to reassemble.
16797 // This code also handles branches that depend on the result of a store
16798 // conditional.
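// For example, a branch on the i32 result of a stwcx. intrinsic is rewritten
// below into a COND_BRANCH on CR0, and a branch on an Altivec predicate
// intrinsic (e.g. vcmpequw.) becomes a VCMP_rec feeding a COND_BRANCH on CR6.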
16799 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
16800 SDValue LHS = N->getOperand(2), RHS = N->getOperand(3);
16801
16802 int CompareOpc;
16803 bool isDot;
16804
16805 if (!isa<ConstantSDNode>(RHS) || (CC != ISD::SETEQ && CC != ISD::SETNE))
16806 break;
16807
16808 // Since we are doing this pre-legalize, the RHS can be a constant of
16809 // arbitrary bitwidth which may cause issues when trying to get the value
16810 // from the underlying APInt.
16811 auto RHSAPInt = RHS->getAsAPIntVal();
16812 if (!RHSAPInt.isIntN(64))
16813 break;
16814
16815 unsigned Val = RHSAPInt.getZExtValue();
16816 auto isImpossibleCompare = [&]() {
16817 // If this is a comparison against something other than 0/1, then we know
16818 // that the condition is never/always true.
16819 if (Val != 0 && Val != 1) {
16820 if (CC == ISD::SETEQ) // Cond never true, remove branch.
16821 return N->getOperand(0);
16822 // Always !=, turn it into an unconditional branch.
16823 return DAG.getNode(ISD::BR, dl, MVT::Other,
16824 N->getOperand(0), N->getOperand(4));
16825 }
16826 return SDValue();
16827 };
16828 // Combine branches fed by store conditional instructions (st[bhwd]cx).
16829 unsigned StoreWidth = 0;
16830 if (LHS.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
16831 isStoreConditional(LHS, StoreWidth)) {
16832 if (SDValue Impossible = isImpossibleCompare())
16833 return Impossible;
16834 PPC::Predicate CompOpc;
16835 // eq 0 => ne
16836 // ne 0 => eq
16837 // eq 1 => eq
16838 // ne 1 => ne
16839 if (Val == 0)
16840 CompOpc = CC == ISD::SETEQ ? PPC::PRED_NE : PPC::PRED_EQ;
16841 else
16842 CompOpc = CC == ISD::SETEQ ? PPC::PRED_EQ : PPC::PRED_NE;
16843
16844 SDValue Ops[] = {LHS.getOperand(0), LHS.getOperand(2), LHS.getOperand(3),
16845 DAG.getConstant(StoreWidth, dl, MVT::i32)};
16846 auto *MemNode = cast<MemSDNode>(LHS);
16847 SDValue ConstSt = DAG.getMemIntrinsicNode(
16848 PPCISD::STORE_COND, dl,
16849 DAG.getVTList(MVT::i32, MVT::Other, MVT::Glue), Ops,
16850 MemNode->getMemoryVT(), MemNode->getMemOperand());
16851
16852 SDValue InChain;
16853 // Unchain the branch from the original store conditional.
16854 if (N->getOperand(0) == LHS.getValue(1))
16855 InChain = LHS.getOperand(0);
16856 else if (N->getOperand(0).getOpcode() == ISD::TokenFactor) {
16857 SmallVector<SDValue, 4> InChains;
16858 SDValue InTF = N->getOperand(0);
16859 for (int i = 0, e = InTF.getNumOperands(); i < e; i++)
16860 if (InTF.getOperand(i) != LHS.getValue(1))
16861 InChains.push_back(InTF.getOperand(i));
16862 InChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, InChains);
16863 }
16864
16865 return DAG.getNode(PPCISD::COND_BRANCH, dl, MVT::Other, InChain,
16866 DAG.getConstant(CompOpc, dl, MVT::i32),
16867 DAG.getRegister(PPC::CR0, MVT::i32), N->getOperand(4),
16868 ConstSt.getValue(2));
16869 }
16870
16871 if (LHS.getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
16872 getVectorCompareInfo(LHS, CompareOpc, isDot, Subtarget)) {
16873 assert(isDot && "Can't compare against a vector result!");
16874
16875 if (SDValue Impossible = isImpossibleCompare())
16876 return Impossible;
16877
16878 bool BranchOnWhenPredTrue = (CC == ISD::SETEQ) ^ (Val == 0);
16879 // Create the PPCISD altivec 'dot' comparison node.
16880 SDValue Ops[] = {
16881 LHS.getOperand(2), // LHS of compare
16882 LHS.getOperand(3), // RHS of compare
16883 DAG.getConstant(CompareOpc, dl, MVT::i32)
16884 };
16885 EVT VTs[] = { LHS.getOperand(2).getValueType(), MVT::Glue };
16886 SDValue CompNode = DAG.getNode(PPCISD::VCMP_rec, dl, VTs, Ops);
16887
16888 // Unpack the result based on how the target uses it.
16889 PPC::Predicate CompOpc;
16890 switch (LHS.getConstantOperandVal(1)) {
16891 default: // Can't happen, don't crash on invalid number though.
16892 case 0: // Branch on the value of the EQ bit of CR6.
16893 CompOpc = BranchOnWhenPredTrue ? PPC::PRED_EQ : PPC::PRED_NE;
16894 break;
16895 case 1: // Branch on the inverted value of the EQ bit of CR6.
16896 CompOpc = BranchOnWhenPredTrue ? PPC::PRED_NE : PPC::PRED_EQ;
16897 break;
16898 case 2: // Branch on the value of the LT bit of CR6.
16899 CompOpc = BranchOnWhenPredTrue ? PPC::PRED_LT : PPC::PRED_GE;
16900 break;
16901 case 3: // Branch on the inverted value of the LT bit of CR6.
16902 CompOpc = BranchOnWhenPredTrue ? PPC::PRED_GE : PPC::PRED_LT;
16903 break;
16904 }
16905
16906 return DAG.getNode(PPCISD::COND_BRANCH, dl, MVT::Other, N->getOperand(0),
16907 DAG.getConstant(CompOpc, dl, MVT::i32),
16908 DAG.getRegister(PPC::CR6, MVT::i32),
16909 N->getOperand(4), CompNode.getValue(1));
16910 }
16911 break;
16912 }
16913 case ISD::BUILD_VECTOR:
16914 return DAGCombineBuildVector(N, DCI);
16915 }
16916
16917 return SDValue();
16918}
16919
16920SDValue
16921 PPCTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
16922 SelectionDAG &DAG,
16923 SmallVectorImpl<SDNode *> &Created) const {
16924 // fold (sdiv X, pow2)
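// For example, (sdiv %x, 8) is emitted as (SRA_ADDZE %x, 3), i.e. an
// arithmetic shift right whose carry-out feeds an addze to round the result
// toward zero, and (sdiv %x, -8) additionally negates that value through the
// IsNegPow2 path below.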
16925 EVT VT = N->getValueType(0);
16926 if (VT == MVT::i64 && !Subtarget.isPPC64())
16927 return SDValue();
16928 if ((VT != MVT::i32 && VT != MVT::i64) ||
16929 !(Divisor.isPowerOf2() || Divisor.isNegatedPowerOf2()))
16930 return SDValue();
16931
16932 SDLoc DL(N);
16933 SDValue N0 = N->getOperand(0);
16934
16935 bool IsNegPow2 = Divisor.isNegatedPowerOf2();
16936 unsigned Lg2 = (IsNegPow2 ? -Divisor : Divisor).countr_zero();
16937 SDValue ShiftAmt = DAG.getConstant(Lg2, DL, VT);
16938
16939 SDValue Op = DAG.getNode(PPCISD::SRA_ADDZE, DL, VT, N0, ShiftAmt);
16940 Created.push_back(Op.getNode());
16941
16942 if (IsNegPow2) {
16943 Op = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Op);
16944 Created.push_back(Op.getNode());
16945 }
16946
16947 return Op;
16948}
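// Illustrative sketch (not from the original source): PPCISD::SRA_ADDZE models
// the classic PowerPC signed divide-by-power-of-two idiom. The arithmetic
// shift right sets CA when one-bits are shifted out of a negative value, and
// addze adds the carry back so the result rounds toward zero. Assuming the
// operand lives in r3, the lowering behaves roughly like:
//   sdiv i32 %x, 4   ->  srawi r3, r3, 2
//                        addze r3, r3
//   sdiv i32 %x, -4  ->  srawi r3, r3, 2
//                        addze r3, r3
//                        neg   r3, r3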
16949
16950//===----------------------------------------------------------------------===//
16951// Inline Assembly Support
16952//===----------------------------------------------------------------------===//
16953
16954void PPCTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
16955 KnownBits &Known,
16956 const APInt &DemandedElts,
16957 const SelectionDAG &DAG,
16958 unsigned Depth) const {
16959 Known.resetAll();
16960 switch (Op.getOpcode()) {
16961 default: break;
16962 case PPCISD::LBRX: {
16963 // lhbrx is known to have the top bits cleared out.
16964 if (cast<VTSDNode>(Op.getOperand(2))->getVT() == MVT::i16)
16965 Known.Zero = 0xFFFF0000;
16966 break;
16967 }
16968 case ISD::INTRINSIC_WO_CHAIN: {
16969 switch (Op.getConstantOperandVal(0)) {
16970 default: break;
16971 case Intrinsic::ppc_altivec_vcmpbfp_p:
16972 case Intrinsic::ppc_altivec_vcmpeqfp_p:
16973 case Intrinsic::ppc_altivec_vcmpequb_p:
16974 case Intrinsic::ppc_altivec_vcmpequh_p:
16975 case Intrinsic::ppc_altivec_vcmpequw_p:
16976 case Intrinsic::ppc_altivec_vcmpequd_p:
16977 case Intrinsic::ppc_altivec_vcmpequq_p:
16978 case Intrinsic::ppc_altivec_vcmpgefp_p:
16979 case Intrinsic::ppc_altivec_vcmpgtfp_p:
16980 case Intrinsic::ppc_altivec_vcmpgtsb_p:
16981 case Intrinsic::ppc_altivec_vcmpgtsh_p:
16982 case Intrinsic::ppc_altivec_vcmpgtsw_p:
16983 case Intrinsic::ppc_altivec_vcmpgtsd_p:
16984 case Intrinsic::ppc_altivec_vcmpgtsq_p:
16985 case Intrinsic::ppc_altivec_vcmpgtub_p:
16986 case Intrinsic::ppc_altivec_vcmpgtuh_p:
16987 case Intrinsic::ppc_altivec_vcmpgtuw_p:
16988 case Intrinsic::ppc_altivec_vcmpgtud_p:
16989 case Intrinsic::ppc_altivec_vcmpgtuq_p:
16990 Known.Zero = ~1U; // All bits but the low one are known to be zero.
16991 break;
16992 }
16993 break;
16994 }
16995 case ISD::INTRINSIC_W_CHAIN: {
16996 switch (Op.getConstantOperandVal(1)) {
16997 default:
16998 break;
16999 case Intrinsic::ppc_load2r:
17000 // Top bits are cleared for load2r (which is the same as lhbrx).
17001 Known.Zero = 0xFFFF0000;
17002 break;
17003 }
17004 break;
17005 }
17006 }
17007}
17008
17009Align PPCTargetLowering::getPrefLoopAlignment(MachineLoop *ML) const {
17010 switch (Subtarget.getCPUDirective()) {
17011 default: break;
17012 case PPC::DIR_970:
17013 case PPC::DIR_PWR4:
17014 case PPC::DIR_PWR5:
17015 case PPC::DIR_PWR5X:
17016 case PPC::DIR_PWR6:
17017 case PPC::DIR_PWR6X:
17018 case PPC::DIR_PWR7:
17019 case PPC::DIR_PWR8:
17020 case PPC::DIR_PWR9:
17021 case PPC::DIR_PWR10:
17022 case PPC::DIR_PWR11:
17023 case PPC::DIR_PWR_FUTURE: {
17024 if (!ML)
17025 break;
17026
17027 if (!DisableInnermostLoopAlign32) {
17028 // If the nested loop is an innermost loop, prefer a 32-byte alignment,
17029 // so that we can decrease cache misses and branch-prediction misses.
17030 // Actual alignment of the loop will depend on the hotness check and other
17031 // logic in alignBlocks.
17032 if (ML->getLoopDepth() > 1 && ML->getSubLoops().empty())
17033 return Align(32);
17034 }
17035
17036 const PPCInstrInfo *TII = Subtarget.getInstrInfo();
17037
17038 // For small loops (between 5 and 8 instructions), align to a 32-byte
17039 // boundary so that the entire loop fits in one instruction-cache line.
17040 uint64_t LoopSize = 0;
17041 for (auto I = ML->block_begin(), IE = ML->block_end(); I != IE; ++I)
17042 for (const MachineInstr &J : **I) {
17043 LoopSize += TII->getInstSizeInBytes(J);
17044 if (LoopSize > 32)
17045 break;
17046 }
17047
17048 if (LoopSize > 16 && LoopSize <= 32)
17049 return Align(32);
17050
17051 break;
17052 }
17053 }
17054
17055 return TargetLowering::getPrefLoopAlignment(ML);
17056}
17057
17058/// getConstraintType - Given a constraint, return the type of
17059/// constraint it is for this target.
17060PPCTargetLowering::ConstraintType
17061PPCTargetLowering::getConstraintType(StringRef Constraint) const {
17062 if (Constraint.size() == 1) {
17063 switch (Constraint[0]) {
17064 default: break;
17065 case 'b':
17066 case 'r':
17067 case 'f':
17068 case 'd':
17069 case 'v':
17070 case 'y':
17071 return C_RegisterClass;
17072 case 'Z':
17073 // FIXME: While Z does indicate a memory constraint, it specifically
17074 // indicates an r+r address (used in conjunction with the 'y' modifier
17075 // in the replacement string). Currently, we're forcing the base
17076 // register to be r0 in the asm printer (which is interpreted as zero)
17077 // and forming the complete address in the second register. This is
17078 // suboptimal.
17079 return C_Memory;
17080 }
17081 } else if (Constraint == "wc") { // individual CR bits.
17082 return C_RegisterClass;
17083 } else if (Constraint == "wa" || Constraint == "wd" ||
17084 Constraint == "wf" || Constraint == "ws" ||
17085 Constraint == "wi" || Constraint == "ww") {
17086 return C_RegisterClass; // VSX registers.
17087 }
17088 return TargetLowering::getConstraintType(Constraint);
17089}
17090
17091/// Examine constraint type and operand type and determine a weight value.
17092/// This object must already have been set up with the operand type
17093/// and the current alternative constraint selected.
17094TargetLowering::ConstraintWeight
17095PPCTargetLowering::getSingleConstraintMatchWeight(
17096 AsmOperandInfo &info, const char *constraint) const {
17097 ConstraintWeight weight = CW_Invalid;
17098 Value *CallOperandVal = info.CallOperandVal;
17099 // If we don't have a value, we can't do a match,
17100 // but allow it at the lowest weight.
17101 if (!CallOperandVal)
17102 return CW_Default;
17103 Type *type = CallOperandVal->getType();
17104
17105 // Look at the constraint type.
17106 if (StringRef(constraint) == "wc" && type->isIntegerTy(1))
17107 return CW_Register; // an individual CR bit.
17108 else if ((StringRef(constraint) == "wa" ||
17109 StringRef(constraint) == "wd" ||
17110 StringRef(constraint) == "wf") &&
17111 type->isVectorTy())
17112 return CW_Register;
17113 else if (StringRef(constraint) == "wi" && type->isIntegerTy(64))
17114 return CW_Register; // 'wi' holds 64-bit integer data.
17115 else if (StringRef(constraint) == "ws" && type->isDoubleTy())
17116 return CW_Register;
17117 else if (StringRef(constraint) == "ww" && type->isFloatTy())
17118 return CW_Register;
17119
17120 switch (*constraint) {
17121 default:
17122 weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
17123 break;
17124 case 'b':
17125 if (type->isIntegerTy())
17126 weight = CW_Register;
17127 break;
17128 case 'f':
17129 if (type->isFloatTy())
17130 weight = CW_Register;
17131 break;
17132 case 'd':
17133 if (type->isDoubleTy())
17134 weight = CW_Register;
17135 break;
17136 case 'v':
17137 if (type->isVectorTy())
17138 weight = CW_Register;
17139 break;
17140 case 'y':
17141 weight = CW_Register;
17142 break;
17143 case 'Z':
17144 weight = CW_Memory;
17145 break;
17146 }
17147 return weight;
17148}
17149
17150std::pair<unsigned, const TargetRegisterClass *>
17151PPCTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
17152 StringRef Constraint,
17153 MVT VT) const {
17154 if (Constraint.size() == 1) {
17155 // GCC RS6000 Constraint Letters
17156 switch (Constraint[0]) {
17157 case 'b': // R1-R31
17158 if (VT == MVT::i64 && Subtarget.isPPC64())
17159 return std::make_pair(0U, &PPC::G8RC_NOX0RegClass);
17160 return std::make_pair(0U, &PPC::GPRC_NOR0RegClass);
17161 case 'r': // R0-R31
17162 if (VT == MVT::i64 && Subtarget.isPPC64())
17163 return std::make_pair(0U, &PPC::G8RCRegClass);
17164 return std::make_pair(0U, &PPC::GPRCRegClass);
17165 // 'd' and 'f' constraints are both defined to be "the floating point
17166 // registers", where one is for 32-bit and the other for 64-bit. We don't
17167 // really care overly much here so just give them all the same reg classes.
17168 case 'd':
17169 case 'f':
17170 if (Subtarget.hasSPE()) {
17171 if (VT == MVT::f32 || VT == MVT::i32)
17172 return std::make_pair(0U, &PPC::GPRCRegClass);
17173 if (VT == MVT::f64 || VT == MVT::i64)
17174 return std::make_pair(0U, &PPC::SPERCRegClass);
17175 } else {
17176 if (VT == MVT::f32 || VT == MVT::i32)
17177 return std::make_pair(0U, &PPC::F4RCRegClass);
17178 if (VT == MVT::f64 || VT == MVT::i64)
17179 return std::make_pair(0U, &PPC::F8RCRegClass);
17180 }
17181 break;
17182 case 'v':
17183 if (Subtarget.hasAltivec() && VT.isVector())
17184 return std::make_pair(0U, &PPC::VRRCRegClass);
17185 else if (Subtarget.hasVSX())
17186 // Scalars in Altivec registers only make sense with VSX.
17187 return std::make_pair(0U, &PPC::VFRCRegClass);
17188 break;
17189 case 'y': // crrc
17190 return std::make_pair(0U, &PPC::CRRCRegClass);
17191 }
17192 } else if (Constraint == "wc" && Subtarget.useCRBits()) {
17193 // An individual CR bit.
17194 return std::make_pair(0U, &PPC::CRBITRCRegClass);
17195 } else if ((Constraint == "wa" || Constraint == "wd" ||
17196 Constraint == "wf" || Constraint == "wi") &&
17197 Subtarget.hasVSX()) {
17198 // A VSX register for either a scalar (FP) or vector. There is no
17199 // support for single precision scalars on subtargets prior to Power8.
17200 if (VT.isVector())
17201 return std::make_pair(0U, &PPC::VSRCRegClass);
17202 if (VT == MVT::f32 && Subtarget.hasP8Vector())
17203 return std::make_pair(0U, &PPC::VSSRCRegClass);
17204 return std::make_pair(0U, &PPC::VSFRCRegClass);
17205 } else if ((Constraint == "ws" || Constraint == "ww") && Subtarget.hasVSX()) {
17206 if (VT == MVT::f32 && Subtarget.hasP8Vector())
17207 return std::make_pair(0U, &PPC::VSSRCRegClass);
17208 else
17209 return std::make_pair(0U, &PPC::VSFRCRegClass);
17210 } else if (Constraint == "lr") {
17211 if (VT == MVT::i64)
17212 return std::make_pair(0U, &PPC::LR8RCRegClass);
17213 else
17214 return std::make_pair(0U, &PPC::LRRCRegClass);
17215 }
17216
17217 // Handle special cases of physical registers that are not properly handled
17218 // by the base class.
17219 if (Constraint[0] == '{' && Constraint[Constraint.size() - 1] == '}') {
17220 // If we name a VSX register, we can't defer to the base class because it
17221 // will not recognize the correct register (their names will be VSL{0-31}
17222 // and V{0-31} so they won't match). So we match them here.
17223 if (Constraint.size() > 3 && Constraint[1] == 'v' && Constraint[2] == 's') {
17224 int VSNum = atoi(Constraint.data() + 3);
17225 assert(VSNum >= 0 && VSNum <= 63 &&
17226 "Attempted to access a vsr out of range");
17227 if (VSNum < 32)
17228 return std::make_pair(PPC::VSL0 + VSNum, &PPC::VSRCRegClass);
17229 return std::make_pair(PPC::V0 + VSNum - 32, &PPC::VSRCRegClass);
17230 }
17231
17232 // For float registers, we can't defer to the base class as it will match
17233 // the SPILLTOVSRRC class.
17234 if (Constraint.size() > 3 && Constraint[1] == 'f') {
17235 int RegNum = atoi(Constraint.data() + 2);
17236 if (RegNum > 31 || RegNum < 0)
17237 report_fatal_error("Invalid floating point register number");
17238 if (VT == MVT::f32 || VT == MVT::i32)
17239 return Subtarget.hasSPE()
17240 ? std::make_pair(PPC::R0 + RegNum, &PPC::GPRCRegClass)
17241 : std::make_pair(PPC::F0 + RegNum, &PPC::F4RCRegClass);
17242 if (VT == MVT::f64 || VT == MVT::i64)
17243 return Subtarget.hasSPE()
17244 ? std::make_pair(PPC::S0 + RegNum, &PPC::SPERCRegClass)
17245 : std::make_pair(PPC::F0 + RegNum, &PPC::F8RCRegClass);
17246 }
17247 }
17248
17249 std::pair<unsigned, const TargetRegisterClass *> R =
17250 TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
17251
17252 // r[0-9]+ are used, on PPC64, to refer to the corresponding 64-bit registers
17253 // (which we call X[0-9]+). If a 64-bit value has been requested, and a
17254 // 32-bit GPR has been selected, then 'upgrade' it to the 64-bit parent
17255 // register.
17256 // FIXME: If TargetLowering::getRegForInlineAsmConstraint could somehow use
17257 // the AsmName field from *RegisterInfo.td, then this would not be necessary.
17258 if (R.first && VT == MVT::i64 && Subtarget.isPPC64() &&
17259 PPC::GPRCRegClass.contains(R.first))
17260 return std::make_pair(TRI->getMatchingSuperReg(R.first,
17261 PPC::sub_32, &PPC::G8RCRegClass),
17262 &PPC::G8RCRegClass);
17263
17264 // GCC accepts 'cc' as an alias for 'cr0', and we need to do the same.
17265 if (!R.second && StringRef("{cc}").equals_insensitive(Constraint)) {
17266 R.first = PPC::CR0;
17267 R.second = &PPC::CRRCRegClass;
17268 }
17269 // FIXME: This warning should ideally be emitted in the front end.
17270 const auto &TM = getTargetMachine();
17271 if (Subtarget.isAIXABI() && !TM.getAIXExtendedAltivecABI()) {
17272 if (((R.first >= PPC::V20 && R.first <= PPC::V31) ||
17273 (R.first >= PPC::VF20 && R.first <= PPC::VF31)) &&
17274 (R.second == &PPC::VSRCRegClass || R.second == &PPC::VSFRCRegClass))
17275 errs() << "warning: vector registers 20 to 32 are reserved in the "
17276 "default AIX AltiVec ABI and cannot be used\n";
17277 }
17278
17279 return R;
17280}
17281
17282/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
17283/// vector. If it is invalid, don't add anything to Ops.
17284void PPCTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
17285 StringRef Constraint,
17286 std::vector<SDValue> &Ops,
17287 SelectionDAG &DAG) const {
17288 SDValue Result;
17289
17290 // Only support length 1 constraints.
17291 if (Constraint.size() > 1)
17292 return;
17293
17294 char Letter = Constraint[0];
17295 switch (Letter) {
17296 default: break;
17297 case 'I':
17298 case 'J':
17299 case 'K':
17300 case 'L':
17301 case 'M':
17302 case 'N':
17303 case 'O':
17304 case 'P': {
17305 ConstantSDNode *CST = dyn_cast<ConstantSDNode>(Op);
17306 if (!CST) return; // Must be an immediate to match.
17307 SDLoc dl(Op);
17308 int64_t Value = CST->getSExtValue();
17309 EVT TCVT = MVT::i64; // All constants taken to be 64 bits so that negative
17310 // numbers are printed as such.
17311 switch (Letter) {
17312 default: llvm_unreachable("Unknown constraint letter!");
17313 case 'I': // "I" is a signed 16-bit constant.
17314 if (isInt<16>(Value))
17315 Result = DAG.getTargetConstant(Value, dl, TCVT);
17316 break;
17317 case 'J': // "J" is a constant with only the high-order 16 bits nonzero.
17318 if (isShiftedUInt<16, 16>(Value))
17319 Result = DAG.getTargetConstant(Value, dl, TCVT);
17320 break;
17321 case 'L': // "L" is a signed 16-bit constant shifted left 16 bits.
17322 if (isShiftedInt<16, 16>(Value))
17323 Result = DAG.getTargetConstant(Value, dl, TCVT);
17324 break;
17325 case 'K': // "K" is a constant with only the low-order 16 bits nonzero.
17326 if (isUInt<16>(Value))
17327 Result = DAG.getTargetConstant(Value, dl, TCVT);
17328 break;
17329 case 'M': // "M" is a constant that is greater than 31.
17330 if (Value > 31)
17331 Result = DAG.getTargetConstant(Value, dl, TCVT);
17332 break;
17333 case 'N': // "N" is a positive constant that is an exact power of two.
17334 if (Value > 0 && isPowerOf2_64(Value))
17335 Result = DAG.getTargetConstant(Value, dl, TCVT);
17336 break;
17337 case 'O': // "O" is the constant zero.
17338 if (Value == 0)
17339 Result = DAG.getTargetConstant(Value, dl, TCVT);
17340 break;
17341 case 'P': // "P" is a constant whose negation is a signed 16-bit constant.
17342 if (isInt<16>(-Value))
17343 Result = DAG.getTargetConstant(Value, dl, TCVT);
17344 break;
17345 }
17346 break;
17347 }
17348 }
17349
17350 if (Result.getNode()) {
17351 Ops.push_back(Result);
17352 return;
17353 }
17354
17355 // Handle standard constraint letters.
17356 TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
17357}
17358
17359void PPCTargetLowering::CollectTargetIntrinsicOperands(const CallInst &I,
17360 SmallVectorImpl<SDValue> &Ops,
17361 SelectionDAG &DAG) const {
17362 if (I.getNumOperands() <= 1)
17363 return;
17364 if (!isa<ConstantSDNode>(Ops[1].getNode()))
17365 return;
17366 auto IntrinsicID = Ops[1].getNode()->getAsZExtVal();
17367 if (IntrinsicID != Intrinsic::ppc_tdw && IntrinsicID != Intrinsic::ppc_tw &&
17368 IntrinsicID != Intrinsic::ppc_trapd && IntrinsicID != Intrinsic::ppc_trap)
17369 return;
17370
17371 if (MDNode *MDN = I.getMetadata(LLVMContext::MD_annotation))
17372 Ops.push_back(DAG.getMDNode(MDN));
17373}
17374
17375// isLegalAddressingMode - Return true if the addressing mode represented
17376// by AM is legal for this target, for a load/store of the specified type.
17377bool PPCTargetLowering::isLegalAddressingMode(const DataLayout &DL,
17378 const AddrMode &AM, Type *Ty,
17379 unsigned AS,
17380 Instruction *I) const {
17381 // The vector-type r+i form is supported from Power9 on as the DQ form. We
17382 // don't check the DQ-form offset requirement (off % 16 == 0) because, on
17383 // PowerPC, the immediate form is preferred and the offset can be adjusted
17384 // to use it later in the PPCLoopInstrFormPrep pass. Also, LSR checks the
17385 // legal addressing mode using the min and max offsets of an LSRUse, so we
17386 // should be a little aggressive and accept other offsets for that LSRUse.
17387 if (Ty->isVectorTy() && AM.BaseOffs != 0 && !Subtarget.hasP9Vector())
17388 return false;
17389
17390 // PPC allows a sign-extended 16-bit immediate field.
17391 if (AM.BaseOffs <= -(1LL << 16) || AM.BaseOffs >= (1LL << 16)-1)
17392 return false;
17393
17394 // No global is ever allowed as a base.
17395 if (AM.BaseGV)
17396 return false;
17397
17398 // PPC only supports r+r addressing.
17399 switch (AM.Scale) {
17400 case 0: // "r+i" or just "i", depending on HasBaseReg.
17401 break;
17402 case 1:
17403 if (AM.HasBaseReg && AM.BaseOffs) // "r+r+i" is not allowed.
17404 return false;
17405 // Otherwise we have r+r or r+i.
17406 break;
17407 case 2:
17408 if (AM.HasBaseReg || AM.BaseOffs) // 2*r+r or 2*r+i is not allowed.
17409 return false;
17410 // Allow 2*r as r+r.
17411 break;
17412 default:
17413 // No other scales are supported.
17414 return false;
17415 }
17416
17417 return true;
17418}
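// Illustrative sketch (not from the original source): under the checks above,
// AddrMode{BaseReg, BaseOffs=40, Scale=0} and AddrMode{BaseReg, Scale=1} are
// accepted (r+i and r+r), and Scale=2 with no base register and no offset is
// accepted as 2*r folded into r+r. A global base, a scaled index combined
// with an offset (Scale=1 and BaseOffs != 0), offsets at or beyond the
// roughly +/-2^16 window checked above, or a vector-typed access with a
// nonzero offset before Power9 are all rejected.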
17419
17420SDValue PPCTargetLowering::LowerRETURNADDR(SDValue Op,
17421 SelectionDAG &DAG) const {
17422 MachineFunction &MF = DAG.getMachineFunction();
17423 MachineFrameInfo &MFI = MF.getFrameInfo();
17424 MFI.setReturnAddressIsTaken(true);
17425
17426 if (verifyReturnAddressArgumentIsConstant(Op, DAG))
17427 return SDValue();
17428
17429 SDLoc dl(Op);
17430 unsigned Depth = Op.getConstantOperandVal(0);
17431
17432 // Make sure the function does not optimize away the store of the RA to
17433 // the stack.
17434 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
17435 FuncInfo->setLRStoreRequired();
17436 auto PtrVT = getPointerTy(MF.getDataLayout());
17437
17438 if (Depth > 0) {
17439 // The link register (return address) is saved in the caller's frame
17440 // not the callee's stack frame. So we must get the caller's frame
17441 // address and load the return address at the LR offset from there.
17442 SDValue FrameAddr =
17443 DAG.getLoad(Op.getValueType(), dl, DAG.getEntryNode(),
17444 LowerFRAMEADDR(Op, DAG), MachinePointerInfo());
17445 SDValue Offset =
17446 DAG.getConstant(Subtarget.getFrameLowering()->getReturnSaveOffset(), dl,
17447 Subtarget.getScalarIntVT());
17448 return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(),
17449 DAG.getNode(ISD::ADD, dl, PtrVT, FrameAddr, Offset),
17450 MachinePointerInfo());
17451 }
17452
17453 // Just load the return address off the stack.
17454 SDValue RetAddrFI = getReturnAddrFrameIndex(DAG);
17455 return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), RetAddrFI,
17456 MachinePointerInfo());
17457}
17458
17459SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op,
17460 SelectionDAG &DAG) const {
17461 SDLoc dl(Op);
17462 unsigned Depth = Op.getConstantOperandVal(0);
17463
17464 MachineFunction &MF = DAG.getMachineFunction();
17465 MachineFrameInfo &MFI = MF.getFrameInfo();
17466 MFI.setFrameAddressIsTaken(true);
17467
17468 EVT PtrVT = getPointerTy(MF.getDataLayout());
17469 bool isPPC64 = PtrVT == MVT::i64;
17470
17471 // Naked functions never have a frame pointer, and so we use r1. For all
17472 // other functions, this decision must be delayed until during PEI.
17473 unsigned FrameReg;
17474 if (MF.getFunction().hasFnAttribute(Attribute::Naked))
17475 FrameReg = isPPC64 ? PPC::X1 : PPC::R1;
17476 else
17477 FrameReg = isPPC64 ? PPC::FP8 : PPC::FP;
17478
17479 SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg,
17480 PtrVT);
17481 while (Depth--)
17482 FrameAddr = DAG.getLoad(Op.getValueType(), dl, DAG.getEntryNode(),
17483 FrameAddr, MachinePointerInfo());
17484 return FrameAddr;
17485}
17486
17487#define GET_REGISTER_MATCHER
17488#include "PPCGenAsmMatcher.inc"
17489
17490Register PPCTargetLowering::getRegisterByName(const char *RegName, LLT VT,
17491 const MachineFunction &MF) const {
17492 bool IsPPC64 = Subtarget.isPPC64();
17493
17494 bool Is64Bit = IsPPC64 && VT == LLT::scalar(64);
17495 if (!Is64Bit && VT != LLT::scalar(32))
17496 report_fatal_error("Invalid register global variable type");
17497
17498 Register Reg = MatchRegisterName(RegName);
17499 if (!Reg)
17500 report_fatal_error(
17501 Twine("Invalid global name register \"" + StringRef(RegName) + "\"."));
17502
17503 // FIXME: Unable to generate code for `-O2` but okay for `-O0`.
17504 // Need followup investigation as to why.
17505 if ((IsPPC64 && Reg == PPC::R2) || Reg == PPC::R0)
17506 report_fatal_error(Twine("Trying to reserve an invalid register \"" +
17507 StringRef(RegName) + "\"."));
17508
17509 // Convert GPR to GP8R register for 64bit.
17510 if (Is64Bit && StringRef(RegName).starts_with_insensitive("r"))
17511 Reg = Reg.id() - PPC::R0 + PPC::X0;
17512
17513 return Reg;
17514}
17515
17516bool PPCTargetLowering::isAccessedAsGotIndirect(SDValue GA) const {
17517 // The 32-bit SVR4 ABI accesses everything as got-indirect.
17518 if (Subtarget.is32BitELFABI())
17519 return true;
17520
17521 // AIX accesses everything indirectly through the TOC, which is similar to
17522 // the GOT.
17523 if (Subtarget.isAIXABI())
17524 return true;
17525
17526 CodeModel::Model CModel = getTargetMachine().getCodeModel();
17527 // Under the small and large code models, module locals are accessed
17528 // indirectly by loading their address from .toc/.got.
17529 if (CModel == CodeModel::Small || CModel == CodeModel::Large)
17530 return true;
17531
17532 // JumpTable and BlockAddress are accessed as got-indirect.
17533 if (isa<JumpTableSDNode>(GA) || isa<BlockAddressSDNode>(GA))
17534 return true;
17535
17536 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(GA))
17537 return Subtarget.isGVIndirectSymbol(G->getGlobal());
17538
17539 return false;
17540}
17541
17542bool
17543PPCTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
17544 // The PowerPC target isn't yet aware of offsets.
17545 return false;
17546}
17547
17548bool PPCTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
17549 const CallInst &I,
17550 MachineFunction &MF,
17551 unsigned Intrinsic) const {
17552 switch (Intrinsic) {
17553 case Intrinsic::ppc_atomicrmw_xchg_i128:
17554 case Intrinsic::ppc_atomicrmw_add_i128:
17555 case Intrinsic::ppc_atomicrmw_sub_i128:
17556 case Intrinsic::ppc_atomicrmw_nand_i128:
17557 case Intrinsic::ppc_atomicrmw_and_i128:
17558 case Intrinsic::ppc_atomicrmw_or_i128:
17559 case Intrinsic::ppc_atomicrmw_xor_i128:
17560 case Intrinsic::ppc_cmpxchg_i128:
17562 Info.memVT = MVT::i128;
17563 Info.ptrVal = I.getArgOperand(0);
17564 Info.offset = 0;
17565 Info.align = Align(16);
17568 return true;
17569 case Intrinsic::ppc_atomic_load_i128:
17571 Info.memVT = MVT::i128;
17572 Info.ptrVal = I.getArgOperand(0);
17573 Info.offset = 0;
17574 Info.align = Align(16);
17576 return true;
17577 case Intrinsic::ppc_atomic_store_i128:
17579 Info.memVT = MVT::i128;
17580 Info.ptrVal = I.getArgOperand(2);
17581 Info.offset = 0;
17582 Info.align = Align(16);
17584 return true;
17585 case Intrinsic::ppc_altivec_lvx:
17586 case Intrinsic::ppc_altivec_lvxl:
17587 case Intrinsic::ppc_altivec_lvebx:
17588 case Intrinsic::ppc_altivec_lvehx:
17589 case Intrinsic::ppc_altivec_lvewx:
17590 case Intrinsic::ppc_vsx_lxvd2x:
17591 case Intrinsic::ppc_vsx_lxvw4x:
17592 case Intrinsic::ppc_vsx_lxvd2x_be:
17593 case Intrinsic::ppc_vsx_lxvw4x_be:
17594 case Intrinsic::ppc_vsx_lxvl:
17595 case Intrinsic::ppc_vsx_lxvll: {
17596 EVT VT;
17597 switch (Intrinsic) {
17598 case Intrinsic::ppc_altivec_lvebx:
17599 VT = MVT::i8;
17600 break;
17601 case Intrinsic::ppc_altivec_lvehx:
17602 VT = MVT::i16;
17603 break;
17604 case Intrinsic::ppc_altivec_lvewx:
17605 VT = MVT::i32;
17606 break;
17607 case Intrinsic::ppc_vsx_lxvd2x:
17608 case Intrinsic::ppc_vsx_lxvd2x_be:
17609 VT = MVT::v2f64;
17610 break;
17611 default:
17612 VT = MVT::v4i32;
17613 break;
17614 }
17615
17617 Info.memVT = VT;
17618 Info.ptrVal = I.getArgOperand(0);
17619 Info.offset = -VT.getStoreSize()+1;
17620 Info.size = 2*VT.getStoreSize()-1;
17621 Info.align = Align(1);
17623 return true;
17624 }
17625 case Intrinsic::ppc_altivec_stvx:
17626 case Intrinsic::ppc_altivec_stvxl:
17627 case Intrinsic::ppc_altivec_stvebx:
17628 case Intrinsic::ppc_altivec_stvehx:
17629 case Intrinsic::ppc_altivec_stvewx:
17630 case Intrinsic::ppc_vsx_stxvd2x:
17631 case Intrinsic::ppc_vsx_stxvw4x:
17632 case Intrinsic::ppc_vsx_stxvd2x_be:
17633 case Intrinsic::ppc_vsx_stxvw4x_be:
17634 case Intrinsic::ppc_vsx_stxvl:
17635 case Intrinsic::ppc_vsx_stxvll: {
17636 EVT VT;
17637 switch (Intrinsic) {
17638 case Intrinsic::ppc_altivec_stvebx:
17639 VT = MVT::i8;
17640 break;
17641 case Intrinsic::ppc_altivec_stvehx:
17642 VT = MVT::i16;
17643 break;
17644 case Intrinsic::ppc_altivec_stvewx:
17645 VT = MVT::i32;
17646 break;
17647 case Intrinsic::ppc_vsx_stxvd2x:
17648 case Intrinsic::ppc_vsx_stxvd2x_be:
17649 VT = MVT::v2f64;
17650 break;
17651 default:
17652 VT = MVT::v4i32;
17653 break;
17654 }
17655
17657 Info.memVT = VT;
17658 Info.ptrVal = I.getArgOperand(1);
17659 Info.offset = -VT.getStoreSize()+1;
17660 Info.size = 2*VT.getStoreSize()-1;
17661 Info.align = Align(1);
17663 return true;
17664 }
17665 case Intrinsic::ppc_stdcx:
17666 case Intrinsic::ppc_stwcx:
17667 case Intrinsic::ppc_sthcx:
17668 case Intrinsic::ppc_stbcx: {
17669 EVT VT;
17670 auto Alignment = Align(8);
17671 switch (Intrinsic) {
17672 case Intrinsic::ppc_stdcx:
17673 VT = MVT::i64;
17674 break;
17675 case Intrinsic::ppc_stwcx:
17676 VT = MVT::i32;
17677 Alignment = Align(4);
17678 break;
17679 case Intrinsic::ppc_sthcx:
17680 VT = MVT::i16;
17681 Alignment = Align(2);
17682 break;
17683 case Intrinsic::ppc_stbcx:
17684 VT = MVT::i8;
17685 Alignment = Align(1);
17686 break;
17687 }
17689 Info.memVT = VT;
17690 Info.ptrVal = I.getArgOperand(0);
17691 Info.offset = 0;
17692 Info.align = Alignment;
17694 return true;
17695 }
17696 default:
17697 break;
17698 }
17699
17700 return false;
17701}
17702
17703/// It returns EVT::Other if the type should be determined using generic
17704/// target-independent logic.
17705EVT PPCTargetLowering::getOptimalMemOpType(
17706 const MemOp &Op, const AttributeList &FuncAttributes) const {
17707 if (getTargetMachine().getOptLevel() != CodeGenOptLevel::None) {
17708 // We should use Altivec/VSX loads and stores when available. For unaligned
17709 // addresses, unaligned VSX loads are only fast starting with the P8.
17710 if (Subtarget.hasAltivec() && Op.size() >= 16) {
17711 if (Op.isMemset() && Subtarget.hasVSX()) {
17712 uint64_t TailSize = Op.size() % 16;
17713 // For memset lowering, EXTRACT_VECTOR_ELT can only return a constant
17714 // element if the vector element type matches the tail store; for a tail
17715 // size of 3 or 4 the tail store is i32, so a different legal type is needed.
17716 if (TailSize > 2 && TailSize <= 4) {
17717 return MVT::v8i16;
17718 }
17719 return MVT::v4i32;
17720 }
17721 if (Op.isAligned(Align(16)) || Subtarget.hasP8Vector())
17722 return MVT::v4i32;
17723 }
17724 }
17725
17726 if (Subtarget.isPPC64()) {
17727 return MVT::i64;
17728 }
17729
17730 return MVT::i32;
17731}
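// Illustrative sketch (not from the original source): when optimizing with
// VSX available, a 40-byte memset (8-byte tail) is emitted with MVT::v4i32
// stores, while a 35-byte memset (3-byte tail) picks MVT::v8i16 because of
// the tail-store constraint described above. Aligned (or Power8+) copies of
// at least 16 bytes also use MVT::v4i32; everything else falls back to
// MVT::i64 on 64-bit subtargets and MVT::i32 otherwise.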
17732
17733/// Returns true if it is beneficial to convert a load of a constant
17734/// to just the constant itself.
17735bool PPCTargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
17736 Type *Ty) const {
17737 assert(Ty->isIntegerTy());
17738
17739 unsigned BitSize = Ty->getPrimitiveSizeInBits();
17740 return !(BitSize == 0 || BitSize > 64);
17741}
17742
17743bool PPCTargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const {
17744 if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
17745 return false;
17746 unsigned NumBits1 = Ty1->getPrimitiveSizeInBits();
17747 unsigned NumBits2 = Ty2->getPrimitiveSizeInBits();
17748 return NumBits1 == 64 && NumBits2 == 32;
17749}
17750
17751bool PPCTargetLowering::isTruncateFree(EVT VT1, EVT VT2) const {
17752 if (!VT1.isInteger() || !VT2.isInteger())
17753 return false;
17754 unsigned NumBits1 = VT1.getSizeInBits();
17755 unsigned NumBits2 = VT2.getSizeInBits();
17756 return NumBits1 == 64 && NumBits2 == 32;
17757}
17758
17759bool PPCTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
17760 // Generally speaking, zexts are not free, but they are free when they can be
17761 // folded with other operations.
17762 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Val)) {
17763 EVT MemVT = LD->getMemoryVT();
17764 if ((MemVT == MVT::i1 || MemVT == MVT::i8 || MemVT == MVT::i16 ||
17765 (Subtarget.isPPC64() && MemVT == MVT::i32)) &&
17766 (LD->getExtensionType() == ISD::NON_EXTLOAD ||
17767 LD->getExtensionType() == ISD::ZEXTLOAD))
17768 return true;
17769 }
17770
17771 // FIXME: Add other cases...
17772 // - 32-bit shifts with a zext to i64
17773 // - zext after ctlz, bswap, etc.
17774 // - zext after and by a constant mask
17775
17776 return TargetLowering::isZExtFree(Val, VT2);
17777}
17778
17779bool PPCTargetLowering::isFPExtFree(EVT DestVT, EVT SrcVT) const {
17780 assert(DestVT.isFloatingPoint() && SrcVT.isFloatingPoint() &&
17781 "invalid fpext types");
17782 // Extending to float128 is not free.
17783 if (DestVT == MVT::f128)
17784 return false;
17785 return true;
17786}
17787
17788bool PPCTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
17789 return isInt<16>(Imm) || isUInt<16>(Imm);
17790}
17791
17792bool PPCTargetLowering::isLegalAddImmediate(int64_t Imm) const {
17793 return isInt<16>(Imm) || isUInt<16>(Imm);
17794}
17795
17798 unsigned *Fast) const {
17799 if (DisablePPCUnaligned)
17800 return false;
17801
17802 // PowerPC supports unaligned memory access for simple non-vector types.
17803 // Although accessing unaligned addresses is not as efficient as accessing
17804 // aligned addresses, it is generally more efficient than manual expansion,
17805 // and generally only traps for software emulation when crossing page
17806 // boundaries.
17807
17808 if (!VT.isSimple())
17809 return false;
17810
17811 if (VT.isFloatingPoint() && !VT.isVector() &&
17812 !Subtarget.allowsUnalignedFPAccess())
17813 return false;
17814
17815 if (VT.getSimpleVT().isVector()) {
17816 if (Subtarget.hasVSX()) {
17817 if (VT != MVT::v2f64 && VT != MVT::v2i64 &&
17818 VT != MVT::v4f32 && VT != MVT::v4i32)
17819 return false;
17820 } else {
17821 return false;
17822 }
17823 }
17824
17825 if (VT == MVT::ppcf128)
17826 return false;
17827
17828 if (Fast)
17829 *Fast = 1;
17830
17831 return true;
17832}
17833
17834bool PPCTargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT,
17835 SDValue C) const {
17836 // Check integral scalar types.
17837 if (!VT.isScalarInteger())
17838 return false;
17839 if (auto *ConstNode = dyn_cast<ConstantSDNode>(C.getNode())) {
17840 if (!ConstNode->getAPIntValue().isSignedIntN(64))
17841 return false;
17842 // This transformation will generate >= 2 operations. But the following
17843 // cases will generate <= 2 instructions during ISEL, so exclude them:
17844 // 1. If the constant multiplier fits in 16 bits, it can be handled by one
17845 // HW instruction, i.e. MULLI.
17846 // 2. If the multiplier fits in 16 bits after shifting out trailing zeros,
17847 // only one extra shift instruction is needed, i.e. MULLI and RLDICR.
17848 int64_t Imm = ConstNode->getSExtValue();
17849 unsigned Shift = llvm::countr_zero<uint64_t>(Imm);
17850 Imm >>= Shift;
17851 if (isInt<16>(Imm))
17852 return false;
17853 uint64_t UImm = static_cast<uint64_t>(Imm);
17854 if (isPowerOf2_64(UImm + 1) || isPowerOf2_64(UImm - 1) ||
17855 isPowerOf2_64(1 - UImm) || isPowerOf2_64(-1 - UImm))
17856 return true;
17857 }
17858 return false;
17859}
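// Illustrative sketch (not from the original source): a multiply by 10
// returns false, since stripping the trailing zero bit leaves 5, which fits
// in 16 bits and is handled by MULLI (plus RLDICR for the shift). A multiply
// by 65537 (2^16 + 1) returns true: it cannot be encoded as a 16-bit
// immediate, but it decomposes into a shift-left by 16 plus an add.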
17860
17862 EVT VT) const {
17865}
17866
17867bool PPCTargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
17868 Type *Ty) const {
17869 if (Subtarget.hasSPE() || Subtarget.useSoftFloat())
17870 return false;
17871 switch (Ty->getScalarType()->getTypeID()) {
17872 case Type::FloatTyID:
17873 case Type::DoubleTyID:
17874 return true;
17875 case Type::FP128TyID:
17876 return Subtarget.hasP9Vector();
17877 default:
17878 return false;
17879 }
17880}
17881
17882// FIXME: add more patterns which are not profitable to hoist.
17883bool PPCTargetLowering::isProfitableToHoist(Instruction *I) const {
17884 if (!I->hasOneUse())
17885 return true;
17886
17887 Instruction *User = I->user_back();
17888 assert(User && "A single use instruction with no uses.");
17889
17890 switch (I->getOpcode()) {
17891 case Instruction::FMul: {
17892 // Don't break FMA, PowerPC prefers FMA.
17893 if (User->getOpcode() != Instruction::FSub &&
17894 User->getOpcode() != Instruction::FAdd)
17895 return true;
17896
17897 const TargetOptions &Options = getTargetMachine().Options;
17898 const Function *F = I->getFunction();
17899 const DataLayout &DL = F->getDataLayout();
17900 Type *Ty = User->getOperand(0)->getType();
17901
17902 return !(
17903 isFMAFasterThanFMulAndFAdd(*F, Ty) &&
17904 isOperationLegalOrCustom(ISD::FMA, getValueType(DL, Ty)) &&
17905 (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath));
17906 }
17907 case Instruction::Load: {
17908 // Don't break "store (load float*)" pattern, this pattern will be combined
17909 // to "store (load int32)" in later InstCombine pass. See function
17910 // combineLoadToOperationType. On PowerPC, loading a floating-point value
17911 // takes more cycles than loading a 32-bit integer.
17912 LoadInst *LI = cast<LoadInst>(I);
17913 // For the loads that combineLoadToOperationType does nothing, like
17914 // ordered load, it should be profitable to hoist them.
17915 // For swifterror load, it can only be used for pointer to pointer type, so
17916 // later type check should get rid of this case.
17917 if (!LI->isUnordered())
17918 return true;
17919
17920 if (User->getOpcode() != Instruction::Store)
17921 return true;
17922
17923 if (I->getType()->getTypeID() != Type::FloatTyID)
17924 return true;
17925
17926 return false;
17927 }
17928 default:
17929 return true;
17930 }
17931 return true;
17932}
17933
17934const MCPhysReg *
17935PPCTargetLowering::getScratchRegisters(CallingConv::ID) const {
17936 // LR is a callee-save register, but we must treat it as clobbered by any call
17937 // site. Hence we include LR in the scratch registers, which are in turn added
17938 // as implicit-defs for stackmaps and patchpoints. The same reasoning applies
17939 // to CTR, which is used by any indirect call.
17940 static const MCPhysReg ScratchRegs[] = {
17941 PPC::X12, PPC::LR8, PPC::CTR8, 0
17942 };
17943
17944 return ScratchRegs;
17945}
17946
17947Register PPCTargetLowering::getExceptionPointerRegister(
17948 const Constant *PersonalityFn) const {
17949 return Subtarget.isPPC64() ? PPC::X3 : PPC::R3;
17950}
17951
17952Register PPCTargetLowering::getExceptionSelectorRegister(
17953 const Constant *PersonalityFn) const {
17954 return Subtarget.isPPC64() ? PPC::X4 : PPC::R4;
17955}
17956
17957bool
17958PPCTargetLowering::shouldExpandBuildVectorWithShuffles(
17959 EVT VT, unsigned DefinedValues) const {
17960 if (VT == MVT::v2i64)
17961 return Subtarget.hasDirectMove(); // Don't need stack ops with direct moves
17962
17963 if (Subtarget.hasVSX())
17964 return true;
17965
17966 return TargetLowering::shouldExpandBuildVectorWithShuffles(VT, DefinedValues);
17967}
17968
17969Sched::Preference PPCTargetLowering::getSchedulingPreference(SDNode *N) const {
17970 if (DisableILPPref || Subtarget.enableMachineScheduler())
17971 return TargetLowering::getSchedulingPreference(N);
17972
17973 return Sched::ILP;
17974}
17975
17976// Create a fast isel object.
17977FastISel *
17978PPCTargetLowering::createFastISel(FunctionLoweringInfo &FuncInfo,
17979 const TargetLibraryInfo *LibInfo) const {
17980 return PPC::createFastISel(FuncInfo, LibInfo);
17981}
17982
17983// 'Inverted' means the FMA opcode after negating one multiplicand.
17984// For example, (fma -a b c) = (fnmsub a b c)
17985static unsigned invertFMAOpcode(unsigned Opc) {
17986 switch (Opc) {
17987 default:
17988 llvm_unreachable("Invalid FMA opcode for PowerPC!");
17989 case ISD::FMA:
17990 return PPCISD::FNMSUB;
17991 case PPCISD::FNMSUB:
17992 return ISD::FMA;
17993 }
17994}
17995
17996SDValue PPCTargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG,
17997 bool LegalOps, bool OptForSize,
17998 NegatibleCost &Cost,
17999 unsigned Depth) const {
18000 if (Depth > SelectionDAG::MaxRecursionDepth)
18001 return SDValue();
18002
18003 unsigned Opc = Op.getOpcode();
18004 EVT VT = Op.getValueType();
18005 SDNodeFlags Flags = Op.getNode()->getFlags();
18006
18007 switch (Opc) {
18008 case PPCISD::FNMSUB:
18009 if (!Op.hasOneUse() || !isTypeLegal(VT))
18010 break;
18011
18012 const TargetOptions &Options = getTargetMachine().Options;
18013 SDValue N0 = Op.getOperand(0);
18014 SDValue N1 = Op.getOperand(1);
18015 SDValue N2 = Op.getOperand(2);
18016 SDLoc Loc(Op);
18017
18018 NegatibleCost N2Cost = NegatibleCost::Expensive;
18019 SDValue NegN2 =
18020 getNegatedExpression(N2, DAG, LegalOps, OptForSize, N2Cost, Depth + 1);
18021
18022 if (!NegN2)
18023 return SDValue();
18024
18025 // (fneg (fnmsub a b c)) => (fnmsub (fneg a) b (fneg c))
18026 // (fneg (fnmsub a b c)) => (fnmsub a (fneg b) (fneg c))
18027 // These transformations may change sign of zeroes. For example,
18028 // -(-ab-(-c))=-0 while -(-(ab-c))=+0 when a=b=c=1.
18029 if (Flags.hasNoSignedZeros() || Options.NoSignedZerosFPMath) {
18030 // Try and choose the cheaper one to negate.
18031 NegatibleCost N0Cost = NegatibleCost::Expensive;
18032 SDValue NegN0 = getNegatedExpression(N0, DAG, LegalOps, OptForSize,
18033 N0Cost, Depth + 1);
18034
18035 NegatibleCost N1Cost = NegatibleCost::Expensive;
18036 SDValue NegN1 = getNegatedExpression(N1, DAG, LegalOps, OptForSize,
18037 N1Cost, Depth + 1);
18038
18039 if (NegN0 && N0Cost <= N1Cost) {
18040 Cost = std::min(N0Cost, N2Cost);
18041 return DAG.getNode(Opc, Loc, VT, NegN0, N1, NegN2, Flags);
18042 } else if (NegN1) {
18043 Cost = std::min(N1Cost, N2Cost);
18044 return DAG.getNode(Opc, Loc, VT, N0, NegN1, NegN2, Flags);
18045 }
18046 }
18047
18048 // (fneg (fnmsub a b c)) => (fma a b (fneg c))
18049 if (isOperationLegal(ISD::FMA, VT)) {
18050 Cost = N2Cost;
18051 return DAG.getNode(ISD::FMA, Loc, VT, N0, N1, NegN2, Flags);
18052 }
18053
18054 break;
18055 }
18056
18057 return TargetLowering::getNegatedExpression(Op, DAG, LegalOps, OptForSize,
18058 Cost, Depth);
18059}
18060
18061// Override to enable LOAD_STACK_GUARD lowering on Linux.
18062bool PPCTargetLowering::useLoadStackGuardNode(const Module &M) const {
18063 if (M.getStackProtectorGuard() == "tls" || Subtarget.isTargetLinux())
18064 return true;
18065 return TargetLowering::useLoadStackGuardNode(M);
18066}
18067
18068// Override to disable global variable loading on Linux and insert AIX canary
18069// word declaration.
18070void PPCTargetLowering::insertSSPDeclarations(Module &M) const {
18071 if (Subtarget.isAIXABI()) {
18072 M.getOrInsertGlobal(AIXSSPCanaryWordName,
18073 PointerType::getUnqual(M.getContext()));
18074 return;
18075 }
18076 if (!Subtarget.isTargetLinux())
18077 return TargetLowering::insertSSPDeclarations(M);
18078}
18079
18080Value *PPCTargetLowering::getSDagStackGuard(const Module &M) const {
18081 if (Subtarget.isAIXABI())
18082 return M.getGlobalVariable(AIXSSPCanaryWordName);
18083 return TargetLowering::getSDagStackGuard(M);
18084}
18085
18086bool PPCTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
18087 bool ForCodeSize) const {
18088 if (!VT.isSimple() || !Subtarget.hasVSX())
18089 return false;
18090
18091 switch(VT.getSimpleVT().SimpleTy) {
18092 default:
18093 // For FP types that are currently not supported by PPC backend, return
18094 // false. Examples: f16, f80.
18095 return false;
18096 case MVT::f32:
18097 case MVT::f64: {
18098 if (Subtarget.hasPrefixInstrs() && Subtarget.hasP10Vector()) {
18099 // We can materialize all immediates via XXSPLTI32DX and XXSPLTIDP.
18100 return true;
18101 }
18102 bool IsExact;
18103 APSInt IntResult(16, false);
18104 // The rounding mode doesn't really matter because we only care about floats
18105 // that can be converted to integers exactly.
18106 Imm.convertToInteger(IntResult, APFloat::rmTowardZero, &IsExact);
18107 // For exact values in the range [-16, 15] we can materialize the float.
18108 if (IsExact && IntResult <= 15 && IntResult >= -16)
18109 return true;
18110 return Imm.isZero();
18111 }
18112 case MVT::ppcf128:
18113 return Imm.isPosZero();
18114 }
18115}
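// Illustrative sketch (not from the original source): with VSX but without
// the Power10 prefixed instructions, f64 immediates such as 3.0 or -7.0
// (exact integers in [-16, 15]) and +/-0.0 are reported as legal to
// materialize, while a value like 0.1 is not and would come from the
// constant pool. With prefixed instructions and Power10 vector support,
// every f32/f64 immediate is reported as legal.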
18116
18117// For vector shift operation op, fold
18118// (op x, (and y, ((1 << numbits(x)) - 1))) -> (target op x, y)
18119static SDValue stripModuloOnShift(const TargetLowering &TLI, SDNode *N,
18120 SelectionDAG &DAG) {
18121 SDValue N0 = N->getOperand(0);
18122 SDValue N1 = N->getOperand(1);
18123 EVT VT = N0.getValueType();
18124 unsigned OpSizeInBits = VT.getScalarSizeInBits();
18125 unsigned Opcode = N->getOpcode();
18126 unsigned TargetOpcode;
18127
18128 switch (Opcode) {
18129 default:
18130 llvm_unreachable("Unexpected shift operation");
18131 case ISD::SHL:
18132 TargetOpcode = PPCISD::SHL;
18133 break;
18134 case ISD::SRL:
18135 TargetOpcode = PPCISD::SRL;
18136 break;
18137 case ISD::SRA:
18138 TargetOpcode = PPCISD::SRA;
18139 break;
18140 }
18141
18142 if (VT.isVector() && TLI.isOperationLegal(Opcode, VT) &&
18143 N1->getOpcode() == ISD::AND)
18144 if (ConstantSDNode *Mask = isConstOrConstSplat(N1->getOperand(1)))
18145 if (Mask->getZExtValue() == OpSizeInBits - 1)
18146 return DAG.getNode(TargetOpcode, SDLoc(N), VT, N0, N1->getOperand(0));
18147
18148 return SDValue();
18149}
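// Illustrative sketch (not from the original source): this rewrites a pattern
// such as (shl v4i32:$x, (and v4i32:$y, <31,31,31,31>)) into
// (PPCISD::SHL $x, $y). The explicit modulo mask is redundant because the
// vector shift instructions only consume the low log2(element-width) bits of
// each shift-amount element anyway.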
18150
18151SDValue PPCTargetLowering::combineSHL(SDNode *N, DAGCombinerInfo &DCI) const {
18152 if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
18153 return Value;
18154
18155 SDValue N0 = N->getOperand(0);
18156 ConstantSDNode *CN1 = dyn_cast<ConstantSDNode>(N->getOperand(1));
18157 if (!Subtarget.isISA3_0() || !Subtarget.isPPC64() ||
18158 N0.getOpcode() != ISD::SIGN_EXTEND ||
18159 N0.getOperand(0).getValueType() != MVT::i32 || CN1 == nullptr ||
18160 N->getValueType(0) != MVT::i64)
18161 return SDValue();
18162
18163 // We can't save an operation here if the value is already extended, and
18164 // the existing shift is easier to combine.
18165 SDValue ExtsSrc = N0.getOperand(0);
18166 if (ExtsSrc.getOpcode() == ISD::TRUNCATE &&
18167 ExtsSrc.getOperand(0).getOpcode() == ISD::AssertSext)
18168 return SDValue();
18169
18170 SDLoc DL(N0);
18171 SDValue ShiftBy = SDValue(CN1, 0);
18172 // We want the shift amount to be i32 on the extswli, but the shift could
18173 // have an i64.
18174 if (ShiftBy.getValueType() == MVT::i64)
18175 ShiftBy = DCI.DAG.getConstant(CN1->getZExtValue(), DL, MVT::i32);
18176
18177 return DCI.DAG.getNode(PPCISD::EXTSWSLI, DL, MVT::i64, N0->getOperand(0),
18178 ShiftBy);
18179}
18180
18181SDValue PPCTargetLowering::combineSRA(SDNode *N, DAGCombinerInfo &DCI) const {
18182 if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
18183 return Value;
18184
18185 return SDValue();
18186}
18187
18188SDValue PPCTargetLowering::combineSRL(SDNode *N, DAGCombinerInfo &DCI) const {
18189 if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
18190 return Value;
18191
18192 return SDValue();
18193}
18194
18195// Transform (add X, (zext(setne Z, C))) -> (addze X, (addic (addi Z, -C), -1))
18196// Transform (add X, (zext(sete Z, C))) -> (addze X, (subfic (addi Z, -C), 0))
18197// When C is zero, the equation (addi Z, -C) can be simplified to Z
18198// Requirement: -C in [-32768, 32767], X and Z are MVT::i64 types
18199static SDValue combineADDToADDZE(SDNode *N, SelectionDAG &DAG,
18200 const PPCSubtarget &Subtarget) {
18201 if (!Subtarget.isPPC64())
18202 return SDValue();
18203
18204 SDValue LHS = N->getOperand(0);
18205 SDValue RHS = N->getOperand(1);
18206
18207 auto isZextOfCompareWithConstant = [](SDValue Op) {
18208 if (Op.getOpcode() != ISD::ZERO_EXTEND || !Op.hasOneUse() ||
18209 Op.getValueType() != MVT::i64)
18210 return false;
18211
18212 SDValue Cmp = Op.getOperand(0);
18213 if (Cmp.getOpcode() != ISD::SETCC || !Cmp.hasOneUse() ||
18214 Cmp.getOperand(0).getValueType() != MVT::i64)
18215 return false;
18216
18217 if (auto *Constant = dyn_cast<ConstantSDNode>(Cmp.getOperand(1))) {
18218 int64_t NegConstant = 0 - Constant->getSExtValue();
18219 // Due to the limitations of the addi instruction,
18220 // -C is required to be [-32768, 32767].
18221 return isInt<16>(NegConstant);
18222 }
18223
18224 return false;
18225 };
18226
18227 bool LHSHasPattern = isZextOfCompareWithConstant(LHS);
18228 bool RHSHasPattern = isZextOfCompareWithConstant(RHS);
18229
18230 // If there is a pattern, canonicalize a zext operand to the RHS.
18231 if (LHSHasPattern && !RHSHasPattern)
18232 std::swap(LHS, RHS);
18233 else if (!LHSHasPattern && !RHSHasPattern)
18234 return SDValue();
18235
18236 SDLoc DL(N);
18237 SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Glue);
18238 SDValue Cmp = RHS.getOperand(0);
18239 SDValue Z = Cmp.getOperand(0);
18240 auto *Constant = cast<ConstantSDNode>(Cmp.getOperand(1));
18241 int64_t NegConstant = 0 - Constant->getSExtValue();
18242
18243 switch(cast<CondCodeSDNode>(Cmp.getOperand(2))->get()) {
18244 default: break;
18245 case ISD::SETNE: {
18246 // when C == 0
18247 // --> addze X, (addic Z, -1).carry
18248 // /
18249 // add X, (zext(setne Z, C))--
18250 // \ when -32768 <= -C <= 32767 && C != 0
18251 // --> addze X, (addic (addi Z, -C), -1).carry
18252 SDValue Add = DAG.getNode(ISD::ADD, DL, MVT::i64, Z,
18253 DAG.getConstant(NegConstant, DL, MVT::i64));
18254 SDValue AddOrZ = NegConstant != 0 ? Add : Z;
18255 SDValue Addc = DAG.getNode(ISD::ADDC, DL, DAG.getVTList(MVT::i64, MVT::Glue),
18256 AddOrZ, DAG.getConstant(-1ULL, DL, MVT::i64));
18257 return DAG.getNode(ISD::ADDE, DL, VTs, LHS, DAG.getConstant(0, DL, MVT::i64),
18258 SDValue(Addc.getNode(), 1));
18259 }
18260 case ISD::SETEQ: {
18261 // when C == 0
18262 // --> addze X, (subfic Z, 0).carry
18263 // /
18264 // add X, (zext(sete Z, C))--
18265 // \ when -32768 <= -C <= 32767 && C != 0
18266 // --> addze X, (subfic (addi Z, -C), 0).carry
18267 SDValue Add = DAG.getNode(ISD::ADD, DL, MVT::i64, Z,
18268 DAG.getConstant(NegConstant, DL, MVT::i64));
18269 SDValue AddOrZ = NegConstant != 0 ? Add : Z;
18270 SDValue Subc = DAG.getNode(ISD::SUBC, DL, DAG.getVTList(MVT::i64, MVT::Glue),
18271 DAG.getConstant(0, DL, MVT::i64), AddOrZ);
18272 return DAG.getNode(ISD::ADDE, DL, VTs, LHS, DAG.getConstant(0, DL, MVT::i64),
18273 SDValue(Subc.getNode(), 1));
18274 }
18275 }
18276
18277 return SDValue();
18278}
18279
18280// Transform
18281// (add C1, (MAT_PCREL_ADDR GlobalAddr+C2)) to
18282// (MAT_PCREL_ADDR GlobalAddr+(C1+C2))
18283// In this case both C1 and C2 must be known constants.
18284// C1+C2 must fit into a 34 bit signed integer.
18285static SDValue combineADDToMAT_PCREL_ADDR(SDNode *N, SelectionDAG &DAG,
18286 const PPCSubtarget &Subtarget) {
18287 if (!Subtarget.isUsingPCRelativeCalls())
18288 return SDValue();
18289
18290 // Check both Operand 0 and Operand 1 of the ADD node for the PCRel node.
18291 // If we find that node try to cast the Global Address and the Constant.
18292 SDValue LHS = N->getOperand(0);
18293 SDValue RHS = N->getOperand(1);
18294
18295 if (LHS.getOpcode() != PPCISD::MAT_PCREL_ADDR)
18296 std::swap(LHS, RHS);
18297
18298 if (LHS.getOpcode() != PPCISD::MAT_PCREL_ADDR)
18299 return SDValue();
18300
18301 // Operand zero of PPCISD::MAT_PCREL_ADDR is the GA node.
18302 GlobalAddressSDNode *GSDN = dyn_cast<GlobalAddressSDNode>(LHS.getOperand(0));
18303 ConstantSDNode* ConstNode = dyn_cast<ConstantSDNode>(RHS);
18304
18305 // Check that both casts succeeded.
18306 if (!GSDN || !ConstNode)
18307 return SDValue();
18308
18309 int64_t NewOffset = GSDN->getOffset() + ConstNode->getSExtValue();
18310 SDLoc DL(GSDN);
18311
18312 // The signed int offset needs to fit in 34 bits.
18313 if (!isInt<34>(NewOffset))
18314 return SDValue();
18315
18316 // The new global address is a copy of the old global address except
18317 // that it has the updated Offset.
18318 SDValue GA =
18319 DAG.getTargetGlobalAddress(GSDN->getGlobal(), DL, GSDN->getValueType(0),
18320 NewOffset, GSDN->getTargetFlags());
18321 SDValue MatPCRel =
18322 DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, GSDN->getValueType(0), GA);
18323 return MatPCRel;
18324}
18325
18326SDValue PPCTargetLowering::combineADD(SDNode *N, DAGCombinerInfo &DCI) const {
18327 if (auto Value = combineADDToADDZE(N, DCI.DAG, Subtarget))
18328 return Value;
18329
18330 if (auto Value = combineADDToMAT_PCREL_ADDR(N, DCI.DAG, Subtarget))
18331 return Value;
18332
18333 return SDValue();
18334}
18335
18336// Detect TRUNCATE operations on bitcasts of float128 values.
18337// What we are looking for here is the situation where we extract a subset
18338// of bits from a 128 bit float.
18339// This can be of two forms:
18340// 1) BITCAST of f128 feeding TRUNCATE
18341// 2) BITCAST of f128 feeding SRL (a shift) feeding TRUNCATE
18342// The reason this is required is because we do not have a legal i128 type
18343// and so we want to prevent having to store the f128 and then reload part
18344// of it.
18345SDValue PPCTargetLowering::combineTRUNCATE(SDNode *N,
18346 DAGCombinerInfo &DCI) const {
18347 // If we are using CRBits then try that first.
18348 if (Subtarget.useCRBits()) {
18349 // Check if CRBits did anything and return that if it did.
18350 if (SDValue CRTruncValue = DAGCombineTruncBoolExt(N, DCI))
18351 return CRTruncValue;
18352 }
18353
18354 SDLoc dl(N);
18355 SDValue Op0 = N->getOperand(0);
18356
18357 // Looking for a truncate of i128 to i64.
18358 if (Op0.getValueType() != MVT::i128 || N->getValueType(0) != MVT::i64)
18359 return SDValue();
18360
18361 int EltToExtract = DCI.DAG.getDataLayout().isBigEndian() ? 1 : 0;
18362
18363 // SRL feeding TRUNCATE.
18364 if (Op0.getOpcode() == ISD::SRL) {
18365 ConstantSDNode *ConstNode = dyn_cast<ConstantSDNode>(Op0.getOperand(1));
18366 // The right shift has to be by 64 bits.
18367 if (!ConstNode || ConstNode->getZExtValue() != 64)
18368 return SDValue();
18369
18370 // Switch the element number to extract.
18371 EltToExtract = EltToExtract ? 0 : 1;
18372 // Update Op0 past the SRL.
18373 Op0 = Op0.getOperand(0);
18374 }
18375
18376 // BITCAST feeding a TRUNCATE possibly via SRL.
18377 if (Op0.getOpcode() == ISD::BITCAST &&
18378 Op0.getValueType() == MVT::i128 &&
18379 Op0.getOperand(0).getValueType() == MVT::f128) {
18380 SDValue Bitcast = DCI.DAG.getBitcast(MVT::v2i64, Op0.getOperand(0));
18381 return DCI.DAG.getNode(
18382 ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, Bitcast,
18383 DCI.DAG.getTargetConstant(EltToExtract, dl, MVT::i32));
18384 }
18385 return SDValue();
18386}
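// Illustrative sketch (not from the original source): on a little-endian
// subtarget, (truncate (bitcast f128:$v to i128) to i64) becomes
// (extract_vector_elt (bitcast $v to v2i64), 0), and with an intervening
// (srl ..., 64) the other element is extracted instead, so neither form has
// to spill the f128 value to memory.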
18387
18388SDValue PPCTargetLowering::combineMUL(SDNode *N, DAGCombinerInfo &DCI) const {
18389 SelectionDAG &DAG = DCI.DAG;
18390
18391 ConstantSDNode *ConstOpOrElement = isConstOrConstSplat(N->getOperand(1));
18392 if (!ConstOpOrElement)
18393 return SDValue();
18394
18395 // An imul is usually smaller than the alternative sequence for legal type.
18396 if (DAG.getMachineFunction().getFunction().hasMinSize() &&
18397 isOperationLegal(ISD::MUL, N->getValueType(0)))
18398 return SDValue();
18399
18400 auto IsProfitable = [this](bool IsNeg, bool IsAddOne, EVT VT) -> bool {
18401 switch (this->Subtarget.getCPUDirective()) {
18402 default:
18403 // TODO: enhance the condition for subtarget before pwr8
18404 return false;
18405 case PPC::DIR_PWR8:
18406 // type mul add shl
18407 // scalar 4 1 1
18408 // vector 7 2 2
18409 return true;
18410 case PPC::DIR_PWR9:
18411 case PPC::DIR_PWR10:
18412 case PPC::DIR_PWR11:
18413 case PPC::DIR_PWR_FUTURE:
18414 // type mul add shl
18415 // scalar 5 2 2
18416 // vector 7 2 2
18417
18418 // The cycle ratios of the related operations are shown in the table above.
18419 // Because mul costs 5 (scalar) / 7 (vector) while add/sub/shl all cost 2
18420 // for both scalar and vector types, the 2-instruction patterns (add/sub +
18421 // shl, cost 4) are always profitable; but the 3-instruction pattern
18422 // (mul x, -(2^N + 1)) => -(add (shl x, N), x) costs 6 (sub + add + shl),
18423 // so we should only do it for vector types.
18424 return IsAddOne && IsNeg ? VT.isVector() : true;
18425 }
18426 };
18427
18428 EVT VT = N->getValueType(0);
18429 SDLoc DL(N);
18430
18431 const APInt &MulAmt = ConstOpOrElement->getAPIntValue();
18432 bool IsNeg = MulAmt.isNegative();
18433 APInt MulAmtAbs = MulAmt.abs();
18434
18435 if ((MulAmtAbs - 1).isPowerOf2()) {
18436 // (mul x, 2^N + 1) => (add (shl x, N), x)
18437 // (mul x, -(2^N + 1)) => -(add (shl x, N), x)
18438
18439 if (!IsProfitable(IsNeg, true, VT))
18440 return SDValue();
18441
18442 SDValue Op0 = N->getOperand(0);
18443 SDValue Op1 =
18444 DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
18445 DAG.getConstant((MulAmtAbs - 1).logBase2(), DL, VT));
18446 SDValue Res = DAG.getNode(ISD::ADD, DL, VT, Op0, Op1);
18447
18448 if (!IsNeg)
18449 return Res;
18450
18451 return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Res);
18452 } else if ((MulAmtAbs + 1).isPowerOf2()) {
18453 // (mul x, 2^N - 1) => (sub (shl x, N), x)
18454 // (mul x, -(2^N - 1)) => (sub x, (shl x, N))
18455
18456 if (!IsProfitable(IsNeg, false, VT))
18457 return SDValue();
18458
18459 SDValue Op0 = N->getOperand(0);
18460 SDValue Op1 =
18461 DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
18462 DAG.getConstant((MulAmtAbs + 1).logBase2(), DL, VT));
18463
18464 if (!IsNeg)
18465 return DAG.getNode(ISD::SUB, DL, VT, Op1, Op0);
18466 else
18467 return DAG.getNode(ISD::SUB, DL, VT, Op0, Op1);
18468
18469 } else {
18470 return SDValue();
18471 }
18472}
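// Illustrative sketch (not from the original source): on a subtarget where
// IsProfitable holds, (mul x, 5) becomes (add (shl x, 2), x), (mul x, 7)
// becomes (sub (shl x, 3), x), and (mul x, -7) becomes (sub x, (shl x, 3)),
// trading the multiply for cheaper shift/add/sub operations.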
18473
18474// Combine fma-like op (like fnmsub) with fnegs to appropriate op. Do this
18475// in combiner since we need to check SD flags and other subtarget features.
18476SDValue PPCTargetLowering::combineFMALike(SDNode *N,
18477 DAGCombinerInfo &DCI) const {
18478 SDValue N0 = N->getOperand(0);
18479 SDValue N1 = N->getOperand(1);
18480 SDValue N2 = N->getOperand(2);
18481 SDNodeFlags Flags = N->getFlags();
18482 EVT VT = N->getValueType(0);
18483 SelectionDAG &DAG = DCI.DAG;
18484 const TargetOptions &Options = getTargetMachine().Options;
18485 unsigned Opc = N->getOpcode();
18486 bool CodeSize = DAG.getMachineFunction().getFunction().hasOptSize();
18487 bool LegalOps = !DCI.isBeforeLegalizeOps();
18488 SDLoc Loc(N);
18489
18490 if (!isOperationLegal(ISD::FMA, VT))
18491 return SDValue();
18492
18493 // Allowing transformation to FNMSUB may change sign of zeroes when ab-c=0
18494 // since (fnmsub a b c)=-0 while c-ab=+0.
18495 if (!Flags.hasNoSignedZeros() && !Options.NoSignedZerosFPMath)
18496 return SDValue();
18497
18498 // (fma (fneg a) b c) => (fnmsub a b c)
18499 // (fnmsub (fneg a) b c) => (fma a b c)
18500 if (SDValue NegN0 = getCheaperNegatedExpression(N0, DAG, LegalOps, CodeSize))
18501 return DAG.getNode(invertFMAOpcode(Opc), Loc, VT, NegN0, N1, N2, Flags);
18502
18503 // (fma a (fneg b) c) => (fnmsub a b c)
18504 // (fnmsub a (fneg b) c) => (fma a b c)
18505 if (SDValue NegN1 = getCheaperNegatedExpression(N1, DAG, LegalOps, CodeSize))
18506 return DAG.getNode(invertFMAOpcode(Opc), Loc, VT, N0, NegN1, N2, Flags);
18507
18508 return SDValue();
18509}
18510
18511bool PPCTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
18512 // Only duplicate to increase tail-calls for the 64-bit SysV ABIs.
18513 if (!Subtarget.is64BitELFABI())
18514 return false;
18515
18516 // If not a tail call then no need to proceed.
18517 if (!CI->isTailCall())
18518 return false;
18519
18520 // If sibling calls have been disabled and tail-calls aren't guaranteed
18521 // there is no reason to duplicate.
18522 auto &TM = getTargetMachine();
18523 if (!TM.Options.GuaranteedTailCallOpt && DisableSCO)
18524 return false;
18525
18526 // Can't tail call a function called indirectly, or if it has variadic args.
18527 const Function *Callee = CI->getCalledFunction();
18528 if (!Callee || Callee->isVarArg())
18529 return false;
18530
18531 // Make sure the callee and caller calling conventions are eligible for tco.
18532 const Function *Caller = CI->getParent()->getParent();
18533 if (!areCallingConvEligibleForTCO_64SVR4(Caller->getCallingConv(),
18534 CI->getCallingConv()))
18535 return false;
18536
18537 // If the function is local then we have a good chance at tail-calling it
18538 return getTargetMachine().shouldAssumeDSOLocal(Callee);
18539}
18540
18541bool PPCTargetLowering::
18542isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const {
18543 const Value *Mask = AndI.getOperand(1);
18544 // If the mask is suitable for andi. or andis. we should sink the and.
18545 if (const ConstantInt *CI = dyn_cast<ConstantInt>(Mask)) {
18546 // Can't handle constants wider than 64-bits.
18547 if (CI->getBitWidth() > 64)
18548 return false;
18549 int64_t ConstVal = CI->getZExtValue();
18550 return isUInt<16>(ConstVal) ||
18551 (isUInt<16>(ConstVal >> 16) && !(ConstVal & 0xFFFF));
18552 }
18553
18554 // For non-constant masks, we can always use the record-form and.
18555 return true;
18556}
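// Illustrative sketch (not from the original source): masks such as 0xFFFF or
// 0x00FF (encodable by andi.) and 0x00FF0000 (encodable by andis.) make the
// and-plus-compare-with-zero fold beneficial, whereas a mask like 0x00FF00FF
// spans both halves and is rejected.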
18557
18558/// getAddrModeForFlags - Based on the set of address flags, select the most
18559/// optimal instruction format to match by.
18560PPC::AddrMode PPCTargetLowering::getAddrModeForFlags(unsigned Flags) const {
18561 // This is not a node we should be handling here.
18562 if (Flags == PPC::MOF_None)
18563 return PPC::AM_None;
18564 // Unaligned D-Forms are tried first, followed by the aligned D-Forms.
18565 for (auto FlagSet : AddrModesMap.at(PPC::AM_DForm))
18566 if ((Flags & FlagSet) == FlagSet)
18567 return PPC::AM_DForm;
18568 for (auto FlagSet : AddrModesMap.at(PPC::AM_DSForm))
18569 if ((Flags & FlagSet) == FlagSet)
18570 return PPC::AM_DSForm;
18571 for (auto FlagSet : AddrModesMap.at(PPC::AM_DQForm))
18572 if ((Flags & FlagSet) == FlagSet)
18573 return PPC::AM_DQForm;
18574 for (auto FlagSet : AddrModesMap.at(PPC::AM_PrefixDForm))
18575 if ((Flags & FlagSet) == FlagSet)
18576 return PPC::AM_PrefixDForm;
18577 // If no other forms are selected, return an X-Form as it is the most
18578 // general addressing mode.
18579 return PPC::AM_XForm;
18580}
18581
18582/// Set alignment flags based on whether or not the Frame Index is aligned.
18583/// Utilized when computing flags for address computation when selecting
18584/// load and store instructions.
18585static void setAlignFlagsForFI(SDValue N, unsigned &FlagSet,
18586 SelectionDAG &DAG) {
18587 bool IsAdd = ((N.getOpcode() == ISD::ADD) || (N.getOpcode() == ISD::OR));
18588 FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(IsAdd ? N.getOperand(0) : N);
18589 if (!FI)
18590 return;
18591 const MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
18592 unsigned FrameIndexAlign = MFI.getObjectAlign(FI->getIndex()).value();
18593 // If this is (add $FI, $S16Imm), the alignment flags are already set
18594 // based on the immediate. We just need to clear the alignment flags
18595 // if the FI alignment is weaker.
18596 if ((FrameIndexAlign % 4) != 0)
18597 FlagSet &= ~PPC::MOF_RPlusSImm16Mult4;
18598 if ((FrameIndexAlign % 16) != 0)
18599 FlagSet &= ~PPC::MOF_RPlusSImm16Mult16;
18600 // If the address is a plain FrameIndex, set alignment flags based on
18601 // FI alignment.
18602 if (!IsAdd) {
18603 if ((FrameIndexAlign % 4) == 0)
18604 FlagSet |= PPC::MOF_RPlusSImm16Mult4;
18605 if ((FrameIndexAlign % 16) == 0)
18606 FlagSet |= PPC::MOF_RPlusSImm16Mult16;
18607 }
18608}
18609
18610/// Given a node, compute flags that are used for address computation when
18611/// selecting load and store instructions. The flags computed are stored in
18612/// FlagSet. This function takes into account whether the node is a constant,
18613/// an ADD, or an OR, and computes the address flags accordingly.
18614static void computeFlagsForAddressComputation(SDValue N, unsigned &FlagSet,
18615 SelectionDAG &DAG) {
18616 // Set the alignment flags for the node depending on if the node is
18617 // 4-byte or 16-byte aligned.
18618 auto SetAlignFlagsForImm = [&](uint64_t Imm) {
18619 if ((Imm & 0x3) == 0)
18620 FlagSet |= PPC::MOF_RPlusSImm16Mult4;
18621 if ((Imm & 0xf) == 0)
18622 FlagSet |= PPC::MOF_RPlusSImm16Mult16;
18623 };
18624
18625 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) {
18626 // All 32-bit constants can be computed as LIS + Disp.
18627 const APInt &ConstImm = CN->getAPIntValue();
18628 if (ConstImm.isSignedIntN(32)) { // Flag to handle 32-bit constants.
18629 FlagSet |= PPC::MOF_AddrIsSImm32;
18630 SetAlignFlagsForImm(ConstImm.getZExtValue());
18631 setAlignFlagsForFI(N, FlagSet, DAG);
18632 }
18633 if (ConstImm.isSignedIntN(34)) // Flag to handle 34-bit constants.
18634 FlagSet |= PPC::MOF_RPlusSImm34;
18635 else // Let constant materialization handle large constants.
18636 FlagSet |= PPC::MOF_NotAddNorCst;
18637 } else if (N.getOpcode() == ISD::ADD || provablyDisjointOr(DAG, N)) {
18638 // This address can be represented as an addition of:
18639 // - Register + Imm16 (possibly a multiple of 4/16)
18640 // - Register + Imm34
18641 // - Register + PPCISD::Lo
18642 // - Register + Register
18643 // In any case, we won't have to match this as Base + Zero.
18644 SDValue RHS = N.getOperand(1);
18645 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(RHS)) {
18646 const APInt &ConstImm = CN->getAPIntValue();
18647 if (ConstImm.isSignedIntN(16)) {
18648 FlagSet |= PPC::MOF_RPlusSImm16; // Signed 16-bit immediates.
18649 SetAlignFlagsForImm(ConstImm.getZExtValue());
18650 setAlignFlagsForFI(N, FlagSet, DAG);
18651 }
18652 if (ConstImm.isSignedIntN(34))
18653 FlagSet |= PPC::MOF_RPlusSImm34; // Signed 34-bit immediates.
18654 else
18655 FlagSet |= PPC::MOF_RPlusR; // Register.
18656 } else if (RHS.getOpcode() == PPCISD::Lo && !RHS.getConstantOperandVal(1))
18657 FlagSet |= PPC::MOF_RPlusLo; // PPCISD::Lo.
18658 else
18659 FlagSet |= PPC::MOF_RPlusR;
18660 } else { // The address computation is not a constant or an addition.
18661 setAlignFlagsForFI(N, FlagSet, DAG);
18662 FlagSet |= PPC::MOF_NotAddNorCst;
18663 }
18664}
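The SetAlignFlagsForImm lambda above classifies a displacement by its low bits: a multiple of 4 can feed DS-Form offsets and a multiple of 16 can feed DQ-Form offsets. A small standalone sketch of that classification (plain C++, sample immediates chosen purely for illustration):

// Standalone sketch (not the LLVM API): classifying a displacement the way
// SetAlignFlagsForImm does.
#include <cstdint>
#include <cstdio>

int main() {
  const uint64_t Samples[] = {3, 8, 24, 32};
  for (uint64_t Imm : Samples) {
    bool Mult4 = (Imm & 0x3) == 0;   // usable as a DS-Form style offset
    bool Mult16 = (Imm & 0xf) == 0;  // usable as a DQ-Form style offset
    std::printf("imm=%llu mult4=%d mult16=%d\n",
                (unsigned long long)Imm, Mult4, Mult16);
  }
  return 0;
}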
18665
18666static bool isPCRelNode(SDValue N) {
18667 return (N.getOpcode() == PPCISD::MAT_PCREL_ADDR ||
18668 isValidPCRelNode<ConstantPoolSDNode>(N) ||
18669 isValidPCRelNode<GlobalAddressSDNode>(N) ||
18670 isValidPCRelNode<JumpTableSDNode>(N) ||
18671 isValidPCRelNode<BlockAddressSDNode>(N));
18672}
18673
18674 /// computeMOFlags - Given a node N and its Parent (a MemSDNode), compute
18675/// the address flags of the load/store instruction that is to be matched.
18676unsigned PPCTargetLowering::computeMOFlags(const SDNode *Parent, SDValue N,
18677 SelectionDAG &DAG) const {
18678 unsigned FlagSet = PPC::MOF_None;
18679
18680 // Compute subtarget flags.
18681 if (!Subtarget.hasP9Vector())
18682 FlagSet |= PPC::MOF_SubtargetBeforeP9;
18683 else
18684 FlagSet |= PPC::MOF_SubtargetP9;
18685
18686 if (Subtarget.hasPrefixInstrs())
18687 FlagSet |= PPC::MOF_SubtargetP10;
18688
18689 if (Subtarget.hasSPE())
18690 FlagSet |= PPC::MOF_SubtargetSPE;
18691
18692 // Check if we have a PCRel node and return early.
18693 if ((FlagSet & PPC::MOF_SubtargetP10) && isPCRelNode(N))
18694 return FlagSet;
18695
18696 // If the node is the paired load/store intrinsics, compute flags for
18697 // address computation and return early.
18698 unsigned ParentOp = Parent->getOpcode();
18699 if (Subtarget.isISA3_1() && ((ParentOp == ISD::INTRINSIC_W_CHAIN) ||
18700 (ParentOp == ISD::INTRINSIC_VOID))) {
18701 unsigned ID = Parent->getConstantOperandVal(1);
18702 if ((ID == Intrinsic::ppc_vsx_lxvp) || (ID == Intrinsic::ppc_vsx_stxvp)) {
18703 SDValue IntrinOp = (ID == Intrinsic::ppc_vsx_lxvp)
18704 ? Parent->getOperand(2)
18705 : Parent->getOperand(3);
18706 computeFlagsForAddressComputation(IntrinOp, FlagSet, DAG);
18707 FlagSet |= PPC::MOF_Vector;
18708 return FlagSet;
18709 }
18710 }
18711
18712 // Mark this as something we don't want to handle here if it is an atomic
18713 // or a pre-increment instruction.
18714 if (const LSBaseSDNode *LSB = dyn_cast<LSBaseSDNode>(Parent))
18715 if (LSB->isIndexed())
18716 return PPC::MOF_None;
18717
18718 // Compute in-memory type flags. This is based on whether the value is a
18719 // scalar integer, a float, or a vector.
18720 const MemSDNode *MN = dyn_cast<MemSDNode>(Parent);
18721 assert(MN && "Parent should be a MemSDNode!");
18722 EVT MemVT = MN->getMemoryVT();
18723 unsigned Size = MemVT.getSizeInBits();
18724 if (MemVT.isScalarInteger()) {
18725 assert(Size <= 128 &&
18726 "Not expecting scalar integers larger than 16 bytes!");
18727 if (Size < 32)
18728 FlagSet |= PPC::MOF_SubWordInt;
18729 else if (Size == 32)
18730 FlagSet |= PPC::MOF_WordInt;
18731 else
18732 FlagSet |= PPC::MOF_DoubleWordInt;
18733 } else if (MemVT.isVector() && !MemVT.isFloatingPoint()) { // Integer vectors.
18734 if (Size == 128)
18735 FlagSet |= PPC::MOF_Vector;
18736 else if (Size == 256) {
18737 assert(Subtarget.pairedVectorMemops() &&
18738 "256-bit vectors are only available when paired vector memops is "
18739 "enabled!");
18740 FlagSet |= PPC::MOF_Vector;
18741 } else
18742 llvm_unreachable("Not expecting illegal vectors!");
18743 } else { // Floating point type: can be scalar, f128 or vector types.
18744 if (Size == 32 || Size == 64)
18745 FlagSet |= PPC::MOF_ScalarFloat;
18746 else if (MemVT == MVT::f128 || MemVT.isVector())
18747 FlagSet |= PPC::MOF_Vector;
18748 else
18749 llvm_unreachable("Not expecting illegal scalar floats!");
18750 }
18751
18752 // Compute flags for address computation.
18753 computeFlagsForAddressComputation(N, FlagSet, DAG);
18754
18755 // Compute type extension flags.
18756 if (const LoadSDNode *LN = dyn_cast<LoadSDNode>(Parent)) {
18757 switch (LN->getExtensionType()) {
18758 case ISD::SEXTLOAD:
18759 FlagSet |= PPC::MOF_SExt;
18760 break;
18761 case ISD::EXTLOAD:
18762 case ISD::ZEXTLOAD:
18763 FlagSet |= PPC::MOF_ZExt;
18764 break;
18765 case ISD::NON_EXTLOAD:
18766 FlagSet |= PPC::MOF_NoExt;
18767 break;
18768 }
18769 } else
18770 FlagSet |= PPC::MOF_NoExt;
18771
18772 // For integers, no extension is the same as zero extension.
18773 // We set the extension mode to zero extension so we don't have
18774 // to add separate entries in AddrModesMap for loads and stores.
18775 if (MemVT.isScalarInteger() && (FlagSet & PPC::MOF_NoExt)) {
18776 FlagSet |= PPC::MOF_ZExt;
18777 FlagSet &= ~PPC::MOF_NoExt;
18778 }
18779
18780 // If we don't have prefixed instructions, 34-bit constants should be
18781 // treated as PPC::MOF_NotAddNorCst so they can match D-Forms.
18782 bool IsNonP1034BitConst =
18783 ((PPC::MOF_RPlusSImm34 | PPC::MOF_AddrIsSImm32 | PPC::MOF_SubtargetP10) &
18784 FlagSet) == PPC::MOF_RPlusSImm34;
18785 if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::OR &&
18786 IsNonP1034BitConst)
18787 FlagSet |= PPC::MOF_NotAddNorCst;
18788
18789 return FlagSet;
18790}
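One detail worth calling out from the tail of computeMOFlags: for scalar-integer accesses a non-extending load is folded into the zero-extension flag so that loads and stores can share one AddrModesMap entry. A standalone sketch of that normalization (hypothetical flag values, not the real PPC::MOF_* bits):

// Standalone sketch (not the LLVM API): fold "no extension" into "zero
// extension" for scalar-integer memory accesses.
#include <cstdio>

int main() {
  enum : unsigned { NoExt = 1u << 0, ZExt = 1u << 1, SExt = 1u << 2 };
  unsigned Flags = NoExt;   // e.g. a plain, non-extending integer load
  bool IsScalarInt = true;  // pretend the in-memory type is a scalar integer
  if (IsScalarInt && (Flags & NoExt)) {
    Flags |= ZExt;
    Flags &= ~NoExt;
  }
  std::printf("ZExt=%d NoExt=%d\n", (Flags & ZExt) != 0, (Flags & NoExt) != 0);
  return 0;
}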
18791
18792/// SelectForceXFormMode - Given the specified address, force it to be
18793/// represented as an indexed [r+r] operation (an XForm instruction).
18794 PPC::AddrMode PPCTargetLowering::SelectForceXFormMode(SDValue N, SDValue &Disp,
18795 SDValue &Base,
18796 SelectionDAG &DAG) const {
18797
18798 PPC::AddrMode Mode = PPC::AM_XForm;
18799 int16_t ForceXFormImm = 0;
18800 if (provablyDisjointOr(DAG, N) &&
18801 !isIntS16Immediate(N.getOperand(1), ForceXFormImm)) {
18802 Disp = N.getOperand(0);
18803 Base = N.getOperand(1);
18804 return Mode;
18805 }
18806
18807 // If the address is the result of an add, we will utilize the fact that the
18808 // address calculation includes an implicit add. However, we can reduce
18809 // register pressure if we do not materialize a constant just for use as the
18810 // index register. We only get rid of the add if it is not an add of a
18811 // value and a 16-bit signed constant and both have a single use.
18812 if (N.getOpcode() == ISD::ADD &&
18813 (!isIntS16Immediate(N.getOperand(1), ForceXFormImm) ||
18814 !N.getOperand(1).hasOneUse() || !N.getOperand(0).hasOneUse())) {
18815 Disp = N.getOperand(0);
18816 Base = N.getOperand(1);
18817 return Mode;
18818 }
18819
18820 // Otherwise, use R0 as the base register.
18821 Disp = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
18822 N.getValueType());
18823 Base = N;
18824
18825 return Mode;
18826}
18827
18828 bool PPCTargetLowering::splitValueIntoRegisterParts(
18829 SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
18830 unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const {
18831 EVT ValVT = Val.getValueType();
18832 // If we are splitting a scalar integer into f64 parts (i.e. so they
18833 // can be placed into VFRC registers), we need to zero extend and
18834 // bitcast the values. This will ensure the value is placed into a
18835 // VSR using direct moves or stack operations as needed.
18836 if (PartVT == MVT::f64 &&
18837 (ValVT == MVT::i32 || ValVT == MVT::i16 || ValVT == MVT::i8)) {
18838 Val = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, Val);
18839 Val = DAG.getNode(ISD::BITCAST, DL, MVT::f64, Val);
18840 Parts[0] = Val;
18841 return true;
18842 }
18843 return false;
18844}
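The ZERO_EXTEND + BITCAST pair above widens a small integer to 64 bits and then reinterprets the bits as an f64 so the value can be placed in a VSR. The standalone sketch below mirrors that bit-level behavior with memcpy (illustration only; it is not how SelectionDAG represents the operation):

// Standalone sketch (not the LLVM API): zero-extend a 32-bit integer to
// 64 bits and reinterpret the bits as a double, preserving the bit pattern.
#include <cstdint>
#include <cstdio>
#include <cstring>

int main() {
  uint32_t Val = 0x12345678u;
  uint64_t Wide = Val;                         // zero extend to 64 bits
  double AsF64;
  static_assert(sizeof(AsF64) == sizeof(Wide), "need a 64-bit double");
  std::memcpy(&AsF64, &Wide, sizeof(AsF64));   // bitcast, not a conversion
  uint64_t Back;
  std::memcpy(&Back, &AsF64, sizeof(Back));
  std::printf("bits preserved: %s\n", Back == Wide ? "yes" : "no");
  return 0;
}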
18845
18846SDValue PPCTargetLowering::lowerToLibCall(const char *LibCallName, SDValue Op,
18847 SelectionDAG &DAG) const {
18848 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
18849 TargetLowering::CallLoweringInfo CLI(DAG);
18850 EVT RetVT = Op.getValueType();
18851 Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
18852 SDValue Callee =
18853 DAG.getExternalSymbol(LibCallName, TLI.getPointerTy(DAG.getDataLayout()));
18854 bool SignExtend = TLI.shouldSignExtendTypeInLibCall(RetTy, false);
18855 TargetLowering::ArgListTy Args;
18856 TargetLowering::ArgListEntry Entry;
18857 for (const SDValue &N : Op->op_values()) {
18858 EVT ArgVT = N.getValueType();
18859 Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
18860 Entry.Node = N;
18861 Entry.Ty = ArgTy;
18862 Entry.IsSExt = TLI.shouldSignExtendTypeInLibCall(ArgTy, SignExtend);
18863 Entry.IsZExt = !Entry.IsSExt;
18864 Args.push_back(Entry);
18865 }
18866
18867 SDValue InChain = DAG.getEntryNode();
18868 SDValue TCChain = InChain;
18869 const Function &F = DAG.getMachineFunction().getFunction();
18870 bool isTailCall =
18871 TLI.isInTailCallPosition(DAG, Op.getNode(), TCChain) &&
18872 (RetTy == F.getReturnType() || F.getReturnType()->isVoidTy());
18873 if (isTailCall)
18874 InChain = TCChain;
18875 CLI.setDebugLoc(SDLoc(Op))
18876 .setChain(InChain)
18877 .setLibCallee(CallingConv::C, RetTy, Callee, std::move(Args))
18878 .setTailCall(isTailCall)
18879 .setSExtResult(SignExtend)
18880 .setZExtResult(!SignExtend)
18881 .setIsPostTypeLegalization(true);
18882 return TLI.LowerCallTo(CLI).first;
18883}
18884
18885SDValue PPCTargetLowering::lowerLibCallBasedOnType(
18886 const char *LibCallFloatName, const char *LibCallDoubleName, SDValue Op,
18887 SelectionDAG &DAG) const {
18888 if (Op.getValueType() == MVT::f32)
18889 return lowerToLibCall(LibCallFloatName, Op, DAG);
18890
18891 if (Op.getValueType() == MVT::f64)
18892 return lowerToLibCall(LibCallDoubleName, Op, DAG);
18893
18894 return SDValue();
18895}
18896
18897bool PPCTargetLowering::isLowringToMASSFiniteSafe(SDValue Op) const {
18898 SDNodeFlags Flags = Op.getNode()->getFlags();
18899 return isLowringToMASSSafe(Op) && Flags.hasNoSignedZeros() &&
18900 Flags.hasNoNaNs() && Flags.hasNoInfs();
18901}
18902
18903bool PPCTargetLowering::isLowringToMASSSafe(SDValue Op) const {
18904 return Op.getNode()->getFlags().hasApproximateFuncs();
18905}
18906
18907bool PPCTargetLowering::isScalarMASSConversionEnabled() const {
18908 return TM.getOptLevel() == CodeGenOptLevel::Aggressive;
18909 }
18910
18911SDValue PPCTargetLowering::lowerLibCallBase(const char *LibCallDoubleName,
18912 const char *LibCallFloatName,
18913 const char *LibCallDoubleNameFinite,
18914 const char *LibCallFloatNameFinite,
18915 SDValue Op,
18916 SelectionDAG &DAG) const {
18917 if (!isScalarMASSConversionEnabled() || !isLowringToMASSSafe(Op))
18918 return SDValue();
18919
18920 if (!isLowringToMASSFiniteSafe(Op))
18921 return lowerLibCallBasedOnType(LibCallFloatName, LibCallDoubleName, Op,
18922 DAG);
18923
18924 return lowerLibCallBasedOnType(LibCallFloatNameFinite,
18925 LibCallDoubleNameFinite, Op, DAG);
18926}
18927
18928SDValue PPCTargetLowering::lowerPow(SDValue Op, SelectionDAG &DAG) const {
18929 return lowerLibCallBase("__xl_pow", "__xl_powf", "__xl_pow_finite",
18930 "__xl_powf_finite", Op, DAG);
18931}
18932
18933SDValue PPCTargetLowering::lowerSin(SDValue Op, SelectionDAG &DAG) const {
18934 return lowerLibCallBase("__xl_sin", "__xl_sinf", "__xl_sin_finite",
18935 "__xl_sinf_finite", Op, DAG);
18936}
18937
18938SDValue PPCTargetLowering::lowerCos(SDValue Op, SelectionDAG &DAG) const {
18939 return lowerLibCallBase("__xl_cos", "__xl_cosf", "__xl_cos_finite",
18940 "__xl_cosf_finite", Op, DAG);
18941}
18942
18943SDValue PPCTargetLowering::lowerLog(SDValue Op, SelectionDAG &DAG) const {
18944 return lowerLibCallBase("__xl_log", "__xl_logf", "__xl_log_finite",
18945 "__xl_logf_finite", Op, DAG);
18946}
18947
18948SDValue PPCTargetLowering::lowerLog10(SDValue Op, SelectionDAG &DAG) const {
18949 return lowerLibCallBase("__xl_log10", "__xl_log10f", "__xl_log10_finite",
18950 "__xl_log10f_finite", Op, DAG);
18951}
18952
18953SDValue PPCTargetLowering::lowerExp(SDValue Op, SelectionDAG &DAG) const {
18954 return lowerLibCallBase("__xl_exp", "__xl_expf", "__xl_exp_finite",
18955 "__xl_expf_finite", Op, DAG);
18956}
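The wrappers above all follow the same naming scheme: the double or float MASS entry point is chosen by the operand type, and the *_finite variant is used only when lowering is finite-safe (no NaNs, no infinities, no signed zeros). A standalone sketch of that selection (plain C++; pickCallee is a hypothetical helper, not part of the backend):

// Standalone sketch (not the LLVM API): how the MASS entry point name is
// chosen by lowerLibCallBase / lowerLibCallBasedOnType above.
#include <cstdio>
#include <string>

static std::string pickCallee(const char *DoubleName, const char *FloatName,
                              bool IsF32, bool FiniteSafe) {
  std::string Name = IsF32 ? FloatName : DoubleName;
  if (FiniteSafe)          // requires nnan, ninf and nsz on the node
    Name += "_finite";
  return Name;
}

int main() {
  // f64 pow without the fast-math guarantees -> "__xl_pow"
  std::printf("%s\n", pickCallee("__xl_pow", "__xl_powf", false, false).c_str());
  // f32 sin with the finite-safe flags -> "__xl_sinf_finite"
  std::printf("%s\n", pickCallee("__xl_sin", "__xl_sinf", true, true).c_str());
  return 0;
}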
18957
18958// If we happen to match to an aligned D-Form, check if the Frame Index is
18959// adequately aligned. If it is not, reset the mode to match to X-Form.
18960static void setXFormForUnalignedFI(SDValue N, unsigned Flags,
18961 PPC::AddrMode &Mode) {
18962 if (!isa<FrameIndexSDNode>(N))
18963 return;
18964 if ((Mode == PPC::AM_DSForm && !(Flags & PPC::MOF_RPlusSImm16Mult4)) ||
18965 (Mode == PPC::AM_DQForm && !(Flags & PPC::MOF_RPlusSImm16Mult16)))
18966 Mode = PPC::AM_XForm;
18967}
18968
18969 /// SelectOptimalAddrMode - Based on a node N and its Parent (a MemSDNode),
18970/// compute the address flags of the node, get the optimal address mode based
18971/// on the flags, and set the Base and Disp based on the address mode.
18972 PPC::AddrMode PPCTargetLowering::SelectOptimalAddrMode(const SDNode *Parent,
18973 SDValue N, SDValue &Disp,
18974 SDValue &Base,
18975 SelectionDAG &DAG,
18976 MaybeAlign Align) const {
18977 SDLoc DL(Parent);
18978
18979 // Compute the address flags.
18980 unsigned Flags = computeMOFlags(Parent, N, DAG);
18981
18982 // Get the optimal address mode based on the Flags.
18983 PPC::AddrMode Mode = getAddrModeForFlags(Flags);
18984
18985 // If the address mode is DS-Form or DQ-Form, check if the FI is aligned.
18986 // Select an X-Form load if it is not.
18987 setXFormForUnalignedFI(N, Flags, Mode);
18988
18989 // Set the mode to PC-Relative addressing mode if we have a valid PC-Rel node.
18990 if ((Mode == PPC::AM_XForm) && isPCRelNode(N)) {
18991 assert(Subtarget.isUsingPCRelativeCalls() &&
18992 "Must be using PC-Relative calls when a valid PC-Relative node is "
18993 "present!");
18994 Mode = PPC::AM_PCRel;
18995 }
18996
18997 // Set Base and Disp accordingly depending on the address mode.
18998 switch (Mode) {
18999 case PPC::AM_DForm:
19000 case PPC::AM_DSForm:
19001 case PPC::AM_DQForm: {
19002 // This is a register plus a 16-bit immediate. The base will be the
19003 // register and the displacement will be the immediate unless it
19004 // isn't sufficiently aligned.
19005 if (Flags & PPC::MOF_RPlusSImm16) {
19006 SDValue Op0 = N.getOperand(0);
19007 SDValue Op1 = N.getOperand(1);
19008 int16_t Imm = Op1->getAsZExtVal();
19009 if (!Align || isAligned(*Align, Imm)) {
19010 Disp = DAG.getSignedTargetConstant(Imm, DL, N.getValueType());
19011 Base = Op0;
19012 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Op0)) {
19013 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
19014 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
19015 }
19016 break;
19017 }
19018 }
19019 // This is a register plus the @lo relocation. The base is the register
19020 // and the displacement is the global address.
19021 else if (Flags & PPC::MOF_RPlusLo) {
19022 Disp = N.getOperand(1).getOperand(0); // The global address.
19023 assert(Disp.getOpcode() == ISD::TargetGlobalAddress ||
19024 Disp.getOpcode() == ISD::TargetGlobalTLSAddress ||
19025 Disp.getOpcode() == ISD::TargetConstantPool ||
19026 Disp.getOpcode() == ISD::TargetJumpTable);
19027 Base = N.getOperand(0);
19028 break;
19029 }
19030 // This is a constant address at most 32 bits. The base will be
19031 // zero or load-immediate-shifted and the displacement will be
19032 // the low 16 bits of the address.
19033 else if (Flags & PPC::MOF_AddrIsSImm32) {
19034 auto *CN = cast<ConstantSDNode>(N);
19035 EVT CNType = CN->getValueType(0);
19036 uint64_t CNImm = CN->getZExtValue();
19037 // If this address fits entirely in a 16-bit sext immediate field, codegen
19038 // this as "d, 0".
19039 int16_t Imm;
19040 if (isIntS16Immediate(CN, Imm) && (!Align || isAligned(*Align, Imm))) {
19041 Disp = DAG.getSignedTargetConstant(Imm, DL, CNType);
19042 Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
19043 CNType);
19044 break;
19045 }
19046 // Handle 32-bit sext immediate with LIS + Addr mode.
19047 if ((CNType == MVT::i32 || isInt<32>(CNImm)) &&
19048 (!Align || isAligned(*Align, CNImm))) {
19049 int32_t Addr = (int32_t)CNImm;
19050 // Otherwise, break this down into LIS + Disp.
19051 Disp = DAG.getSignedTargetConstant((int16_t)Addr, DL, MVT::i32);
19052 Base =
19053 DAG.getTargetConstant((Addr - (int16_t)Addr) >> 16, DL, MVT::i32);
19054 uint32_t LIS = CNType == MVT::i32 ? PPC::LIS : PPC::LIS8;
19055 Base = SDValue(DAG.getMachineNode(LIS, DL, CNType, Base), 0);
19056 break;
19057 }
19058 }
19059 // Otherwise, the PPC::MOF_NotAddNorCst flag is set. The load/store is non-foldable.
19060 Disp = DAG.getTargetConstant(0, DL, getPointerTy(DAG.getDataLayout()));
19061 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N)) {
19062 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
19063 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
19064 } else
19065 Base = N;
19066 break;
19067 }
19068 case PPC::AM_PrefixDForm: {
19069 int64_t Imm34 = 0;
19070 unsigned Opcode = N.getOpcode();
19071 if (((Opcode == ISD::ADD) || (Opcode == ISD::OR)) &&
19072 (isIntS34Immediate(N.getOperand(1), Imm34))) {
19073 // N is an ADD/OR node, and its second operand is a 34-bit signed immediate.
19074 Disp = DAG.getSignedTargetConstant(Imm34, DL, N.getValueType());
19075 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0)))
19076 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
19077 else
19078 Base = N.getOperand(0);
19079 } else if (isIntS34Immediate(N, Imm34)) {
19080 // The address is a 34-bit signed immediate.
19081 Disp = DAG.getSignedTargetConstant(Imm34, DL, N.getValueType());
19082 Base = DAG.getRegister(PPC::ZERO8, N.getValueType());
19083 }
19084 break;
19085 }
19086 case PPC::AM_PCRel: {
19087 // When selecting PC-Relative instructions, "Base" is not utilized as
19088 // we select the address as [PC+imm].
19089 Disp = N;
19090 break;
19091 }
19092 case PPC::AM_None:
19093 break;
19094 default: { // By default, X-Form is always available to be selected.
19095 // When a frame index is not aligned, we also match by XForm.
19096 FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N);
19097 Base = FI ? N : N.getOperand(1);
19098 Disp = FI ? DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
19099 N.getValueType())
19100 : N.getOperand(0);
19101 break;
19102 }
19103 }
19104 return Mode;
19105}
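The AM_DForm path above splits a 32-bit constant address into a LIS immediate plus a sign-extended 16-bit displacement, computing the high part as (Addr - (int16_t)Addr) >> 16 so that the displacement's sign extension is exactly compensated. A standalone sketch of that arithmetic (the sample address is chosen so the low half is negative when sign-extended):

// Standalone sketch (not the LLVM API): LIS + displacement decomposition of a
// 32-bit constant address, mirroring the AM_DForm path above.
#include <cstdint>
#include <cstdio>

int main() {
  int32_t Addr = 0x12348765;              // low 16 bits sign-extend negative
  int16_t Lo = (int16_t)Addr;             // displacement, here -30875
  int32_t HiImm = (Addr - Lo) >> 16;      // immediate that feeds LIS
  int32_t Rebuilt = (HiImm << 16) + Lo;   // what LIS + D-Form addressing forms
  std::printf("addr=0x%08x rebuilt=0x%08x lis=0x%04x lo=%d\n",
              (uint32_t)Addr, (uint32_t)Rebuilt, (unsigned)(uint16_t)HiImm, Lo);
  return 0;
}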
19106
19107 CCAssignFn *PPCTargetLowering::ccAssignFnForCall(CallingConv::ID CC,
19108 bool Return,
19109 bool IsVarArg) const {
19110 switch (CC) {
19111 case CallingConv::Cold:
19112 return (Return ? RetCC_PPC_Cold : CC_PPC64_ELF);
19113 default:
19114 return CC_PPC64_ELF;
19115 }
19116}
19117
19118 bool PPCTargetLowering::shouldInlineQuadwordAtomics() const {
19119 return Subtarget.isPPC64() && Subtarget.hasQuadwordAtomics();
19120}
19121
19122 TargetLowering::AtomicExpansionKind
19123 PPCTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
19124 unsigned Size = AI->getType()->getPrimitiveSizeInBits();
19125 if (shouldInlineQuadwordAtomics() && Size == 128)
19126 return AtomicExpansionKind::MaskedIntrinsic;
19127
19128 switch (AI->getOperation()) {
19129 case AtomicRMWInst::UIncWrap:
19130 case AtomicRMWInst::UDecWrap:
19131 case AtomicRMWInst::USubCond:
19132 case AtomicRMWInst::USubSat:
19133 return AtomicExpansionKind::CmpXChg;
19134 default:
19135 return TargetLowering::shouldExpandAtomicRMWInIR(AI);
19136 }
19137
19138 llvm_unreachable("unreachable atomicrmw operation");
19139 }
19140
19141 TargetLowering::AtomicExpansionKind
19142 PPCTargetLowering::shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const {
19143 unsigned Size = AI->getNewValOperand()->getType()->getPrimitiveSizeInBits();
19144 if (shouldInlineQuadwordAtomics() && Size == 128)
19145 return AtomicExpansionKind::MaskedIntrinsic;
19146 return TargetLowering::shouldExpandAtomicCmpXchgInIR(AI);
19147 }
19148
19149static Intrinsic::ID
19150 getIntrinsicForAtomicRMWBinOp128(AtomicRMWInst::BinOp BinOp) {
19151 switch (BinOp) {
19152 default:
19153 llvm_unreachable("Unexpected AtomicRMW BinOp");
19154 case AtomicRMWInst::Xchg:
19155 return Intrinsic::ppc_atomicrmw_xchg_i128;
19156 case AtomicRMWInst::Add:
19157 return Intrinsic::ppc_atomicrmw_add_i128;
19158 case AtomicRMWInst::Sub:
19159 return Intrinsic::ppc_atomicrmw_sub_i128;
19160 case AtomicRMWInst::And:
19161 return Intrinsic::ppc_atomicrmw_and_i128;
19162 case AtomicRMWInst::Or:
19163 return Intrinsic::ppc_atomicrmw_or_i128;
19164 case AtomicRMWInst::Xor:
19165 return Intrinsic::ppc_atomicrmw_xor_i128;
19166 case AtomicRMWInst::Nand:
19167 return Intrinsic::ppc_atomicrmw_nand_i128;
19168 }
19169}
19170
19171 Value *PPCTargetLowering::emitMaskedAtomicRMWIntrinsic(
19172 IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
19173 Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
19174 assert(shouldInlineQuadwordAtomics() && "Only support quadword now");
19175 Module *M = Builder.GetInsertBlock()->getParent()->getParent();
19176 Type *ValTy = Incr->getType();
19177 assert(ValTy->getPrimitiveSizeInBits() == 128);
19178 Type *Int64Ty = Type::getInt64Ty(M->getContext());
19179 Value *IncrLo = Builder.CreateTrunc(Incr, Int64Ty, "incr_lo");
19180 Value *IncrHi =
19181 Builder.CreateTrunc(Builder.CreateLShr(Incr, 64), Int64Ty, "incr_hi");
19182 Value *LoHi = Builder.CreateIntrinsic(
19183 getIntrinsicForAtomicRMWBinOp128(AI->getOperation()), {},
19184 {AlignedAddr, IncrLo, IncrHi});
19185 Value *Lo = Builder.CreateExtractValue(LoHi, 0, "lo");
19186 Value *Hi = Builder.CreateExtractValue(LoHi, 1, "hi");
19187 Lo = Builder.CreateZExt(Lo, ValTy, "lo64");
19188 Hi = Builder.CreateZExt(Hi, ValTy, "hi64");
19189 return Builder.CreateOr(
19190 Lo, Builder.CreateShl(Hi, ConstantInt::get(ValTy, 64)), "val64");
19191}
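The builder calls above split the 128-bit operand into two 64-bit halves before calling the intrinsic and reassemble the result with zext/shl/or afterwards. The standalone sketch below mirrors that split/recombine using the GCC/Clang unsigned __int128 extension (illustration only; it is not IR and not the intrinsic call itself):

// Standalone sketch (not the LLVM API): split a 128-bit value into lo/hi
// 64-bit halves and recombine them, mirroring the trunc/lshr and zext/shl/or
// sequences built above. Relies on the unsigned __int128 extension.
#include <cstdint>
#include <cstdio>

int main() {
  unsigned __int128 V = ((unsigned __int128)0x0123456789abcdefULL << 64) |
                        0xfedcba9876543210ULL;
  uint64_t Lo = (uint64_t)V;           // CreateTrunc(Incr, Int64Ty)
  uint64_t Hi = (uint64_t)(V >> 64);   // CreateTrunc(CreateLShr(Incr, 64), ..)
  unsigned __int128 Back =
      ((unsigned __int128)Hi << 64) | Lo; // CreateZExt + CreateShl + CreateOr
  std::printf("round trip ok: %s\n", V == Back ? "yes" : "no");
  return 0;
}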
19192
19193 Value *PPCTargetLowering::emitMaskedAtomicCmpXchgIntrinsic(
19194 IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
19195 Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
19196 assert(shouldInlineQuadwordAtomics() && "Only support quadword now");
19197 Module *M = Builder.GetInsertBlock()->getParent()->getParent();
19198 Type *ValTy = CmpVal->getType();
19199 assert(ValTy->getPrimitiveSizeInBits() == 128);
19200 Function *IntCmpXchg =
19201 Intrinsic::getOrInsertDeclaration(M, Intrinsic::ppc_cmpxchg_i128);
19202 Type *Int64Ty = Type::getInt64Ty(M->getContext());
19203 Value *CmpLo = Builder.CreateTrunc(CmpVal, Int64Ty, "cmp_lo");
19204 Value *CmpHi =
19205 Builder.CreateTrunc(Builder.CreateLShr(CmpVal, 64), Int64Ty, "cmp_hi");
19206 Value *NewLo = Builder.CreateTrunc(NewVal, Int64Ty, "new_lo");
19207 Value *NewHi =
19208 Builder.CreateTrunc(Builder.CreateLShr(NewVal, 64), Int64Ty, "new_hi");
19209 emitLeadingFence(Builder, CI, Ord);
19210 Value *LoHi =
19211 Builder.CreateCall(IntCmpXchg, {AlignedAddr, CmpLo, CmpHi, NewLo, NewHi});
19212 emitTrailingFence(Builder, CI, Ord);
19213 Value *Lo = Builder.CreateExtractValue(LoHi, 0, "lo");
19214 Value *Hi = Builder.CreateExtractValue(LoHi, 1, "hi");
19215 Lo = Builder.CreateZExt(Lo, ValTy, "lo64");
19216 Hi = Builder.CreateZExt(Hi, ValTy, "hi64");
19217 return Builder.CreateOr(
19218 Lo, Builder.CreateShl(Hi, ConstantInt::get(ValTy, 64)), "val64");
19219}
unsigned const MachineRegisterInfo * MRI
static MCRegister MatchRegisterName(StringRef Name)
static unsigned getCallOpcode(const MachineFunction &CallerF, bool IsIndirect, bool IsTailCall, std::optional< CallLowering::PtrAuthInfo > &PAI, MachineRegisterInfo &MRI)
#define Success
static SDValue GeneratePerfectShuffle(unsigned ID, SDValue V1, SDValue V2, unsigned PFEntry, SDValue LHS, SDValue RHS, SelectionDAG &DAG, const SDLoc &dl)
GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit the specified operations t...
static bool isSignExtended(SDValue N, SelectionDAG &DAG)
unsigned Intr
static msgpack::DocNode getNode(msgpack::DocNode DN, msgpack::Type Type, MCValue Val)
static std::pair< Register, unsigned > getBaseWithConstantOffset(MachineRegisterInfo &MRI, Register Reg)
This file declares a class to represent arbitrary precision floating point values and provide a varie...
This file implements a class to represent arbitrary precision integral constant values and operations...
This file implements the APSInt class, which is a simple class that represents an arbitrary sized int...
static bool isLoad(int Opcode)
@ OP_COPY
static bool isFloatingPointZero(SDValue Op)
isFloatingPointZero - Return true if this is +0.0.
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Function Alias Analysis Results
Atomic ordering constants.
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
This file contains the declarations for the subclasses of Constant, which represent the different fla...
return RetTy
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
#define LLVM_DEBUG(...)
Definition: Debug.h:106
static RegisterPass< DebugifyModulePass > DM("debugify", "Attach debug info to everything")
This file defines the DenseMap class.
uint64_t Addr
uint64_t Size
bool End
Definition: ELF_riscv.cpp:480
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
const HexagonInstrInfo * TII
static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain, ISD::ArgFlagsTy Flags, SelectionDAG &DAG, const SDLoc &dl)
CreateCopyOfByValArgument - Make a copy of an aggregate at address specified by "Src" to address "Dst...
IRTranslator LLVM IR MI
Module.h This file contains the declarations for the Module class.
This defines the Use class.
iv users
Definition: IVUsers.cpp:48
#define RegName(no)
static LVOptions Options
Definition: LVOptions.cpp:25
lazy value info
static int getEstimateRefinementSteps(EVT VT, const LoongArchSubtarget &Subtarget)
static bool isConstantOrUndef(const SDValue Op)
static bool isSplat(Value *V)
Return true if V is a splat of a value (which is used when multiplying a matrix with a scalar).
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
#define G(x, y, z)
Definition: MD5.cpp:56
unsigned const TargetRegisterInfo * TRI
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
static CodeModel::Model getCodeModel(const PPCSubtarget &S, const TargetMachine &TM, const MachineOperand &MO)
cl::opt< bool > ANDIGlueBug("expose-ppc-andi-glue-bug", cl::desc("expose the ANDI glue bug on PPC"), cl::Hidden)
static SDValue getCanonicalConstSplat(uint64_t Val, unsigned SplatSize, EVT VT, SelectionDAG &DAG, const SDLoc &dl)
getCanonicalConstSplat - Build a canonical splat immediate of Val with an element size of SplatSize.
static bool IsSelectCC(MachineInstr &MI)
static const TargetRegisterClass * getRegClassForSVT(MVT::SimpleValueType SVT, bool IsPPC64, bool HasP8Vector, bool HasVSX)
static bool isGPRShadowAligned(MCPhysReg Reg, Align RequiredAlign)
static bool needStackSlotPassParameters(const PPCSubtarget &Subtarget, const SmallVectorImpl< ISD::OutputArg > &Outs)
static bool isAlternatingShuffMask(const ArrayRef< int > &Mask, int NumElts)
static bool isShuffleMaskInRange(const SmallVectorImpl< int > &ShuffV, int HalfVec, int LHSLastElementDefined, int RHSLastElementDefined)
static SDValue addShuffleForVecExtend(SDNode *N, SelectionDAG &DAG, SDValue Input, uint64_t Elems, uint64_t CorrectElems)
static cl::opt< bool > DisablePPCUnaligned("disable-ppc-unaligned", cl::desc("disable unaligned load/store generation on PPC"), cl::Hidden)
static SDValue combineADDToADDZE(SDNode *N, SelectionDAG &DAG, const PPCSubtarget &Subtarget)
static bool findConsecutiveLoad(LoadSDNode *LD, SelectionDAG &DAG)
static SDValue generateEquivalentSub(SDNode *N, int Size, bool Complement, bool Swap, SDLoc &DL, SelectionDAG &DAG)
This function is called when we have proved that a SETCC node can be replaced by subtraction (and oth...
static unsigned mapArgRegToOffsetAIX(unsigned Reg, const PPCFrameLowering *FL)
static void CalculateTailCallArgDest(SelectionDAG &DAG, MachineFunction &MF, bool IsPPC64, SDValue Arg, int SPDiff, unsigned ArgOffset, SmallVectorImpl< TailCallArgumentInfo > &TailCallArguments)
CalculateTailCallArgDest - Remember Argument for later processing.
static SDValue combineADDToMAT_PCREL_ADDR(SDNode *N, SelectionDAG &DAG, const PPCSubtarget &Subtarget)
static void setAlignFlagsForFI(SDValue N, unsigned &FlagSet, SelectionDAG &DAG)
Set alignment flags based on whether or not the Frame Index is aligned.
static bool isTOCSaveRestoreRequired(const PPCSubtarget &Subtarget)
static void updateForAIXShLibTLSModelOpt(TLSModel::Model &Model, SelectionDAG &DAG, const TargetMachine &TM)
updateForAIXShLibTLSModelOpt - Helper to initialize TLS model opt settings, and then apply the update...
static bool provablyDisjointOr(SelectionDAG &DAG, const SDValue &N)
Used when computing address flags for selecting loads and stores.
static bool callsShareTOCBase(const Function *Caller, const GlobalValue *CalleeGV, const TargetMachine &TM)
static SDValue generateSToVPermutedForVecShuffle(int ScalarSize, uint64_t ShuffleEltWidth, unsigned &NumValidElts, int FirstElt, int &LastElt, SDValue VecShuffOperand, SDValue SToVNode, SelectionDAG &DAG, const PPCSubtarget &Subtarget)
constexpr uint64_t AIXSmallTlsPolicySizeLimit
static bool isPCRelNode(SDValue N)
static void LowerMemOpCallTo(SelectionDAG &DAG, MachineFunction &MF, SDValue Chain, SDValue Arg, SDValue PtrOff, int SPDiff, unsigned ArgOffset, bool isPPC64, bool isTailCall, bool isVector, SmallVectorImpl< SDValue > &MemOpChains, SmallVectorImpl< TailCallArgumentInfo > &TailCallArguments, const SDLoc &dl)
LowerMemOpCallTo - Store the argument to the stack or remember it in case of tail calls.
static cl::opt< unsigned > PPCGatherAllAliasesMaxDepth("ppc-gather-alias-max-depth", cl::init(18), cl::Hidden, cl::desc("max depth when checking alias info in GatherAllAliases()"))
static bool areCallingConvEligibleForTCO_64SVR4(CallingConv::ID CallerCC, CallingConv::ID CalleeCC)
static const MCPhysReg FPR[]
FPR - The set of FP registers that should be allocated for arguments on Darwin and AIX.
static SDNode * isBLACompatibleAddress(SDValue Op, SelectionDAG &DAG)
isCallCompatibleAddress - Return the immediate to use if the specified 32-bit value is representable ...
static Align CalculateStackSlotAlignment(EVT ArgVT, EVT OrigVT, ISD::ArgFlagsTy Flags, unsigned PtrByteSize)
CalculateStackSlotAlignment - Calculates the alignment of this argument on the stack.
static bool IsSelect(MachineInstr &MI)
static bool haveEfficientBuildVectorPattern(BuildVectorSDNode *V, bool HasDirectMove, bool HasP8Vector)
Do we have an efficient pattern in a .td file for this node?
static SDValue getSToVPermuted(SDValue OrigSToV, SelectionDAG &DAG, const PPCSubtarget &Subtarget)
static bool CC_AIX(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &S)
static void setUsesTOCBasePtr(MachineFunction &MF)
static SDValue transformCallee(const SDValue &Callee, SelectionDAG &DAG, const SDLoc &dl, const PPCSubtarget &Subtarget)
static unsigned EnsureStackAlignment(const PPCFrameLowering *Lowering, unsigned NumBytes)
EnsureStackAlignment - Round stack frame size up from NumBytes to ensure minimum alignment required f...
static SDValue stripModuloOnShift(const TargetLowering &TLI, SDNode *N, SelectionDAG &DAG)
static bool isStoreConditional(SDValue Intrin, unsigned &StoreWidth)
static bool hasSameArgumentList(const Function *CallerFn, const CallBase &CB)
static bool isFPExtLoad(SDValue Op)
static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op, SelectionDAG &DAG, const SDLoc &dl, EVT DestVT=MVT::Other)
BuildIntrinsicOp - Return a unary operator intrinsic node with the specified intrinsic ID.
static bool isConsecutiveLSLoc(SDValue Loc, EVT VT, LSBaseSDNode *Base, unsigned Bytes, int Dist, SelectionDAG &DAG)
static void StoreTailCallArgumentsToStackSlot(SelectionDAG &DAG, SDValue Chain, const SmallVectorImpl< TailCallArgumentInfo > &TailCallArgs, SmallVectorImpl< SDValue > &MemOpChains, const SDLoc &dl)
StoreTailCallArgumentsToStackSlot - Stores arguments to their stack slot.
static const char AIXSSPCanaryWordName[]
static cl::opt< bool > UseAbsoluteJumpTables("ppc-use-absolute-jumptables", cl::desc("use absolute jump tables on ppc"), cl::Hidden)
static void setXFormForUnalignedFI(SDValue N, unsigned Flags, PPC::AddrMode &Mode)
static void getMaxByValAlign(Type *Ty, Align &MaxAlign, Align MaxMaxAlign)
getMaxByValAlign - Helper for getByValTypeAlignment to determine the desired ByVal argument alignment...
static bool isConsecutiveLS(SDNode *N, LSBaseSDNode *Base, unsigned Bytes, int Dist, SelectionDAG &DAG)
static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize, unsigned LHSStart, unsigned RHSStart)
isVMerge - Common function, used to match vmrg* shuffles.
static void getLabelAccessInfo(bool IsPIC, const PPCSubtarget &Subtarget, unsigned &HiOpFlags, unsigned &LoOpFlags, const GlobalValue *GV=nullptr)
Return true if we should reference labels using a PICBase, set the HiOpFlags and LoOpFlags to the tar...
cl::opt< bool > DisableAutoPairedVecSt("disable-auto-paired-vec-st", cl::desc("disable automatically generated 32byte paired vector stores"), cl::init(true), cl::Hidden)
static void buildCallOperands(SmallVectorImpl< SDValue > &Ops, PPCTargetLowering::CallFlags CFlags, const SDLoc &dl, SelectionDAG &DAG, SmallVector< std::pair< unsigned, SDValue >, 8 > &RegsToPass, SDValue Glue, SDValue Chain, SDValue &Callee, int SPDiff, const PPCSubtarget &Subtarget)
static cl::opt< bool > DisableInnermostLoopAlign32("disable-ppc-innermost-loop-align32", cl::desc("don't always align innermost loop to 32 bytes on ppc"), cl::Hidden)
static bool usePartialVectorLoads(SDNode *N, const PPCSubtarget &ST)
Returns true if we should use a direct load into vector instruction (such as lxsd or lfd),...
static SDValue getDataClassTest(SDValue Op, FPClassTest Mask, const SDLoc &Dl, SelectionDAG &DAG, const PPCSubtarget &Subtarget)
static void fixupShuffleMaskForPermutedSToV(SmallVectorImpl< int > &ShuffV, int LHSFirstElt, int LHSLastElt, int RHSFirstElt, int RHSLastElt, int HalfVec, unsigned LHSNumValidElts, unsigned RHSNumValidElts, const PPCSubtarget &Subtarget)
static cl::opt< bool > DisableSCO("disable-ppc-sco", cl::desc("disable sibling call optimization on ppc"), cl::Hidden)
static void fixupFuncForFI(SelectionDAG &DAG, int FrameIdx, EVT VT)
static cl::opt< bool > DisablePPCPreinc("disable-ppc-preinc", cl::desc("disable preincrement load/store generation on PPC"), cl::Hidden)
static Intrinsic::ID getIntrinsicForAtomicRMWBinOp128(AtomicRMWInst::BinOp BinOp)
static SDValue convertFPToInt(SDValue Op, SelectionDAG &DAG, const PPCSubtarget &Subtarget)
static unsigned CalculateStackSlotSize(EVT ArgVT, ISD::ArgFlagsTy Flags, unsigned PtrByteSize)
CalculateStackSlotSize - Calculates the size reserved for this argument on the stack.
static int CalculateTailCallSPDiff(SelectionDAG &DAG, bool isTailCall, unsigned ParamSize)
CalculateTailCallSPDiff - Get the amount the stack pointer has to be adjusted to accommodate the argu...
static Instruction * callIntrinsic(IRBuilderBase &Builder, Intrinsic::ID Id)
static void prepareIndirectCall(SelectionDAG &DAG, SDValue &Callee, SDValue &Glue, SDValue &Chain, const SDLoc &dl)
static SDValue LowerLabelRef(SDValue HiPart, SDValue LoPart, bool isPIC, SelectionDAG &DAG)
static SDValue isScalarToVec(SDValue Op)
static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl)
static cl::opt< bool > DisablePerfectShuffle("ppc-disable-perfect-shuffle", cl::desc("disable vector permute decomposition"), cl::init(true), cl::Hidden)
static bool getVectorCompareInfo(SDValue Intrin, int &CompareOpc, bool &isDot, const PPCSubtarget &Subtarget)
getVectorCompareInfo - Given an intrinsic, return false if it is not a vector comparison.
static unsigned invertFMAOpcode(unsigned Opc)
static const SDValue * getNormalLoadInput(const SDValue &Op, bool &IsPermuted)
static cl::opt< unsigned > PPCMinimumJumpTableEntries("ppc-min-jump-table-entries", cl::init(64), cl::Hidden, cl::desc("Set minimum number of entries to use a jump table on PPC"))
static bool isValidSplatLoad(const PPCSubtarget &Subtarget, const SDValue &Op, unsigned &Opcode)
static SDValue convertIntToFP(SDValue Op, SDValue Src, SelectionDAG &DAG, const PPCSubtarget &Subtarget, SDValue Chain=SDValue())
static void PrepareTailCall(SelectionDAG &DAG, SDValue &InGlue, SDValue &Chain, const SDLoc &dl, int SPDiff, unsigned NumBytes, SDValue LROp, SDValue FPOp, SmallVectorImpl< TailCallArgumentInfo > &TailCallArguments)
static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG, SDValue Chain, SDValue OldRetAddr, SDValue OldFP, int SPDiff, const SDLoc &dl)
EmitTailCallStoreFPAndRetAddr - Move the frame pointer and return address to the appropriate stack sl...
static SDValue BuildVSLDOI(SDValue LHS, SDValue RHS, unsigned Amt, EVT VT, SelectionDAG &DAG, const SDLoc &dl)
BuildVSLDOI - Return a VECTOR_SHUFFLE that is a vsldoi of the specified amount.
static SDValue combineBVZEXTLOAD(SDNode *N, SelectionDAG &DAG)
static SDValue truncateScalarIntegerArg(ISD::ArgFlagsTy Flags, EVT ValVT, SelectionDAG &DAG, SDValue ArgValue, MVT LocVT, const SDLoc &dl)
static void computeFlagsForAddressComputation(SDValue N, unsigned &FlagSet, SelectionDAG &DAG)
Given a node, compute flags that are used for address computation when selecting load and store instr...
cl::opt< bool > ANDIGlueBug
static SDValue getOutputChainFromCallSeq(SDValue CallSeqStart)
static bool CalculateStackSlotUsed(EVT ArgVT, EVT OrigVT, ISD::ArgFlagsTy Flags, unsigned PtrByteSize, unsigned LinkageSize, unsigned ParamAreaSize, unsigned &ArgOffset, unsigned &AvailableFPRs, unsigned &AvailableVRs)
CalculateStackSlotUsed - Return whether this argument will use its stack slot (instead of being passe...
static cl::opt< unsigned > PPCAIXTLSModelOptUseIEForLDLimit("ppc-aix-shared-lib-tls-model-opt-limit", cl::init(1), cl::Hidden, cl::desc("Set inclusive limit count of TLS local-dynamic access(es) in a " "function to use initial-exec"))
static unsigned getPPCStrictOpcode(unsigned Opc)
static void prepareDescriptorIndirectCall(SelectionDAG &DAG, SDValue &Callee, SDValue &Glue, SDValue &Chain, SDValue CallSeqStart, const CallBase *CB, const SDLoc &dl, bool hasNest, const PPCSubtarget &Subtarget)
static cl::opt< bool > DisableP10StoreForward("disable-p10-store-forward", cl::desc("disable P10 store forward-friendly conversion"), cl::Hidden, cl::init(false))
static bool isXXBRShuffleMaskHelper(ShuffleVectorSDNode *N, int Width)
static bool isFunctionGlobalAddress(const GlobalValue *CalleeGV)
static bool isSplatBV(SDValue Op)
static SDValue combineBVOfVecSExt(SDNode *N, SelectionDAG &DAG)
static cl::opt< bool > DisableILPPref("disable-ppc-ilp-pref", cl::desc("disable setting the node scheduling preference to ILP on PPC"), cl::Hidden)
static bool isNByteElemShuffleMask(ShuffleVectorSDNode *, unsigned, int)
Check that the mask is shuffling N byte elements.
static SDValue combineBVOfConsecutiveLoads(SDNode *N, SelectionDAG &DAG)
Reduce the number of loads when building a vector.
static bool isValidPCRelNode(SDValue N)
static constexpr Register SPReg
const SmallVectorImpl< MachineOperand > & Cond
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
SI optimize exec mask operations pre RA
static const MCExpr * MaskShift(const MCExpr *Val, uint32_t Mask, uint32_t Shift, MCContext &Ctx)
This file contains some templates that are useful if you are working with the STL at all.
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
Definition: Value.cpp:469
This file defines the SmallPtrSet class.
This file defines the SmallSet class.
This file defines the SmallVector class.
static bool Enabled
Definition: Statistic.cpp:46
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:166
This file describes how to lower LLVM code to machine code.
Value * RHS
Value * LHS
bool isFixed(unsigned ValNo) const
Definition: PPCCCState.h:68
opStatus convert(const fltSemantics &ToSemantics, roundingMode RM, bool *losesInfo)
Definition: APFloat.cpp:5465
bool isDenormal() const
Definition: APFloat.h:1441
APInt bitcastToAPInt() const
Definition: APFloat.h:1346
Class for arbitrary precision integers.
Definition: APInt.h:78
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition: APInt.h:234
void clearBit(unsigned BitPosition)
Set a given bit to 0.
Definition: APInt.h:1407
bool isNegatedPowerOf2() const
Check if this APInt's negated value is a power of two greater than zero.
Definition: APInt.h:449
APInt zext(unsigned width) const
Zero extend to a new width.
Definition: APInt.cpp:986
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1520
void setBit(unsigned BitPosition)
Set the given bit to 1 whose position is given as "bitPosition".
Definition: APInt.h:1330
APInt abs() const
Get the absolute value.
Definition: APInt.h:1773
bool isNegative() const
Determine sign of this APInt.
Definition: APInt.h:329
bool isSignedIntN(unsigned N) const
Check if this APInt has an N-bits signed integer value.
Definition: APInt.h:435
bool getBoolValue() const
Convert APInt to a boolean value.
Definition: APInt.h:471
double bitsToDouble() const
Converts APInt bits to a double.
Definition: APInt.h:1700
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
Definition: APInt.h:440
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
Definition: APInt.h:306
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
Definition: APInt.h:296
An arbitrary precision integer that knows its signedness.
Definition: APSInt.h:23
This class represents an incoming formal argument to a Function.
Definition: Argument.h:31
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:168
An instruction that atomically checks whether a specified value is in a memory location,...
Definition: Instructions.h:501
an instruction that atomically reads a memory location, combines it with another value,...
Definition: Instructions.h:704
BinOp
This enumeration lists the possible modifications atomicrmw can make.
Definition: Instructions.h:716
@ Add
*p = old + v
Definition: Instructions.h:720
@ USubCond
Subtract only if no unsigned overflow.
Definition: Instructions.h:764
@ Or
*p = old | v
Definition: Instructions.h:728
@ Sub
*p = old - v
Definition: Instructions.h:722
@ And
*p = old & v
Definition: Instructions.h:724
@ Xor
*p = old ^ v
Definition: Instructions.h:730
@ USubSat
*p = usub.sat(old, v) usub.sat matches the behavior of llvm.usub.sat.
Definition: Instructions.h:768
@ UIncWrap
Increment one up to a maximum value.
Definition: Instructions.h:756
@ UDecWrap
Decrement one until a minimum value or zero.
Definition: Instructions.h:760
@ Nand
*p = ~(old & v)
Definition: Instructions.h:726
BinOp getOperation() const
Definition: Instructions.h:805
This is an SDNode representing atomic operations.
StringRef getValueAsString() const
Return the attribute's value as a string.
Definition: Attributes.cpp:392
LLVM Basic Block Representation.
Definition: BasicBlock.h:61
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:219
const BlockAddress * getBlockAddress() const
The address of a basic block.
Definition: Constants.h:893
static BranchProbability getOne()
static BranchProbability getZero()
A "pseudo-class" with methods for operating on BUILD_VECTORs.
bool isConstantSplat(APInt &SplatValue, APInt &SplatUndef, unsigned &SplatBitSize, bool &HasAnyUndefs, unsigned MinSplatBits=0, bool isBigEndian=false) const
Check if this is a constant splat, and if so, find the smallest element size that splats the vector.
CCState - This class holds information needed while lowering arguments and return values.
MachineFunction & getMachineFunction() const
unsigned getFirstUnallocated(ArrayRef< MCPhysReg > Regs) const
getFirstUnallocated - Return the index of the first unallocated register in the set,...
MCRegister AllocateReg(MCPhysReg Reg)
AllocateReg - Attempt to allocate one register.
int64_t AllocateStack(unsigned Size, Align Alignment)
AllocateStack - Allocate a chunk of stack space with the specified size and alignment.
uint64_t getStackSize() const
Returns the size of the currently allocated portion of the stack.
bool isVarArg() const
void addLoc(const CCValAssign &V)
CCValAssign - Represent assignment of one arg/retval to a location.
bool isRegLoc() const
Register getLocReg() const
LocInfo getLocInfo() const
static CCValAssign getReg(unsigned ValNo, MVT ValVT, MCRegister Reg, MVT LocVT, LocInfo HTP, bool IsCustom=false)
static CCValAssign getCustomReg(unsigned ValNo, MVT ValVT, MCRegister Reg, MVT LocVT, LocInfo HTP)
static CCValAssign getMem(unsigned ValNo, MVT ValVT, int64_t Offset, MVT LocVT, LocInfo HTP, bool IsCustom=false)
bool needsCustom() const
bool isMemLoc() const
int64_t getLocMemOffset() const
unsigned getValNo() const
static CCValAssign getCustomMem(unsigned ValNo, MVT ValVT, int64_t Offset, MVT LocVT, LocInfo HTP)
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Definition: InstrTypes.h:1120
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
Definition: InstrTypes.h:1349
bool isStrictFP() const
Determine if the call requires strict floating point semantics.
Definition: InstrTypes.h:1879
CallingConv::ID getCallingConv() const
Definition: InstrTypes.h:1407
User::op_iterator arg_begin()
Return the iterator pointing to the beginning of the argument list.
Definition: InstrTypes.h:1269
bool isMustTailCall() const
Tests if this call site must be tail call optimized.
Value * getCalledOperand() const
Definition: InstrTypes.h:1342
User::op_iterator arg_end()
Return the iterator pointing to the end of the argument list.
Definition: InstrTypes.h:1275
unsigned arg_size() const
Definition: InstrTypes.h:1292
Function * getCaller()
Helper to get the caller (the parent function).
This class represents a function call, abstracting a target machine's calling convention.
bool isTailCall() const
ConstantFP - Floating Point Values [float, double].
Definition: Constants.h:271
This is the shared class of boolean and integer constants.
Definition: Constants.h:83
uint64_t getZExtValue() const
const APInt & getAPIntValue() const
int64_t getSExtValue() const
This is an important base class in LLVM.
Definition: Constant.h:42
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:63
bool isLittleEndian() const
Layout endianness...
Definition: DataLayout.h:197
unsigned getLargestLegalIntTypeSizeInBits() const
Returns the size of largest legal integer type size, or 0 if none are set.
Definition: DataLayout.cpp:873
IntegerType * getIntPtrType(LLVMContext &C, unsigned AddressSpace=0) const
Returns an integer type with size at least as big as that of a pointer in the given address space.
Definition: DataLayout.cpp:851
Align getABITypeAlign(Type *Ty) const
Returns the minimum ABI-required alignment for the specified type.
Definition: DataLayout.cpp:843
TypeSize getTypeAllocSize(Type *Ty) const
Returns the offset in bytes between successive objects of the specified type, including alignment pad...
Definition: DataLayout.h:457
A debug info location.
Definition: DebugLoc.h:33
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:156
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:211
This is a fast-path instruction selection class that generates poor code and doesn't support illegal ...
Definition: FastISel.h:66
FunctionLoweringInfo - This contains information that is global to a function that is used when lower...
bool hasOptSize() const
Optimize this function for size (-Os) or minimum size (-Oz).
Definition: Function.h:707
const DataLayout & getDataLayout() const
Get the data layout of the module this function belongs to.
Definition: Function.cpp:373
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
Definition: Function.cpp:766
uint64_t getFnAttributeAsParsedInteger(StringRef Kind, uint64_t Default=0) const
For a string attribute Kind, parse attribute as an integer.
Definition: Function.cpp:778
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition: Function.h:704
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition: Function.h:277
AttributeList getAttributes() const
Return the attribute list for this Function.
Definition: Function.h:353
arg_iterator arg_begin()
Definition: Function.h:868
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition: Function.cpp:369
size_t arg_size() const
Definition: Function.h:901
Type * getReturnType() const
Returns the type of the ret val.
Definition: Function.h:221
bool isVarArg() const
isVarArg - Return true if this function takes a variable number of arguments.
Definition: Function.h:234
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition: Function.cpp:731
const GlobalValue * getGlobal() const
const GlobalObject * getAliaseeObject() const
Definition: Globals.cpp:595
bool isThreadLocal() const
If the value is "Thread Local", its value isn't shared by the threads.
Definition: GlobalValue.h:263
void setThreadLocalMode(ThreadLocalMode Val)
Definition: GlobalValue.h:267
bool hasHiddenVisibility() const
Definition: GlobalValue.h:250
StringRef getSection() const
Definition: Globals.cpp:189
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:656
bool isStrongDefinitionForLinker() const
Returns true if this global's definition will be the one chosen by the linker.
Definition: GlobalValue.h:631
const DataLayout & getDataLayout() const
Get the data layout of the module this global belongs to.
Definition: Globals.cpp:130
bool hasComdat() const
Definition: GlobalValue.h:241
Type * getValueType() const
Definition: GlobalValue.h:296
bool hasProtectedVisibility() const
Definition: GlobalValue.h:251
Common base class shared among various IRBuilders.
Definition: IRBuilder.h:91
Value * CreateExtractValue(Value *Agg, ArrayRef< unsigned > Idxs, const Twine &Name="")
Definition: IRBuilder.h:2547
CallInst * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > Types, ArrayRef< Value * > Args, Instruction *FMFSource=nullptr, const Twine &Name="")
Create a call to intrinsic ID with Args, mangled using Types.
Definition: IRBuilder.cpp:890
Value * CreateLShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
Definition: IRBuilder.h:1460
BasicBlock * GetInsertBlock() const
Definition: IRBuilder.h:171
Value * CreateShl(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1439
Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="", bool IsNonNeg=false)
Definition: IRBuilder.h:2048
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value * > Args={}, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:2444
Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="", bool IsNUW=false, bool IsNSW=false)
Definition: IRBuilder.h:2034
Value * CreateOr(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1520
bool hasAtomicLoad() const LLVM_READONLY
Return true if this atomic instruction loads from memory.
static constexpr LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
Definition: LowLevelType.h:42
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
Base class for LoadSDNode and StoreSDNode.
An instruction for reading from memory.
Definition: Instructions.h:176
bool isUnordered() const
Definition: Instructions.h:249
This class is used to represent ISD::LOAD nodes.
const SDValue & getBasePtr() const
ISD::LoadExtType getExtensionType() const
Return whether this is a plain node, or one of the varieties of value-extending loads.
bool hasValue() const
TypeSize getValue() const
Context object for machine code objects.
Definition: MCContext.h:83
Base class for the full range of assembler expressions which are needed for parsing.
Definition: MCExpr.h:34
Wrapper class representing physical registers. Should be passed by value.
Definition: MCRegister.h:33
MCSymbolXCOFF * getQualNameSymbol() const
static const MCSymbolRefExpr * create(const MCSymbol *Symbol, MCContext &Ctx)
Definition: MCExpr.h:398
Metadata node.
Definition: Metadata.h:1069
Machine Value Type.
@ INVALID_SIMPLE_VALUE_TYPE
SimpleValueType SimpleTy
uint64_t getScalarSizeInBits() const
unsigned getVectorNumElements() const
bool isVector() const
Return true if this is a vector value type.
bool isInteger() const
Return true if this is an integer or a vector integer type.
static auto integer_valuetypes()
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
static auto fixedlen_vector_valuetypes()
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
bool isScalarInteger() const
Return true if this is an integer, not including vectors.
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
static MVT getIntegerVT(unsigned BitWidth)
static auto fp_valuetypes()
void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
void setCallFrameSize(unsigned N)
Set the call frame size on entry to this basic block.
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool IsImmutable, bool isAliased=false)
Create a new object at a fixed location on the stack.
int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
void setFrameAddressIsTaken(bool T)
void setHasTailCall(bool V=true)
void setReturnAddressIsTaken(bool s)
Align getObjectAlign(int ObjectIdx) const
Return the alignment of the specified stack object.
int64_t getObjectSize(int ObjectIdx) const
Return the size of the specified object.
bool hasVAStart() const
Returns true if the function calls the llvm.va_start intrinsic.
int64_t getObjectOffset(int ObjectIdx) const
Return the assigned stack offset of the specified object from the incoming stack pointer.
MCSymbol * getPICBaseSymbol() const
getPICBaseSymbol - Return a function-local symbol to represent the PIC base.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MCContext & getContext() const
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Register addLiveIn(MCRegister PReg, const TargetRegisterClass *RC)
addLiveIn - Add the specified physical register as a live-in value and create a corresponding virtual...
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
void insert(iterator MBBI, MachineBasicBlock *MBB)
const MachineInstrBuilder & setMIFlag(MachineInstr::MIFlag Flag) const
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addRegMask(const uint32_t *Mask) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & cloneMemRefs(const MachineInstr &OtherMI) const
const MachineInstrBuilder & addUse(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register use operand.
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
const MachineInstrBuilder & addDef(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register definition operand.
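A minimal sketch of the BuildMI/MachineInstrBuilder pattern behind these operand helpers, as used in custom inserters; MI, MBB, TII, and TargetMBB are assumed to be in scope, and the opcodes/registers are illustrative:

const DebugLoc &DL = MI.getDebugLoc();
MachineRegisterInfo &RegInfo = MBB->getParent()->getRegInfo();
Register Tmp = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);

// Tmp = R1 + 16
BuildMI(*MBB, MI, DL, TII->get(PPC::ADDI), Tmp)
    .addReg(PPC::R1)   // register use
    .addImm(16);       // immediate operand

// Unconditional branch to another (assumed) block.
BuildMI(*MBB, MI, DL, TII->get(PPC::B)).addMBB(TargetMBB);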
Representation of each machine instruction.
Definition: MachineInstr.h:69
@ EK_LabelDifference32
EK_LabelDifference32 - Each entry is the address of the block minus the address of the jump table.
A description of a memory reference used in the backend.
LocationSize getSize() const
Return the size in bytes of the memory reference.
Flags
Flags values. These may be or'd together.
@ MOVolatile
The memory access is volatile.
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
@ MOLoad
The memory access reads data.
@ MOInvariant
The memory access always returns the same value (or traps).
@ MOStore
The memory access writes data.
Flags getFlags() const
Return the raw flags of the source value.
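A minimal sketch of allocating a MachineMemOperand with these flags, e.g. to attach to a target memory intrinsic node; the slot size, LLT, and alignment are illustrative:

MachineFunction &MF = DAG.getMachineFunction();
int FrameIdx = MF.getFrameInfo().CreateStackObject(16, Align(16),
                                                   /*isSpillSlot=*/false);
MachineMemOperand *MMO = MF.getMachineMemOperand(
    MachinePointerInfo::getFixedStack(MF, FrameIdx),
    MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable,
    LLT::fixed_vector(4, 32), Align(16));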
MachineOperand class - Representation of each machine instruction operand.
static MachineOperand CreateImm(int64_t Val)
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
const TargetRegisterClass * getRegClass(Register Reg) const
Return the register class of the specified virtual register.
MachineInstr * getVRegDef(Register Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is ...
Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
Register getLiveInVirtReg(MCRegister PReg) const
getLiveInVirtReg - If PReg is a live-in physical register, return the corresponding live-in virtual r...
An SDNode that represents everything that will be needed to construct a MachineInstr.
This SDNode is used for target intrinsics that touch memory and need an associated MachineMemOperand.
This is an abstract virtual class for memory operations.
Align getAlign() const
AAMDNodes getAAInfo() const
Returns the AA info that describes the dereference.
MachineMemOperand * getMemOperand() const
Return a MachineMemOperand object describing the memory reference performed by operation.
const SDValue & getBasePtr() const
const MachinePointerInfo & getPointerInfo() const
const SDValue & getChain() const
EVT getMemoryVT() const
Return the type of the in-memory value.
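A minimal sketch of inspecting a memory node through the MemSDNode interface during a DAG combine, assuming SDNode *N may or may not be a memory operation:

if (auto *MemN = dyn_cast<MemSDNode>(N)) {
  EVT MemVT = MemN->getMemoryVT();
  SDValue Chain = MemN->getChain();
  SDValue Ptr = MemN->getBasePtr();
  // Only rewrite naturally aligned v4i32 accesses (illustrative condition).
  if (MemVT == MVT::v4i32 && MemN->getAlign().value() >= 16) {
    // ... rebuild the access with DAG.getLoad/DAG.getStore using Chain, Ptr,
    // and MemN->getMemOperand() as needed.
  }
}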
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
uint64_t getReturnSaveOffset() const
getReturnSaveOffset - Return the previous frame offset to save the return address.
uint64_t getFramePointerSaveOffset() const
getFramePointerSaveOffset - Return the previous frame offset to save the frame pointer.
unsigned getLinkageSize() const
getLinkageSize - Return the size of the PowerPC ABI linkage area.
uint64_t getTOCSaveOffset() const
getTOCSaveOffset - Return the previous frame offset to save the TOC register -- 64-bit SVR4 ABI only.
PPCFunctionInfo - This class is derived from MachineFunction private PowerPC target-specific informat...
void setVarArgsNumFPR(unsigned Num)
unsigned getVarArgsNumFPR() const
void setVarArgsNumGPR(unsigned Num)
void appendParameterType(ParamType Type)
bool isAIXFuncTLSModelOptInitDone() const
void setMinReservedArea(unsigned size)
unsigned getVarArgsNumGPR() const
unsigned getMinReservedArea() const
void setVarArgsStackOffset(int Offset)
void setVarArgsFrameIndex(int Index)
void addLiveInAttr(Register VReg, ISD::ArgFlagsTy Flags)
This function associates attributes for each live-in virtual register.
void setFramePointerSaveIndex(int Idx)
static bool hasPCRelFlag(unsigned TF)
Definition: PPCInstrInfo.h:304
bool is32BitELFABI() const
Definition: PPCSubtarget.h:224
unsigned descriptorTOCAnchorOffset() const
Definition: PPCSubtarget.h:266
MVT getScalarIntVT() const
Definition: PPCSubtarget.h:253
bool isAIXABI() const
Definition: PPCSubtarget.h:219
bool useSoftFloat() const
Definition: PPCSubtarget.h:179
const PPCFrameLowering * getFrameLowering() const override
Definition: PPCSubtarget.h:147
bool needsSwapsForVSXMemOps() const
Definition: PPCSubtarget.h:207
bool isPPC64() const
isPPC64 - Return true if we are generating code for 64-bit pointer mode.
bool isUsingPCRelativeCalls() const
bool usesFunctionDescriptors() const
True if the ABI is descriptor based.
Definition: PPCSubtarget.h:260
MCRegister getEnvironmentPointerRegister() const
Definition: PPCSubtarget.h:278
const PPCInstrInfo * getInstrInfo() const override
Definition: PPCSubtarget.h:150
bool isSVR4ABI() const
Definition: PPCSubtarget.h:220
unsigned getCPUDirective() const
getCPUDirective - Returns the -m directive specified for the cpu.
Definition: PPCSubtarget.h:139
POPCNTDKind hasPOPCNTD() const
Definition: PPCSubtarget.h:211
bool isLittleEndian() const
Definition: PPCSubtarget.h:186
bool isTargetLinux() const
Definition: PPCSubtarget.h:217
MCRegister getTOCPointerRegister() const
Definition: PPCSubtarget.h:284
MCRegister getStackPointerRegister() const
Definition: PPCSubtarget.h:296
bool is64BitELFABI() const
Definition: PPCSubtarget.h:223
bool isELFv2ABI() const
const PPCTargetMachine & getTargetMachine() const
Definition: PPCSubtarget.h:160
bool isPredictableSelectIsExpensive() const
Definition: PPCSubtarget.h:302
bool enableMachineScheduler() const override
Scheduling customization.
const PPCRegisterInfo * getRegisterInfo() const override
Definition: PPCSubtarget.h:157
bool isGVIndirectSymbol(const GlobalValue *GV) const
True if the GV will be accessed via an indirect symbol.
unsigned descriptorEnvironmentPointerOffset() const
Definition: PPCSubtarget.h:272
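A minimal sketch of how these subtarget predicates steer lowering decisions, assuming a const PPCSubtarget &Subtarget member as in PPCTargetLowering; the branches are illustrative, not the exact PPC logic:

const PPCFrameLowering *FL = Subtarget.getFrameLowering();
unsigned LinkageSize = FL->getLinkageSize(); // ABI linkage area size

if (Subtarget.isAIXABI() && Subtarget.usesFunctionDescriptors()) {
  // AIX: indirect calls load the entry point and TOC from a descriptor.
  unsigned TOCAnchorOff = Subtarget.descriptorTOCAnchorOffset();
  (void)TOCAnchorOff;
} else if (Subtarget.isPPC64() && Subtarget.isELFv2ABI()) {
  // 64-bit ELFv2: the caller saves the TOC pointer at a fixed offset.
  uint64_t TOCSaveOffset = FL->getTOCSaveOffset();
  (void)TOCSaveOffset;
}
(void)LinkageSize;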
MachineBasicBlock * emitEHSjLjLongJmp(MachineInstr &MI, MachineBasicBlock *MBB) const
CCAssignFn * ccAssignFnForCall(CallingConv::ID CC, bool Return, bool IsVarArg) const
bool isTruncateFree(Type *Ty1, Type *Ty2) const override
isTruncateFree - Return true if it's free to truncate a value of type Ty1 to type Ty2.
Value * emitMaskedAtomicRMWIntrinsic(IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr, Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const override
Perform a masked atomicrmw using a target-specific intrinsic.
MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *MBB) const override
This method should be implemented by targets that mark instructions with the 'usesCustomInserter' fla...
bool isFPExtFree(EVT DestVT, EVT SrcVT) const override
Return true if an fpext operation is free (for instance, because single-precision floating-point numb...
PPC::AddrMode SelectForceXFormMode(SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG) const
SelectForceXFormMode - Given the specified address, force it to be represented as an indexed [r+r] op...
Instruction * emitTrailingFence(IRBuilderBase &Builder, Instruction *Inst, AtomicOrdering Ord) const override
bool hasInlineStackProbe(const MachineFunction &MF) const override
MachineBasicBlock * emitEHSjLjSetJmp(MachineInstr &MI, MachineBasicBlock *MBB) const
const char * getTargetNodeName(unsigned Opcode) const override
getTargetNodeName() - This method returns the name of a target specific DAG node.
bool supportsTailCallFor(const CallBase *CB) const
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override
Return true if folding a constant offset with the given GlobalAddress is legal.
MachineBasicBlock * emitProbedAlloca(MachineInstr &MI, MachineBasicBlock *MBB) const
bool isZExtFree(SDValue Val, EVT VT2) const override
Return true if zero-extending the specific node Val to type VT2 is free (either because it's implicit...
MachineBasicBlock * EmitPartwordAtomicBinary(MachineInstr &MI, MachineBasicBlock *MBB, bool is8bit, unsigned Opcode, unsigned CmpOpcode=0, unsigned CmpPred=0) const
SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, NegatibleCost &Cost, unsigned Depth=0) const override
Return the newly negated expression if the cost is not expensive and set the cost in Cost to indicate...
bool SelectAddressRegImm(SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG, MaybeAlign EncodingAlignment) const
SelectAddressRegImm - Returns true if the address N can be represented by a base register plus a sign...
bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I, MachineFunction &MF, unsigned Intrinsic) const override
Given an intrinsic, checks if on the target the intrinsic will need to map to a MemIntrinsicNode (tou...
SDValue expandVSXLoadForLE(SDNode *N, DAGCombinerInfo &DCI) const
bool splitValueIntoRegisterParts(SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts, unsigned NumParts, MVT PartVT, std::optional< CallingConv::ID > CC) const override
Target-specific splitting of values into parts that fit a register storing a legal type.
void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const override
LowerAsmOperandForConstraint - Lower the specified operand into the Ops vector.
void ReplaceNodeResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG) const override
ReplaceNodeResults - Replace the results of node with an illegal result type with new values built ou...
TargetLowering::AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override
Returns how the IR-level AtomicExpand pass should expand the given AtomicRMW, if at all.
Align getByValTypeAlignment(Type *Ty, const DataLayout &DL) const override
getByValTypeAlignment - Return the desired alignment for ByVal aggregate function arguments in the ca...
bool SelectAddressRegReg(SDValue N, SDValue &Base, SDValue &Index, SelectionDAG &DAG, MaybeAlign EncodingAlignment=std::nullopt) const
SelectAddressRegReg - Given the specified address, check to see if it can be more efficiently repre...
MachineBasicBlock * EmitAtomicBinary(MachineInstr &MI, MachineBasicBlock *MBB, unsigned AtomicSize, unsigned BinOpcode, unsigned CmpOpcode=0, unsigned CmpPred=0) const
SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created) const override
Targets may override this function to provide custom SDIV lowering for power-of-2 denominators.
void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth=0) const override
Determine which of the bits specified in Mask are known to be either zero or one and return them in t...
bool SelectAddressRegRegOnly(SDValue N, SDValue &Base, SDValue &Index, SelectionDAG &DAG) const
SelectAddressRegRegOnly - Given the specified address, force it to be represented as an indexed [r+...
bool useSoftFloat() const override
SDValue getPICJumpTableRelocBase(SDValue Table, SelectionDAG &DAG) const override
Returns relocation base for the given PIC jumptable.
void insertSSPDeclarations(Module &M) const override
Inserts necessary declarations for SSP (stack protection) purpose.
Value * emitMaskedAtomicCmpXchgIntrinsic(IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr, Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const override
Perform a masked cmpxchg using a target-specific intrinsic.
ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &info, const char *constraint) const override
Examine constraint string and operand type and determine a weight value.
bool enableAggressiveFMAFusion(EVT VT) const override
Return true if target always benefits from combining into FMA for a given value type.
Register getRegisterByName(const char *RegName, LLT VT, const MachineFunction &MF) const override
Return the register ID of the name passed in.
bool decomposeMulByConstant(LLVMContext &Context, EVT VT, SDValue C) const override
Return true if it is profitable to transform an integer multiplication-by-constant into simpler opera...
unsigned getJumpTableEncoding() const override
Return the entry encoding for a jump table in the current function.
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I=nullptr) const override
isLegalAddressingMode - Return true if the addressing mode represented by AM is legal for this target...
bool preferIncOfAddToSubOfNot(EVT VT) const override
These two forms are equivalent: sub y, (xor x, -1) add (add x, 1), y The variant with two add's is IR...
bool shouldConvertConstantLoadToIntImm(const APInt &Imm, Type *Ty) const override
Returns true if it is beneficial to convert a load of a constant to just the constant itself.
const MCPhysReg * getScratchRegisters(CallingConv::ID CC) const override
Returns a 0 terminated array of registers that can be safely used as scratch registers.
bool getPreIndexedAddressParts(SDNode *N, SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG) const override
getPreIndexedAddressParts - returns true by value, base pointer and offset pointer and addressing mod...
bool isProfitableToHoist(Instruction *I) const override
isProfitableToHoist - Check if it is profitable to hoist instruction I to its dominator block.
bool isFPImmLegal(const APFloat &Imm, EVT VT, bool ForCodeSize) const override
Returns true if the target can instruction select the specified FP immediate natively.
ConstraintType getConstraintType(StringRef Constraint) const override
getConstraintType - Given a constraint, return the type of constraint it is for this target.
const MCExpr * getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI, MCContext &Ctx) const override
This returns the relocation base for the given PIC jumptable, the same as getPICJumpTableRelocBase,...
bool shallExtractConstSplatVectorElementToStore(Type *VectorTy, unsigned ElemSizeInBits, unsigned &Index) const override
Return true if the target shall perform extract vector element and store given that the vector is kno...
EVT getOptimalMemOpType(const MemOp &Op, const AttributeList &FuncAttributes) const override
It returns EVT::Other if the type should be determined using generic target-independent logic.
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override
This method will be invoked for all target nodes and for any target-independent nodes that the target...
SDValue expandVSXStoreForLE(SDNode *N, DAGCombinerInfo &DCI) const
void CollectTargetIntrinsicOperands(const CallInst &I, SmallVectorImpl< SDValue > &Ops, SelectionDAG &DAG) const override
unsigned getStackProbeSize(const MachineFunction &MF) const
PPCTargetLowering(const PPCTargetMachine &TM, const PPCSubtarget &STI)
TargetLowering::AtomicExpansionKind shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const override
Returns how the given atomic cmpxchg should be expanded by the IR-level AtomicExpand pass.
bool useLoadStackGuardNode(const Module &M) const override
Override to support customized stack guard loading.
bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, EVT VT) const override
isFMAFasterThanFMulAndFAdd - Return true if an FMA operation is faster than a pair of fmul and fadd i...
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const override
Is unaligned memory access allowed for the given type, and is it fast relative to software emulation.
bool shouldExpandBuildVectorWithShuffles(EVT VT, unsigned DefinedValues) const override
bool SelectAddressRegImm34(SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG) const
Similar to the 16-bit case but for instructions that take a 34-bit displacement field (prefixed loads...
std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const override
Given a physical register constraint (e.g.
Register getExceptionSelectorRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception typeid on entry to a la...
bool isJumpTableRelative() const override
Register getExceptionPointerRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception address on entry to an ...
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
LowerOperation - Provide custom lowering hooks for some operations.
PPC::AddrMode SelectOptimalAddrMode(const SDNode *Parent, SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG, MaybeAlign Align) const
SelectOptimalAddrMode - Based on a node N and its Parent (a MemSDNode), compute the address flags of...
Value * getSDagStackGuard(const Module &M) const override
Return the variable that's previously inserted by insertSSPDeclarations, if any, otherwise return nul...
bool SelectAddressPCRel(SDValue N, SDValue &Base) const
SelectAddressPCRel - Represent the specified address as a PC-relative address, i.e. as [pc+imm].
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const override
getSetCCResultType - Return the ISD::SETCC ValueType
bool SelectAddressEVXRegReg(SDValue N, SDValue &Base, SDValue &Index, SelectionDAG &DAG) const
SelectAddressEVXRegReg - Given the specified address, check to see if it can be more efficiently re...
bool isLegalICmpImmediate(int64_t Imm) const override
isLegalICmpImmediate - Return true if the specified immediate is legal icmp immediate,...
bool isAccessedAsGotIndirect(SDValue N) const
Align getPrefLoopAlignment(MachineLoop *ML) const override
Return the preferred loop alignment.
FastISel * createFastISel(FunctionLoweringInfo &FuncInfo, const TargetLibraryInfo *LibInfo) const override
createFastISel - This method returns a target-specific FastISel object, or null if the target does no...
bool shouldInlineQuadwordAtomics() const
Instruction * emitLeadingFence(IRBuilderBase &Builder, Instruction *Inst, AtomicOrdering Ord) const override
Inserts in the IR a target-specific intrinsic specifying a fence.
bool isLegalAddImmediate(int64_t Imm) const override
isLegalAddImmediate - Return true if the specified immediate is legal add immediate,...
Common code between 32-bit and 64-bit PowerPC targets.
static PointerType * getUnqual(Type *ElementType)
This constructs a pointer to an object of the specified type in the default address space (address sp...
Definition: DerivedTypes.h:686
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:91
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
This class provides iterator support for SDUse operands that use a specific SDNode.
Represents one node in the SelectionDAG.
ArrayRef< SDUse > ops() const
void dump() const
Dump this node, for debugging.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
bool hasOneUse() const
Return true if there is exactly one use of this node.
iterator_range< use_iterator > uses()
SDNodeFlags getFlags() const
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
unsigned getNumValues() const
Return the number of values defined/returned by this operator.
const SDValue & getOperand(unsigned Num) const
uint64_t getConstantOperandVal(unsigned Num) const
Helper method returns the integer value of a ConstantSDNode operand.
use_iterator use_begin() const
Provide iteration support to walk over all uses of an SDNode.
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
iterator_range< user_iterator > users()
bool hasNUsesOfValue(unsigned NUses, unsigned Value) const
Return true if there are exactly NUSES uses of the indicated value.
user_iterator user_begin() const
Provide iteration support to walk over all users of an SDNode.
op_iterator op_end() const
op_iterator op_begin() const
static use_iterator use_end()
Represents a use of a SDNode.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isUndef() const
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
SDValue getValue(unsigned R) const
void dump() const
EVT getValueType() const
Return the ValueType of the referenced return value.
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
const SDValue & getOperand(unsigned i) const
uint64_t getConstantOperandVal(unsigned i) const
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
unsigned getOpcode() const
unsigned getNumOperands() const
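A minimal sketch of walking a node's operands and users with these SDNode/SDValue accessors, in the style of PerformDAGCombine; the opcodes matched are illustrative:

if (N->getOpcode() == ISD::ADD && N->getOperand(0).hasOneUse() &&
    isa<ConstantSDNode>(N->getOperand(1))) {
  uint64_t Imm = N->getConstantOperandVal(1);
  for (SDNode *User : N->users()) {
    if (User->getOpcode() == ISD::TRUNCATE &&
        User->getValueType(0) == MVT::i32) {
      // Candidate for folding the add+truncate pair (Imm is the known addend).
    }
  }
  (void)Imm;
}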
static SectionKind getMetadata()
Definition: SectionKind.h:188
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
Definition: SelectionDAG.h:228
SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:748
SDValue getStackArgumentTokenFactor(SDValue Chain)
Compute a TokenFactor to force all the incoming stack arguments to be loaded from the stack.
const TargetSubtargetInfo & getSubtarget() const
Definition: SelectionDAG.h:497
SDValue getCopyToReg(SDValue Chain, const SDLoc &dl, Register Reg, SDValue N)
Definition: SelectionDAG.h:799
SDValue getMergeValues(ArrayRef< SDValue > Ops, const SDLoc &dl)
Create a MERGE_VALUES node from the given operands.
SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
SDValue getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src, SDValue Size, Align Alignment, bool isVol, bool AlwaysInline, const CallInst *CI, std::optional< bool > OverrideTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo, const AAMDNodes &AAInfo=AAMDNodes(), AAResults *AA=nullptr)
SDValue makeEquivalentMemoryOrdering(SDValue OldChain, SDValue NewMemOpChain)
If an existing load has uses of its chain, create a token factor node with that chain and the new mem...
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Chain=SDValue(), bool IsSignaling=false)
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...
SDValue getConstantFP(double Val, const SDLoc &DL, EVT VT, bool isTarget=false)
Create a ConstantFPSDNode wrapping a constant value.
SDValue getRegister(Register Reg, EVT VT)
SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
Align getEVTAlign(EVT MemoryVT) const
Compute the default alignment value for the given type.
void addNoMergeSiteInfo(const SDNode *Node, bool NoMerge)
Set NoMergeSiteInfo to be associated with Node if NoMerge is true.
SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
const TargetLowering & getTargetLoweringInfo() const
Definition: SelectionDAG.h:501
static constexpr unsigned MaxRecursionDepth
Definition: SelectionDAG.h:456
SDValue getTargetJumpTable(int JTI, EVT VT, unsigned TargetFlags=0)
Definition: SelectionDAG.h:758
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getCALLSEQ_END(SDValue Chain, SDValue Op1, SDValue Op2, SDValue InGlue, const SDLoc &DL)
Return a new CALLSEQ_END node, which always must have a glue result (to ensure it's not CSE'd).
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
Definition: SelectionDAG.h:854
bool isSplatValue(SDValue V, const APInt &DemandedElts, APInt &UndefElts, unsigned Depth=0) const
Test whether V has a splatted value for all the demanded elements.
SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, Register Reg, EVT VT)
Definition: SelectionDAG.h:825
SDValue getSelect(const SDLoc &DL, EVT VT, SDValue Cond, SDValue LHS, SDValue RHS, SDNodeFlags Flags=SDNodeFlags())
Helper function to make it easier to build Select's if you just have operands and don't want to check...
const DataLayout & getDataLayout() const
Definition: SelectionDAG.h:495
SDValue getTargetFrameIndex(int FI, EVT VT)
Definition: SelectionDAG.h:753
SDValue getTokenFactor(const SDLoc &DL, SmallVectorImpl< SDValue > &Vals)
Creates a new TokenFactor containing Vals.
bool areNonVolatileConsecutiveLoads(LoadSDNode *LD, LoadSDNode *Base, unsigned Bytes, int Dist) const
Return true if loads are next to each other and can be merged.
SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
SDValue getSignedTargetConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
Definition: SelectionDAG.h:710
SDValue getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, EVT SVT, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getMDNode(const MDNode *MD)
Return an MDNodeSDNode which holds an MDNode.
void ReplaceAllUsesWith(SDValue From, SDValue To)
Modify anything using 'From' to use 'To' instead.
SDValue getCommutedVectorShuffle(const ShuffleVectorSDNode &SV)
Returns an ISD::VECTOR_SHUFFLE node semantically equivalent to the shuffle node in input but with swa...
SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
SDValue getSignedConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
SDValue getCALLSEQ_START(SDValue Chain, uint64_t InSize, uint64_t OutSize, const SDLoc &DL)
Return a new CALLSEQ_START node, that starts new call frame, in which InSize bytes are set up inside ...
SDValue getSExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either sign-extending or trunca...
SDValue getBoolExtOrTrunc(SDValue Op, const SDLoc &SL, EVT VT, EVT OpVT)
Convert Op, which must be of integer type, to the integer type VT, by using an extension appropriate ...
SDValue getExternalSymbol(const char *Sym, EVT VT)
const TargetMachine & getTarget() const
Definition: SelectionDAG.h:496
SDValue getAnyExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either any-extending or truncat...
SDValue getSelectCC(const SDLoc &DL, SDValue LHS, SDValue RHS, SDValue True, SDValue False, ISD::CondCode Cond)
Helper function to make it easier to build SelectCC's if you just have an ISD::CondCode instead of an...
SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
SDValue getValueType(EVT)
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
Definition: SelectionDAG.h:698
unsigned ComputeNumSignBits(SDValue Op, unsigned Depth=0) const
Return the number of times the sign bit of the register is replicated into the other bits.
SDValue getBoolConstant(bool V, const SDLoc &DL, EVT VT, EVT OpVT)
Create a true or false constant of type VT using the target's BooleanContent for type OpVT.
SDValue getTargetBlockAddress(const BlockAddress *BA, EVT VT, int64_t Offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:794
bool isBaseWithConstantOffset(SDValue Op) const
Return true if the specified operand is an ISD::ADD with a ConstantSDNode on the right-hand side,...
SDValue getVectorIdxConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone.
MachineFunction & getMachineFunction() const
Definition: SelectionDAG.h:490
SDValue getSplatBuildVector(EVT VT, const SDLoc &DL, SDValue Op)
Return a splat ISD::BUILD_VECTOR node, consisting of Op splatted to all elements.
Definition: SelectionDAG.h:871
SDValue getFrameIndex(int FI, EVT VT, bool isTarget=false)
KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
SDValue getRegisterMask(const uint32_t *RegMask)
SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
SDValue getCondCode(ISD::CondCode Cond)
bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
SDValue getObjectPtrOffset(const SDLoc &SL, SDValue Ptr, TypeSize Offset)
Create an add instruction with appropriate flags when used for addressing some offset of an object.
LLVMContext * getContext() const
Definition: SelectionDAG.h:508
SDValue getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef< SDValue > Ops, EVT MemVT, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags Flags=MachineMemOperand::MOLoad|MachineMemOperand::MOStore, LocationSize Size=0, const AAMDNodes &AAInfo=AAMDNodes())
Creates a MemIntrinsicNode that may produce a result and takes a list of operands.
SDValue getTargetExternalSymbol(const char *Sym, EVT VT, unsigned TargetFlags=0)
SDValue getMCSymbol(MCSymbol *Sym, EVT VT)
SDValue CreateStackTemporary(TypeSize Bytes, Align Alignment)
Create a stack temporary based on the size in bytes and the alignment.
SDNode * UpdateNodeOperands(SDNode *N, SDValue Op)
Mutate the specified node in-place to have the specified operands.
SDValue getTargetConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:765
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
Definition: SelectionDAG.h:578
std::pair< SDValue, SDValue > SplitScalar(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the scalar node with EXTRACT_ELEMENT using the provided VTs and return the low/high part.
SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
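A minimal sketch of building a short node sequence with the SelectionDAG factory methods above, assuming SDLoc dl and an SDValue Ptr of pointer type are in scope; the chain and types are illustrative:

// Load an i32 four bytes past Ptr and widen it to i64.
SDValue OffPtr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::getFixed(4));
SDValue Load = DAG.getLoad(MVT::i32, dl, DAG.getEntryNode(), OffPtr,
                           MachinePointerInfo());
SDValue Wide = DAG.getZExtOrTrunc(Load, dl, MVT::i64);

// Constants and simple arithmetic.
SDValue Ten = DAG.getConstant(10, dl, MVT::i64);
SDValue Sum = DAG.getNode(ISD::ADD, dl, MVT::i64, Wide, Ten);
(void)Sum;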
This SDNode is used to implement the code generator support for the llvm IR shufflevector instruction...
int getMaskElt(unsigned Idx) const
ArrayRef< int > getMask() const
size_type size() const
Definition: SmallPtrSet.h:94
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:452
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:384
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:519
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:132
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
Definition: SmallSet.h:175
void clear()
Definition: SmallSet.h:204
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:181
bool empty() const
Definition: SmallVector.h:81
size_t size() const
Definition: SmallVector.h:78
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:573
void push_back(const T &Elt)
Definition: SmallVector.h:413
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1196
StackOffset holds a fixed and a scalable offset in bytes.
Definition: TypeSize.h:33
This class is used to represent ISD::STORE nodes.
const SDValue & getBasePtr() const
const SDValue & getValue() const
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:51
constexpr size_t size() const
size - Get the string size.
Definition: StringRef.h:150
constexpr const char * data() const
data - Get a pointer to the start of the string (which may not be null terminated).
Definition: StringRef.h:144
Class to represent struct types.
Definition: DerivedTypes.h:218
Information about stack frame layout on the target.
unsigned getStackAlignment() const
getStackAlignment - This method returns the number of bytes to which the stack pointer must be aligne...
TargetInstrInfo - Interface to description of machine instruction set.
Provides information about what library functions are available for the current target.
void setBooleanVectorContents(BooleanContent Ty)
Specify how the target extends the result of a vector boolean value from a vector of i1 to a wider ty...
void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action)
Indicate that the specified operation does not work with the specified type and indicate what to do a...
bool PredictableSelectIsExpensive
Tells the code generator that select is more expensive than a branch if the branch is usually predict...
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
virtual bool shouldExpandBuildVectorWithShuffles(EVT, unsigned DefinedValues) const
unsigned MaxStoresPerMemcpyOptSize
Likewise for functions with the OptSize attribute.
MachineBasicBlock * emitPatchPoint(MachineInstr &MI, MachineBasicBlock *MBB) const
Replace/modify any TargetFrameIndex operands with a target-dependent sequence of memory operands that...
virtual const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const
Return the register class that should be used for the specified value type.
void setMinStackArgumentAlignment(Align Alignment)
Set the minimum stack alignment of an argument.
virtual MVT getVectorIdxTy(const DataLayout &DL) const
Returns the type to be used for the index operand of: ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT...
const TargetMachine & getTargetMachine() const
unsigned MaxLoadsPerMemcmp
Specify maximum number of load instructions per memcmp call.
virtual bool isZExtFree(Type *FromTy, Type *ToTy) const
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
virtual Value * getSDagStackGuard(const Module &M) const
Return the variable that's previously inserted by insertSSPDeclarations, if any, otherwise return nul...
void setIndexedLoadAction(ArrayRef< unsigned > IdxModes, MVT VT, LegalizeAction Action)
Indicate that the specified indexed load does or does not work with the specified type and indicate w...
void setPrefLoopAlignment(Align Alignment)
Set the target's preferred loop alignment.
void setMaxAtomicSizeInBitsSupported(unsigned SizeInBits)
Set the maximum atomic operation size supported by the backend.
Sched::Preference getSchedulingPreference() const
Return target scheduling preference.
void setMinFunctionAlignment(Align Alignment)
Set the target's minimum function alignment.
bool isOperationCustom(unsigned Op, EVT VT) const
Return true if the operation uses custom lowering, regardless of whether the type is legal or not.
unsigned MaxStoresPerMemsetOptSize
Likewise for functions with the OptSize attribute.
bool hasBigEndianPartOrdering(EVT VT, const DataLayout &DL) const
When splitting a value of the specified type into parts, does the Lo or Hi part come first?...
EVT getShiftAmountTy(EVT LHSTy, const DataLayout &DL) const
Returns the type for the shift amount of a shift opcode.
void setBooleanContents(BooleanContent Ty)
Specify how the target extends the result of integer and floating point boolean values from i1 to a w...
unsigned MaxStoresPerMemmove
Specify maximum number of store instructions per memmove call.
virtual Align getPrefLoopAlignment(MachineLoop *ML=nullptr) const
Return the preferred loop alignment.
void computeRegisterProperties(const TargetRegisterInfo *TRI)
Once all of the register classes are added, this allows us to compute derived properties we expose.
unsigned MaxStoresPerMemmoveOptSize
Likewise for functions with the OptSize attribute.
void addRegisterClass(MVT VT, const TargetRegisterClass *RC)
Add the specified register class as an available regclass for the specified value type.
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
void setIndexedStoreAction(ArrayRef< unsigned > IdxModes, MVT VT, LegalizeAction Action)
Indicate that the specified indexed store does or does not work with the specified type and indicate ...
virtual bool isJumpTableRelative() const
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
void setLibcallName(RTLIB::Libcall Call, const char *Name)
Rename the default libcall routine name for the specified libcall.
void setPrefFunctionAlignment(Align Alignment)
Set the target's preferred function alignment.
bool isOperationLegal(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target.
virtual AtomicExpansionKind shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const
Returns how the given atomic cmpxchg should be expanded by the IR-level AtomicExpand pass.
unsigned MaxStoresPerMemset
Specify maximum number of store instructions per memset call.
void setMinimumJumpTableEntries(unsigned Val)
Indicate the minimum number of blocks to generate jump tables.
void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified truncating store does not work with the specified type and indicate what ...
bool isOperationLegalOrCustom(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
unsigned MaxLoadsPerMemcmpOptSize
Likewise for functions with the OptSize attribute.
void setStackPointerRegisterToSaveRestore(Register R)
If set to a physical register, this specifies the register that llvm.savestack/llvm....
void AddPromotedToType(unsigned Opc, MVT OrigVT, MVT DestVT)
If Opc/OrigVT is specified as being promoted, the promotion code defaults to trying a larger integer/...
AtomicExpansionKind
Enum that specifies what an atomic load/AtomicRMWInst is expanded to, if at all.
void setCondCodeAction(ArrayRef< ISD::CondCode > CCs, MVT VT, LegalizeAction Action)
Indicate that the specified condition code is or isn't supported on the target and indicate what to d...
void setTargetDAGCombine(ArrayRef< ISD::NodeType > NTs)
Targets should invoke this method for each target independent node that they want to provide a custom...
virtual AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const
Returns how the IR-level AtomicExpand pass should expand the given AtomicRMW, if at all.
void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified load with extension does not work with the specified type and indicate wh...
unsigned GatherAllAliasesMaxDepth
Depth that GatherAllAliases should continue looking for chain dependencies when trying to find a more...
NegatibleCost
Enum that specifies when a float negation is beneficial.
virtual bool shouldSignExtendTypeInLibCall(Type *Ty, bool IsSigned) const
Returns true if arguments should be sign-extended in lib calls.
std::vector< ArgListEntry > ArgListTy
void setHasMultipleConditionRegisters(bool hasManyRegs=true)
Tells the code generator that the target has multiple (allocatable) condition registers that can be u...
unsigned MaxStoresPerMemcpy
Specify maximum number of store instructions per memcpy call.
void setSchedulingPreference(Sched::Preference Pref)
Specify the target scheduling preference.
virtual void insertSSPDeclarations(Module &M) const
Inserts necessary declarations for SSP (stack protection) purpose.
void setJumpIsExpensive(bool isExpensive=true)
Tells the code generator not to expand logic operations on comparison predicates into separate sequen...
virtual MCSymbol * getFunctionEntryPointSymbol(const GlobalValue *Func, const TargetMachine &TM) const
If supported, return the function entry point symbol.
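A minimal sketch of the constructor-time configuration these setters support, in the style of the PPCTargetLowering constructor; the specific actions and types here are illustrative, not PPC's actual choices:

// Inside a TargetLowering-derived constructor:
addRegisterClass(MVT::i32, &PPC::GPRCRegClass);

setOperationAction(ISD::SELECT_CC, MVT::i32, Expand);
setLoadExtAction(ISD::SEXTLOAD, MVT::i32, MVT::i1, Promote);
setTruncStoreAction(MVT::f64, MVT::f32, Expand);

setTargetDAGCombine({ISD::ADD, ISD::SHL});
setMinFunctionAlignment(Align(4));
setSchedulingPreference(Sched::Hybrid);

// Must run once all register classes have been added.
computeRegisterProperties(Subtarget.getRegisterInfo());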
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
virtual const MCExpr * getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI, MCContext &Ctx) const
This returns the relocation base for the given PIC jumptable, the same as getPICJumpTableRelocBase,...
SDValue lowerCmpEqZeroToCtlzSrl(SDValue Op, SelectionDAG &DAG) const
void softenSetCCOperands(SelectionDAG &DAG, EVT VT, SDValue &NewLHS, SDValue &NewRHS, ISD::CondCode &CCCode, const SDLoc &DL, const SDValue OldLHS, const SDValue OldRHS) const
Soften the operands of a comparison.
std::pair< SDValue, SDValue > makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT, ArrayRef< SDValue > Ops, MakeLibCallOptions CallOptions, const SDLoc &dl, SDValue Chain=SDValue()) const
Returns a pair of (return value, chain).
SDValue getCheaperNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, unsigned Depth=0) const
This is the helper function to return the newly negated expression only when the cost is cheaper.
virtual ConstraintType getConstraintType(StringRef Constraint) const
Given a constraint, return the type of constraint it is for this target.
virtual SDValue LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA, SelectionDAG &DAG) const
Lower TLS global address SDNode for target independent emulated TLS model.
std::pair< SDValue, SDValue > LowerCallTo(CallLoweringInfo &CLI) const
This function lowers an abstract call to a function into an actual call.
bool isPositionIndependent() const
virtual SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, NegatibleCost &Cost, unsigned Depth=0) const
Return the newly negated expression if the cost is not expensive and set the cost in Cost to indicate...
virtual ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &info, const char *constraint) const
Examine constraint string and operand type and determine a weight value.
virtual SDValue getSqrtInputTest(SDValue Operand, SelectionDAG &DAG, const DenormalMode &Mode) const
Return a target-dependent comparison result if the input operand is suitable for use with a square ro...
virtual SDValue getPICJumpTableRelocBase(SDValue Table, SelectionDAG &DAG) const
Returns relocation base for the given PIC jumptable.
virtual std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const
Given a physical register constraint (e.g.
bool verifyReturnAddressArgumentIsConstant(SDValue Op, SelectionDAG &DAG) const
bool isInTailCallPosition(SelectionDAG &DAG, SDNode *Node, SDValue &Chain) const
Check whether a given call node is in tail position within its function.
virtual SDValue getSqrtResultForDenormInput(SDValue Operand, SelectionDAG &DAG) const
Return a target-dependent result if the input operand is not suitable for use with a square root esti...
virtual bool useLoadStackGuardNode(const Module &M) const
If this function returns true, SelectionDAGBuilder emits a LOAD_STACK_GUARD node when it is lowering ...
virtual void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const
Lower the specified operand into the Ops vector.
virtual bool isGAPlusOffset(SDNode *N, const GlobalValue *&GA, int64_t &Offset) const
Returns true (and the GlobalValue and the offset) if the node is a GlobalAddress + offset.
virtual unsigned getJumpTableEncoding() const
Return the entry encoding for a jump table in the current function.
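A minimal sketch of expanding an operation into a runtime library call with makeLibCall, assuming this runs inside a TargetLowering member with SDValue Op being a two-operand node; the libcall chosen is illustrative:

SDLoc dl(Op);
TargetLowering::MakeLibCallOptions CallOptions;
SDValue Ops[] = {Op.getOperand(0), Op.getOperand(1)};
std::pair<SDValue, SDValue> Call =
    makeLibCall(DAG, RTLIB::ADD_F128, Op.getValueType(), Ops, CallOptions, dl);
SDValue Result = Call.first;   // Call.second is the output chain.
(void)Result;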
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:77
TLSModel::Model getTLSModel(const GlobalValue *GV) const
Returns the TLS model which should be used for the given global variable.
bool useEmulatedTLS() const
Returns true if this target uses emulated TLS.
Reloc::Model getRelocationModel() const
Returns the code generation relocation model.
bool shouldAssumeDSOLocal(const GlobalValue *GV) const
TargetOptions Options
CodeModel::Model getCodeModel() const
Returns the code model.
unsigned UnsafeFPMath
UnsafeFPMath - This flag is enabled when the -enable-unsafe-fp-math flag is specified on the command ...
unsigned NoInfsFPMath
NoInfsFPMath - This flag is enabled when the -enable-no-infs-fp-math flag is specified on the command...
unsigned PPCGenScalarMASSEntries
Enables scalar MASS conversions.
unsigned NoNaNsFPMath
NoNaNsFPMath - This flag is enabled when the -enable-no-nans-fp-math flag is specified on the command...
unsigned GuaranteedTailCallOpt
GuaranteedTailCallOpt - This flag is enabled when -tailcallopt is specified on the commandline.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition: TypeSize.h:345
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:270
bool isFloatTy() const
Return true if this is 'float', a 32-bit IEEE fp type.
Definition: Type.h:153
bool isEmptyTy() const
Return true if this type is empty, that is, it has no elements or all of its elements are empty.
@ FloatTyID
32-bit floating point type
Definition: Type.h:58
@ DoubleTyID
64-bit floating point type
Definition: Type.h:59
@ FP128TyID
128-bit floating point type (112-bit significand)
Definition: Type.h:61
static Type * getVoidTy(LLVMContext &C)
bool isSized(SmallPtrSetImpl< Type * > *Visited=nullptr) const
Return true if it makes sense to take the size of this type.
Definition: Type.h:310
bool isDoubleTy() const
Return true if this is 'double', a 64-bit IEEE fp type.
Definition: Type.h:156
bool isFunctionTy() const
True if this is an instance of FunctionType.
Definition: Type.h:255
static IntegerType * getInt64Ty(LLVMContext &C)
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:237
TypeID getTypeID() const
Return the type id for the type.
Definition: Type.h:136
TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition: Type.h:355
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
User * getUser() const
Returns the User that contains this Use.
Definition: Use.h:72
Value * getOperand(unsigned i) const
Definition: User.h:228
unsigned getNumOperands() const
Definition: User.h:250
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition: Value.h:434
const ParentTy * getParent() const
Definition: ilist_node.h:32
self_iterator getIterator()
Definition: ilist_node.h:132
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:125
@ Entry
Definition: COFF.h:844
@ Cold
Attempts to make code in the caller as efficient as possible under the assumption that the call is no...
Definition: CallingConv.h:47
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition: CallingConv.h:41
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
bool isNON_EXTLoad(const SDNode *N)
Returns true if the specified node is a non-extending load.
NodeType
ISD::NodeType enum - This enum defines the target-independent operators for a SelectionDAG.
Definition: ISDOpcodes.h:40
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition: ISDOpcodes.h:780
@ MERGE_VALUES
MERGE_VALUES - This node takes multiple discrete operands and returns them all as its individual resu...
Definition: ISDOpcodes.h:243
@ STACKRESTORE
STACKRESTORE has two operands, an input chain and a pointer to restore to it returns an output chain.
Definition: ISDOpcodes.h:1197
@ STACKSAVE
STACKSAVE - STACKSAVE has one operand, an input chain.
Definition: ISDOpcodes.h:1193
@ TargetConstantPool
Definition: ISDOpcodes.h:174
@ STRICT_FSETCC
STRICT_FSETCC/STRICT_FSETCCS - Constrained versions of SETCC, used for floating-point operands only.
Definition: ISDOpcodes.h:491
@ DELETED_NODE
DELETED_NODE - This is an illegal value that is used to catch errors.
Definition: ISDOpcodes.h:44
@ EH_SJLJ_LONGJMP
OUTCHAIN = EH_SJLJ_LONGJMP(INCHAIN, buffer) This corresponds to the eh.sjlj.longjmp intrinsic.
Definition: ISDOpcodes.h:153
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition: ISDOpcodes.h:257
@ BSWAP
Byte Swap and Counting operators.
Definition: ISDOpcodes.h:744
@ VAEND
VAEND, VASTART - VAEND and VASTART have three operands: an input chain, pointer, and a SRCVALUE.
Definition: ISDOpcodes.h:1226
@ ATOMIC_STORE
OUTCHAIN = ATOMIC_STORE(INCHAIN, val, ptr) This corresponds to "store atomic" instruction.
Definition: ISDOpcodes.h:1312
@ STRICT_FCEIL
Definition: ISDOpcodes.h:441
@ ADDC
Carry-setting nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:276
@ ADD
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:246
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
Definition: ISDOpcodes.h:1102
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition: ISDOpcodes.h:814
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition: ISDOpcodes.h:498
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition: ISDOpcodes.h:205
@ GlobalAddress
Definition: ISDOpcodes.h:78
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition: ISDOpcodes.h:841
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition: ISDOpcodes.h:558
@ FADD
Simple binary floating point operators.
Definition: ISDOpcodes.h:397
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
Definition: ISDOpcodes.h:717
@ SDIVREM
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
Definition: ISDOpcodes.h:262
@ STRICT_FSETCCS
Definition: ISDOpcodes.h:492
@ FP16_TO_FP
FP16_TO_FP, FP_TO_FP16 - These operators are used to perform promotions and truncation for half-preci...
Definition: ISDOpcodes.h:964
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition: ISDOpcodes.h:954
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition: ISDOpcodes.h:236
@ INIT_TRAMPOLINE
INIT_TRAMPOLINE - This corresponds to the init_trampoline intrinsic.
Definition: ISDOpcodes.h:1270
@ FLDEXP
FLDEXP - ldexp, inspired by libm (op0 * 2**op1).
Definition: ISDOpcodes.h:997
@ STRICT_FSQRT
Constrained versions of libm-equivalent floating point intrinsics.
Definition: ISDOpcodes.h:418
@ GlobalTLSAddress
Definition: ISDOpcodes.h:79
@ FrameIndex
Definition: ISDOpcodes.h:80
@ SET_ROUNDING
Set rounding mode.
Definition: ISDOpcodes.h:936
@ SIGN_EXTEND
Conversion operators.
Definition: ISDOpcodes.h:805
@ STRICT_UINT_TO_FP
Definition: ISDOpcodes.h:465
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition: ISDOpcodes.h:635
@ TargetExternalSymbol
Definition: ISDOpcodes.h:175
@ BR
Control flow instructions. These all have token chains.
Definition: ISDOpcodes.h:1118
@ TargetJumpTable
Definition: ISDOpcodes.h:173
@ PREFETCH
PREFETCH - This corresponds to a prefetch intrinsic.
Definition: ISDOpcodes.h:1292
@ FSINCOS
FSINCOS - Compute both fsin and fcos as a single operation.
Definition: ISDOpcodes.h:1059
@ FNEG
Perform various unary floating-point operations inspired by libm.
Definition: ISDOpcodes.h:981
@ BR_CC
BR_CC - Conditional branch.
Definition: ISDOpcodes.h:1148
@ SSUBO
Same for subtraction.
Definition: ISDOpcodes.h:334
@ BR_JT
BR_JT - Jumptable branch.
Definition: ISDOpcodes.h:1127
@ IS_FPCLASS
Performs a check of floating point class property, defined by IEEE-754.
Definition: ISDOpcodes.h:522
@ SSUBSAT
RESULT = [US]SUBSAT(LHS, RHS) - Perform saturation subtraction on 2 integers with the same bit width ...
Definition: ISDOpcodes.h:356
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition: ISDOpcodes.h:757
@ ATOMIC_LOAD
Val, OUTCHAIN = ATOMIC_LOAD(INCHAIN, ptr) This corresponds to "load atomic" instruction.
Definition: ISDOpcodes.h:1308
@ EXTRACT_ELEMENT
EXTRACT_ELEMENT - This is used to get the lower or upper (determined by a Constant,...
Definition: ISDOpcodes.h:229
@ VACOPY
VACOPY - VACOPY has 5 operands: an input chain, a destination pointer, a source pointer,...
Definition: ISDOpcodes.h:1222
@ TargetGlobalAddress
TargetGlobalAddress - Like GlobalAddress, but the DAG does no folding or anything else with this node...
Definition: ISDOpcodes.h:170
@ STRICT_FTRUNC
Definition: ISDOpcodes.h:445
@ GET_ROUNDING
Returns the current rounding mode: -1 Undefined, 0 Round to 0, 1 Round to nearest (ties to even), 2 Round to ...
Definition: ISDOpcodes.h:931
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition: ISDOpcodes.h:674
@ SHL
Shift and rotation operations.
Definition: ISDOpcodes.h:735
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition: ISDOpcodes.h:615
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition: ISDOpcodes.h:588
@ FMINNUM_IEEE
FMINNUM_IEEE/FMAXNUM_IEEE - Perform floating-point minimumNumber or maximumNumber on two values,...
Definition: ISDOpcodes.h:1044
@ STRICT_FMAXNUM
Definition: ISDOpcodes.h:439
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition: ISDOpcodes.h:550
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition: ISDOpcodes.h:811
@ STRICT_FMINNUM
Definition: ISDOpcodes.h:440
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition: ISDOpcodes.h:772
@ ATOMIC_CMP_SWAP
Val, OUTCHAIN = ATOMIC_CMP_SWAP(INCHAIN, ptr, cmp, swap) For double-word atomic operations: ValLo,...
Definition: ISDOpcodes.h:1319
@ FMINNUM
FMINNUM/FMAXNUM - Perform floating-point minimum or maximum on two values.
Definition: ISDOpcodes.h:1031
@ DYNAMIC_STACKALLOC
DYNAMIC_STACKALLOC - Allocate some number of bytes on the stack aligned to a specified boundary.
Definition: ISDOpcodes.h:1112
@ ConstantPool
Definition: ISDOpcodes.h:82
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition: ISDOpcodes.h:849
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
Definition: ISDOpcodes.h:697
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition: ISDOpcodes.h:939
@ STRICT_FROUND
Definition: ISDOpcodes.h:443
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition: ISDOpcodes.h:766
@ STRICT_SINT_TO_FP
STRICT_[US]INT_TO_FP - Convert a signed or unsigned integer to a floating point value.
Definition: ISDOpcodes.h:464
@ STRICT_FFLOOR
Definition: ISDOpcodes.h:442
@ INLINEASM_BR
INLINEASM_BR - Branching version of inline asm. Used by asm-goto.
Definition: ISDOpcodes.h:1168
@ EH_DWARF_CFA
EH_DWARF_CFA - This node represents the pointer to the DWARF Canonical Frame Address (CFA),...
Definition: ISDOpcodes.h:135
@ FRAMEADDR
FRAMEADDR, RETURNADDR - These nodes represent llvm.frameaddress and llvm.returnaddress on the DAG.
Definition: ISDOpcodes.h:100
@ STRICT_FP_TO_UINT
Definition: ISDOpcodes.h:458
@ STRICT_FP_ROUND
X = STRICT_FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision ...
Definition: ISDOpcodes.h:480
@ STRICT_FP_TO_SINT
STRICT_FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:457
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:887
@ READCYCLECOUNTER
READCYCLECOUNTER - This corresponds to the readcyclecounter intrinsic.
Definition: ISDOpcodes.h:1253
@ STRICT_FP_EXTEND
X = STRICT_FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition: ISDOpcodes.h:485
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition: ISDOpcodes.h:709
@ TRAP
TRAP - Trapping instruction.
Definition: ISDOpcodes.h:1279
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition: ISDOpcodes.h:190
@ ADDE
Carry-using nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:286
@ STRICT_FADD
Constrained versions of the binary floating point operators.
Definition: ISDOpcodes.h:407
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition: ISDOpcodes.h:539
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition: ISDOpcodes.h:52
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition: ISDOpcodes.h:920
@ INLINEASM
INLINEASM - Represents an inline asm block.
Definition: ISDOpcodes.h:1165
@ STRICT_FNEARBYINT
Definition: ISDOpcodes.h:438
@ EH_SJLJ_SETJMP
RESULT, OUTCHAIN = EH_SJLJ_SETJMP(INCHAIN, buffer) This corresponds to the eh.sjlj....
Definition: ISDOpcodes.h:147
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition: ISDOpcodes.h:817
@ VAARG
VAARG - VAARG has four operands: an input chain, a pointer, a SRCVALUE, and the alignment.
Definition: ISDOpcodes.h:1217
@ BRCOND
BRCOND - Conditional branch.
Definition: ISDOpcodes.h:1141
@ BlockAddress
Definition: ISDOpcodes.h:84
@ SHL_PARTS
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations.
Definition: ISDOpcodes.h:794
@ AssertSext
AssertSext, AssertZext - These nodes record if a register contains a value that has already been zero...
Definition: ISDOpcodes.h:61
@ FCOPYSIGN
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
Definition: ISDOpcodes.h:508
@ SADDSAT
RESULT = [US]ADDSAT(LHS, RHS) - Perform saturation addition on 2 integers with the same bit width (W)...
Definition: ISDOpcodes.h:347
@ AssertZext
Definition: ISDOpcodes.h:62
@ CALLSEQ_START
CALLSEQ_START/CALLSEQ_END - These operators mark the beginning and end of a call sequence,...
Definition: ISDOpcodes.h:1211
@ STRICT_FRINT
Definition: ISDOpcodes.h:437
@ GET_DYNAMIC_AREA_OFFSET
GET_DYNAMIC_AREA_OFFSET - get offset from native SP to the address of the most recent dynamic alloca.
Definition: ISDOpcodes.h:1398
@ ABDS
ABDS/ABDU - Absolute difference - Return the absolute difference between two numbers interpreted as s...
Definition: ISDOpcodes.h:692
@ ADJUST_TRAMPOLINE
ADJUST_TRAMPOLINE - This corresponds to the adjust_trampoline intrinsic.
Definition: ISDOpcodes.h:1276
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition: ISDOpcodes.h:198
@ TargetGlobalTLSAddress
Definition: ISDOpcodes.h:171
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition: ISDOpcodes.h:530
bool isNormalStore(const SDNode *N)
Returns true if the specified node is a non-truncating and unindexed store.
bool isZEXTLoad(const SDNode *N)
Returns true if the specified node is a ZEXTLOAD.
bool isBitwiseLogicOp(unsigned Opcode)
Whether this is bitwise logic opcode.
Definition: ISDOpcodes.h:1494
bool isUNINDEXEDLoad(const SDNode *N)
Returns true if the specified node is an unindexed load.
bool isEXTLoad(const SDNode *N)
Returns true if the specified node is a EXTLOAD.
bool isBuildVectorAllZeros(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are 0 or undef.
bool isSignedIntSetCC(CondCode Code)
Return true if this is a setcc instruction that performs a signed comparison when used with integer o...
Definition: ISDOpcodes.h:1635
MemIndexedMode
MemIndexedMode enum - This enum defines the load / store indexed addressing modes.
Definition: ISDOpcodes.h:1551
bool isSEXTLoad(const SDNode *N)
Returns true if the specified node is a SEXTLOAD.
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
Definition: ISDOpcodes.h:1602
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
Definition: ISDOpcodes.h:1582
bool isUnsignedIntSetCC(CondCode Code)
Return true if this is a setcc instruction that performs an unsigned comparison when used with intege...
Definition: ISDOpcodes.h:1641
bool isNormalLoad(const SDNode *N)
Returns true if the specified node is a non-extending and unindexed load.
ID ArrayRef< Type * > Tys
Definition: Intrinsics.h:102
Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})
Look up the Function declaration of the intrinsic id in the Module M.
Definition: Intrinsics.cpp:731
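As an illustrative sketch of how such a lookup is typically used (the intrinsic choice and helper name below are assumptions for illustration, not taken from this file):
  #include "llvm/IR/Intrinsics.h"
  #include "llvm/IR/Module.h"
  #include "llvm/IR/Type.h"
  // Hypothetical helper: fetch (or create) the declaration of an overloaded
  // intrinsic in module M; the overload type list selects the i64 variant.
  static llvm::Function *getCttzI64Decl(llvm::Module &M) {
    return llvm::Intrinsic::getOrInsertDeclaration(
        &M, llvm::Intrinsic::cttz, {llvm::Type::getInt64Ty(M.getContext())});
  }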
@ Bitcast
Perform the operation on a different, but equivalently sized type.
@ VecShuffle
Definition: NVPTX.h:96
@ MO_TLSLDM_FLAG
MO_TLSLDM_FLAG - On AIX the ML relocation type is only valid for a reference to a TOC symbol from the...
Definition: PPC.h:144
@ MO_PIC_LO_FLAG
MO_PIC_LO_FLAG = MO_PIC_FLAG | MO_LO.
Definition: PPC.h:192
@ MO_TPREL_PCREL_FLAG
MO_TPREL_PCREL_FLAG = MO_PCREL_FLAG | MO_TPREL_FLAG.
Definition: PPC.h:195
@ MO_GOT_TPREL_PCREL_FLAG
MO_GOT_TPREL_PCREL_FLAG - A combination of flags; if these bits are set they should produce the reloc...
Definition: PPC.h:170
@ MO_GOT_PCREL_FLAG
MO_GOT_PCREL_FLAG = MO_PCREL_FLAG | MO_GOT_FLAG.
Definition: PPC.h:201
@ MO_TLSGDM_FLAG
MO_TLSGDM_FLAG - If this bit is set the symbol reference is relative to the region handle of TLS Gene...
Definition: PPC.h:152
@ MO_PCREL_FLAG
MO_PCREL_FLAG - If this bit is set, the symbol reference is relative to the current instruction addre...
Definition: PPC.h:119
@ MO_TLSLD_FLAG
MO_TLSLD_FLAG - If this bit is set the symbol reference is relative to TLS Local Dynamic model.
Definition: PPC.h:148
@ MO_TLS_PCREL_FLAG
MO_TLS_PCREL_FLAG = MO_PCREL_FLAG | MO_TLS.
Definition: PPC.h:198
@ MO_TPREL_HA
Definition: PPC.h:177
@ MO_PLT
On PPC, the 12 bits are not enough for all target operand flags.
Definition: PPC.h:111
@ MO_TLS
Symbol for VK_PPC_TLS fixup attached to an ADD instruction.
Definition: PPC.h:186
@ MO_TPREL_FLAG
MO_TPREL_FLAG - If this bit is set, the symbol reference is relative to the thread pointer and the sy...
Definition: PPC.h:138
@ MO_TPREL_LO
Definition: PPC.h:176
@ MO_LO
MO_LO, MO_HA - lo16(symbol) and ha16(symbol)
Definition: PPC.h:173
@ MO_GOT_TLSLD_PCREL_FLAG
MO_GOT_TLSLD_PCREL_FLAG - A combination of flags; if these bits are set they should produce the reloc...
Definition: PPC.h:164
@ MO_PIC_HA_FLAG
MO_PIC_HA_FLAG = MO_PIC_FLAG | MO_HA.
Definition: PPC.h:189
@ MO_TLSGD_FLAG
MO_TLSGD_FLAG - If this bit is set the symbol reference is relative to TLS General Dynamic model for ...
Definition: PPC.h:133
@ MO_GOT_TLSGD_PCREL_FLAG
MO_GOT_TLSGD_PCREL_FLAG - A combination of flags; if these bits are set they should produce the reloc...
Definition: PPC.h:158
@ MO_HA
Definition: PPC.h:174
@ MO_PIC_FLAG
MO_PIC_FLAG - If this bit is set, the symbol reference is relative to the function's picbase,...
Definition: PPC.h:115
@ SEXT_LD_SPLAT
VSRC, CHAIN = SEXT_LD_SPLAT, CHAIN, Ptr - a splatting memory load that sign-extends.
@ FCTIDUZ
Newer FCTI[D,W]UZ floating-point-to-integer conversion instructions for unsigned integers with round ...
@ ADDI_TLSGD_L_ADDR
G8RC = ADDI_TLSGD_L_ADDR G8RReg, Symbol, Symbol - Op that combines ADDI_TLSGD_L and GET_TLS_ADDR unti...
@ FSQRT
Square root instruction.
@ STRICT_FCFID
Constrained integer-to-floating-point conversion instructions.
@ DYNALLOC
The following two target-specific nodes are used for calls through function pointers in the 64-bit SV...
@ COND_BRANCH
CHAIN = COND_BRANCH CHAIN, CRRC, OPC, DESTBB [, INFLAG] - This corresponds to the COND_BRANCH pseudo ...
@ TLSLD_AIX
[GP|G8]RC = TLSLD_AIX, TOC_ENTRY(module handle) Op that requires a single input of the module handle ...
@ CALL_RM
The variants that implicitly define rounding mode for calls with strictfp semantics.
@ STORE_VEC_BE
CHAIN = STORE_VEC_BE CHAIN, VSRC, Ptr - Occurs only for little endian.
@ BDNZ
CHAIN = BDNZ CHAIN, DESTBB - These are used to create counter-based loops.
@ MTVSRZ
Direct move from a GPR to a VSX register (zero)
@ SRL
These nodes represent PPC shifts.
@ VECINSERT
VECINSERT - The PPC vector insert instruction.
@ LXSIZX
GPRC, CHAIN = LXSIZX, CHAIN, Ptr, ByteWidth - This is a load of an integer smaller than 64 bits into ...
@ FNMSUB
FNMSUB - Negated multiply-subtract instruction.
@ RFEBB
CHAIN = RFEBB CHAIN, State - Return from event-based branch.
@ FCTIDZ
FCTI[D,W]Z - The FCTIDZ and FCTIWZ instructions, taking an f32 or f64 operand, producing an f64 value...
@ SC
CHAIN = SC CHAIN, Imm128 - System call.
@ GET_TLS_ADDR
x3 = GET_TLS_ADDR x3, Symbol - For the general-dynamic TLS model, produces a call to __tls_get_addr(s...
@ XXSPLTI32DX
XXSPLTI32DX - The PPC XXSPLTI32DX instruction.
@ ANDI_rec_1_EQ_BIT
i1 = ANDI_rec_1_[EQ|GT]_BIT(i32 or i64 x) - Represents the result of the eq or gt bit of CR0 after ex...
@ FRE
Reciprocal estimate instructions (unary FP ops).
@ ADDIS_GOT_TPREL_HA
G8RC = ADDIS_GOT_TPREL_HA x2, Symbol - Used by the initial-exec TLS model, produces an ADDIS8 instruc...
@ CLRBHRB
CHAIN = CLRBHRB CHAIN - Clear branch history rolling buffer.
@ STORE_COND
CHAIN,Glue = STORE_COND CHAIN, GPR, Ptr The store conditional instruction ST[BHWD]ARX that produces a...
@ SINT_VEC_TO_FP
Extract a subvector from signed integer vector and convert to FP.
@ EXTRACT_SPE
Extract SPE register component, second argument is high or low.
@ XXSWAPD
VSRC, CHAIN = XXSWAPD CHAIN, VSRC - Occurs only for little endian.
@ ADDI_TLSLD_L_ADDR
G8RC = ADDI_TLSLD_L_ADDR G8RReg, Symbol, Symbol - Op that combines ADDI_TLSLD_L and GET_TLSLD_ADDR un...
@ ATOMIC_CMP_SWAP_8
ATOMIC_CMP_SWAP - the exact same as the target-independent nodes except they ensure that the compare ...
@ ST_VSR_SCAL_INT
Store scalar integers from VSR.
@ VCMP
RESVEC = VCMP(LHS, RHS, OPC) - Represents one of the altivec VCMP* instructions.
@ BCTRL
CHAIN,FLAG = BCTRL(CHAIN, INFLAG) - Directly corresponds to a BCTRL instruction.
@ BUILD_SPE64
BUILD_SPE64 and EXTRACT_SPE are analogous to BUILD_PAIR and EXTRACT_ELEMENT but take f64 arguments in...
@ LFIWZX
GPRC, CHAIN = LFIWZX CHAIN, Ptr - This is a floating-point load which zero-extends from a 32-bit inte...
@ RET_GLUE
Return with a glue operand, matched by 'blr'.
@ SCALAR_TO_VECTOR_PERMUTED
PowerPC instructions that have SCALAR_TO_VECTOR semantics tend to place the value into the least sign...
@ EXTRACT_VSX_REG
EXTRACT_VSX_REG = Extract one of the underlying vsx registers of an accumulator or pair register.
@ STXSIX
STXSIX - The STXSI[bh]X instruction.
@ MAT_PCREL_ADDR
MAT_PCREL_ADDR = Materialize a PC Relative address.
@ MFOCRF
R32 = MFOCRF(CRREG, INFLAG) - Represents the MFOCRF instruction.
@ XXSPLT
XXSPLT - The PPC VSX splat instructions.
@ TOC_ENTRY
GPRC = TOC_ENTRY GA, TOC Loads the entry for GA from the TOC, where the TOC base is given by the last...
@ XXPERMDI
XXPERMDI - The PPC XXPERMDI instruction.
@ ADDIS_DTPREL_HA
G8RC = ADDIS_DTPREL_HA x3, Symbol - For the local-dynamic TLS model, produces an ADDIS8 instruction t...
@ ADD_TLS
G8RC = ADD_TLS G8RReg, Symbol - Can be used by the initial-exec and local-exec TLS models,...
@ MTVSRA
Direct move from a GPR to a VSX register (algebraic)
@ VADD_SPLAT
VRRC = VADD_SPLAT Elt, EltSize - Temporary node to be expanded during instruction selection to optimi...
@ PPC32_GOT
GPRC = address of GLOBAL_OFFSET_TABLE.
@ ADDI_DTPREL_L
G8RC = ADDI_DTPREL_L G8RReg, Symbol - For the local-dynamic TLS model, produces an ADDI8 instruction ...
@ BCTRL_LOAD_TOC
CHAIN,FLAG = BCTRL(CHAIN, ADDR, INFLAG) - The combination of a bctrl instruction and the TOC reload r...
@ PPC32_PICGOT
GPRC = address of GLOBAL_OFFSET_TABLE.
@ FCFID
FCFID - The FCFID instruction, taking an f64 operand and producing an f64 value containing the FP re...
@ CR6SET
ch, gl = CR6[UN]SET ch, inglue - Toggle CR bit 6 for SVR4 vararg calls
@ LBRX
GPRC, CHAIN = LBRX CHAIN, Ptr, Type - This is a byte-swapping load instruction.
@ GET_TLS_MOD_AIX
x3 = GET_TLS_MOD_AIX _$TLSML - For the AIX local-dynamic TLS model, produces a call to ....
@ SETBC
SETBC - The ISA 3.1 (P10) SETBC instruction.
@ LD_VSX_LH
VSRC, CHAIN = LD_VSX_LH CHAIN, Ptr - This is a floating-point load of a v2f32 value into the lower ha...
@ PROBED_ALLOCA
To avoid stack clash, allocation is performed by block and each block is probed.
@ XXMFACC
XXMFACC = This corresponds to the xxmfacc instruction.
@ ADDIS_TLSGD_HA
G8RC = ADDIS_TLSGD_HA x2, Symbol - For the general-dynamic TLS model, produces an ADDIS8 instruction ...
@ SETBCR
SETBCR - The ISA 3.1 (P10) SETBCR instruction.
@ ACC_BUILD
ACC_BUILD = Build an accumulator register from 4 VSX registers.
@ GlobalBaseReg
The result of the mflr at function entry, used for PIC code.
@ LXVD2X
VSRC, CHAIN = LXVD2X_LE CHAIN, Ptr - Occurs only for little endian.
@ XSMAXC
XSMAXC[DQ]P, XSMINC[DQ]P - C-type min/max instructions.
@ CALL
CALL - A direct function call.
@ MTCTR
CHAIN,FLAG = MTCTR(VAL, CHAIN[, INFLAG]) - Directly corresponds to a MTCTR instruction.
@ TC_RETURN
TC_RETURN - A tail call return.
@ STFIWX
STFIWX - The STFIWX instruction.
@ LD_SPLAT
VSRC, CHAIN = LD_SPLAT, CHAIN, Ptr - splatting memory load instructions such as LXVDSX,...
@ VCMP_rec
RESVEC, OUTFLAG = VCMP_rec(LHS, RHS, OPC) - Represents one of the altivec VCMP*_rec instructions.
@ MFFS
F8RC = MFFS - This moves the FPSCR (not modeled) into the register.
@ PADDI_DTPREL
G8RC = PADDI_DTPREL x3, Symbol - For the pc-rel based local-dynamic TLS model, produces a PADDI8 inst...
@ BUILD_FP128
Direct move of 2 consecutive GPR to a VSX register.
@ VEXTS
VEXTS, ByteWidth - takes an input in VSFRC and produces an output in VSFRC that is sign-extended from...
@ TLS_LOCAL_EXEC_MAT_ADDR
TLS_LOCAL_EXEC_MAT_ADDR = Materialize an address for TLS global address when using local exec access ...
@ VPERM
VPERM - The PPC VPERM Instruction.
@ ADDIS_TLSLD_HA
G8RC = ADDIS_TLSLD_HA x2, Symbol - For the local-dynamic TLS model, produces an ADDIS8 instruction th...
@ XXSPLTI_SP_TO_DP
XXSPLTI_SP_TO_DP - The PPC VSX splat instructions for immediates for converting immediate single prec...
@ GET_TLSLD_ADDR
x3 = GET_TLSLD_ADDR x3, Symbol - For the local-dynamic TLS model, produces a call to __tls_get_addr(s...
@ ADDI_TLSGD_L
x3 = ADDI_TLSGD_L G8RReg, Symbol - For the general-dynamic TLS model, produces an ADDI8 instruction t...
@ DYNAREAOFFSET
This instruction is lowered in PPCRegisterInfo::eliminateFrameIndex to compute an offset from native ...
@ PAIR_BUILD
PAIR_BUILD = Build a vector pair register from 2 VSX registers.
@ STRICT_FADDRTZ
Constrained floating point add in round-to-zero mode.
@ FTSQRT
Test instruction for software square root.
@ FP_EXTEND_HALF
FP_EXTEND_HALF(VECTOR, IDX) - Custom extend upper (IDX=0) half or lower (IDX=1) half of v4f32 to v2f6...
@ CMPB
The CMPB instruction (takes two operands of i32 or i64).
@ VECSHL
VECSHL - The PPC vector shift left instruction.
@ ADDI_TLSLD_L
x3 = ADDI_TLSLD_L G8RReg, Symbol - For the local-dynamic TLS model, produces an ADDI8 instruction tha...
@ FADDRTZ
F8RC = FADDRTZ F8RC, F8RC - This is an FADD done with rounding towards zero.
@ ZEXT_LD_SPLAT
VSRC, CHAIN = ZEXT_LD_SPLAT, CHAIN, Ptr - a splatting memory load that zero-extends.
@ SRA_ADDZE
The combination of sra[wd]i and addze used to implement signed integer division by a power of 2.
@ EXTSWSLI
EXTSWSLI = The PPC extswsli instruction, which does an extend-sign word and shift left immediate.
@ STXVD2X
CHAIN = STXVD2X CHAIN, VSRC, Ptr - Occurs only for little endian.
@ TLSGD_AIX
GPRC = TLSGD_AIX, TOC_ENTRY, TOC_ENTRY / G8RC = TLSGD_AIX, TOC_ENTRY, TOC_ENTRY - Op that combines two re...
@ UINT_VEC_TO_FP
Extract a subvector from unsigned integer vector and convert to FP.
@ GET_TPOINTER
x3 = GET_TPOINTER - Used for the local- and initial-exec TLS model on 32-bit AIX, produces a call to ...
@ LXVRZX
LXVRZX - Load VSX Vector Rightmost and Zero Extend. This node represents v1i128 BUILD_VECTOR of a zero...
@ MFBHRBE
GPRC, CHAIN = MFBHRBE CHAIN, Entry, Dummy - Move from branch history rolling buffer entry.
@ FCFIDU
Newer FCFID[US] integer-to-floating-point conversion instructions for unsigned integers and single-pr...
@ FSEL
FSEL - Traditional three-operand fsel node.
@ SWAP_NO_CHAIN
An SDNode for swaps that are not associated with any loads/stores and thereby have no chain.
@ LOAD_VEC_BE
VSRC, CHAIN = LOAD_VEC_BE CHAIN, Ptr - Occurs only for little endian.
@ LFIWAX
GPRC, CHAIN = LFIWAX CHAIN, Ptr - This is a floating-point load which sign-extends from a 32-bit inte...
@ LD_GOT_TPREL_L
G8RC = LD_GOT_TPREL_L Symbol, G8RReg - Used by the initial-exec TLS model, produces a LD instruction ...
@ MFVSR
Direct move from a VSX register to a GPR.
@ TLS_DYNAMIC_MAT_PCREL_ADDR
TLS_DYNAMIC_MAT_PCREL_ADDR = Materialize a PC Relative address for TLS global address when using dyna...
@ Hi
Hi/Lo - These represent the high and low 16-bit parts of a global address respectively.
Predicate
Predicate - These are "(BI << 5) | BO" for various predicates.
Definition: PPCPredicates.h:26
SDValue get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG)
get_VSPLTI_elt - If this is a build_vector of constants which can be formed by using a vspltis[bhw] i...
bool isXXBRDShuffleMask(ShuffleVectorSDNode *N)
isXXBRDShuffleMask - Return true if this is a shuffle mask suitable for a XXBRD instruction.
FastISel * createFastISel(FunctionLoweringInfo &FuncInfo, const TargetLibraryInfo *LibInfo)
@ DIR_PWR_FUTURE
Definition: PPCSubtarget.h:65
bool isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize, unsigned ShuffleKind, SelectionDAG &DAG)
isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for a VMRGH* instruction with the ...
bool isVPKUDUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, SelectionDAG &DAG)
isVPKUDUMShuffleMask - Return true if this is the shuffle mask for a VPKUDUM instruction.
bool isVMRGEOShuffleMask(ShuffleVectorSDNode *N, bool CheckEven, unsigned ShuffleKind, SelectionDAG &DAG)
isVMRGEOShuffleMask - Return true if this is a shuffle mask suitable for a VMRGEW or VMRGOW instructi...
bool isXXBRQShuffleMask(ShuffleVectorSDNode *N)
isXXBRQShuffleMask - Return true if this is a shuffle mask suitable for a XXBRQ instruction.
bool isXXBRWShuffleMask(ShuffleVectorSDNode *N)
isXXBRWShuffleMask - Return true if this is a shuffle mask suitable for a XXBRW instruction.
bool isXXPERMDIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts, bool &Swap, bool IsLE)
isXXPERMDIShuffleMask - Return true if this is a shuffle mask suitable for a XXPERMDI instruction.
bool isXXBRHShuffleMask(ShuffleVectorSDNode *N)
isXXBRHShuffleMask - Return true if this is a shuffle mask suitable for a XXBRH instruction.
unsigned getSplatIdxForPPCMnemonics(SDNode *N, unsigned EltSize, SelectionDAG &DAG)
getSplatIdxForPPCMnemonics - Return the splat index as a value that is appropriate for PPC mnemonics ...
bool isXXSLDWIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts, bool &Swap, bool IsLE)
isXXSLDWIShuffleMask - Return true if this is a shuffle mask suitable for a XXSLDWI instruction.
int isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind, SelectionDAG &DAG)
isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift amount, otherwise return -1.
bool isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize, unsigned ShuffleKind, SelectionDAG &DAG)
isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for a VMRGL* instruction with the ...
bool isXXINSERTWMask(ShuffleVectorSDNode *N, unsigned &ShiftElts, unsigned &InsertAtByte, bool &Swap, bool IsLE)
isXXINSERTWMask - Return true if this VECTOR_SHUFFLE can be handled by the XXINSERTW instruction intr...
bool isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize)
isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand specifies a splat of a singl...
bool isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, SelectionDAG &DAG)
isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a VPKUWUM instruction.
bool isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, SelectionDAG &DAG)
isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a VPKUHUM instruction.
Libcall
RTLIB::Libcall enum - This enum defines all of the runtime library calls the backend can emit.
@ Define
Register definition.
@ GeneralDynamic
Definition: CodeGen.h:46
Reg
All possible values of the reg field in the ModR/M byte.
@ XMC_PR
Program Code.
Definition: XCOFF.h:105
@ XTY_ER
External reference.
Definition: XCOFF.h:241
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
constexpr double e
Definition: MathExtras.h:47
const_iterator end(StringRef path)
Get end iterator over path.
Definition: Path.cpp:235
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
@ Offset
Definition: DWP.cpp:480
static bool isIndirectCall(const MachineInstr &MI)
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1739
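A small usage sketch of the range form (illustrative only, not code from this file), applying a predicate directly to a container:
  #include "llvm/ADT/STLExtras.h"
  #include "llvm/ADT/SmallVector.h"
  // Check that every lane index of a (hypothetical) shuffle mask is non-negative.
  static bool hasNoUndefLanes(const llvm::SmallVectorImpl<int> &Mask) {
    return llvm::all_of(Mask, [](int Idx) { return Idx >= 0; });
  }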
bool checkConvertToNonDenormSingle(APFloat &ArgAPFloat)
void GetReturnInfo(CallingConv::ID CC, Type *ReturnType, AttributeList attr, SmallVectorImpl< ISD::OutputArg > &Outs, const TargetLowering &TLI, const DataLayout &DL)
Given an LLVM IR type and return type attributes, compute the return value EVTs and flags,...
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
SDValue peekThroughBitcasts(SDValue V)
Return the non-bitcasted source operand of V if it exists.
bool CC_PPC32_SVR4_ByVal(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
bool isAligned(Align Lhs, uint64_t SizeInBytes)
Checks that SizeInBytes is a multiple of the alignment.
Definition: Alignment.h:145
bool isIntS16Immediate(SDNode *N, int16_t &Imm)
isIntS16Immediate - This method tests to see if the node is either a 32-bit or 64-bit immediate,...
bool CC_PPC32_SVR4_VarArg(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64-bit edition).
Definition: MathExtras.h:296
static bool isRunOfOnes64(uint64_t Val, unsigned &MB, unsigned &ME)
int countr_zero(T Val)
Count the number of 0's from the least significant bit to the most significant, stopping at the first 1.
Definition: bit.h:215
unsigned M1(unsigned Val)
Definition: VE.h:376
bool isReleaseOrStronger(AtomicOrdering AO)
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1746
bool RetCC_PPC_Cold(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:291
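A standalone sketch of the documented check (not the LLVM implementation): a nonzero value is a power of two exactly when it has a single set bit.
  #include <cstdint>
  // Clearing the lowest set bit of a power of two leaves zero; zero is rejected.
  static bool isPow2_32(uint32_t Value) {
    return Value != 0 && (Value & (Value - 1)) == 0;
  }
  // e.g. isPow2_32(64) == true, isPow2_32(96) == false, isPow2_32(0) == false.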
bool convertToNonDenormSingle(APInt &ArgAPInt)
FPClassTest
Floating-point class tests, supported by 'is_fpclass' intrinsic.
bool CC_PPC32_SVR4(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
constexpr uint32_t Hi_32(uint64_t Value)
Return the high 32 bits of a 64 bit value.
Definition: MathExtras.h:154
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:167
bool CC_PPC64_ELF(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
bool RetCC_PPC(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
constexpr uint32_t Lo_32(uint64_t Value)
Return the low 32 bits of a 64 bit value.
Definition: MathExtras.h:159
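A minimal standalone sketch of the documented split into high and low halves (assumed equivalent behavior, not the LLVM source):
  #include <cstdint>
  static uint32_t hi32(uint64_t Value) { return uint32_t(Value >> 32); } // upper half
  static uint32_t lo32(uint64_t Value) { return uint32_t(Value); }       // lower half
  // e.g. hi32(0x0000000100000002) == 1 and lo32(0x0000000100000002) == 2.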
format_object< Ts... > format(const char *Fmt, const Ts &... Vals)
These are helper functions used to produce formatted output.
Definition: Format.h:125
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
AtomicOrdering
Atomic ordering for LLVM's memory model.
@ Mod
The access may modify the value stored in memory.
bool isIntS34Immediate(SDNode *N, int64_t &Imm)
isIntS34Immediate - This method tests whether the value of the given node can be accurately represented as a sign ...
bool CCAssignFn(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
CCAssignFn - This function assigns a location for Val, updating State to reflect the change.
@ Mul
Product of integers.
@ And
Bitwise or logical AND of integers.
@ Add
Sum of integers.
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition: Alignment.h:155
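The rounding can be sketched as below (a standalone illustration; it assumes the alignment is a power of two, which Align guarantees):
  #include <cstdint>
  // Round Size up to the next multiple of a power-of-two alignment.
  static uint64_t alignToPow2(uint64_t Size, uint64_t Alignment) {
    return (Size + Alignment - 1) & ~(Alignment - 1);
  }
  // e.g. alignToPow2(13, 8) == 16 and alignToPow2(16, 8) == 16.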
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
Definition: STLExtras.h:1938
DWARFExpression::Operation Op
unsigned M0(unsigned Val)
Definition: VE.h:375
ConstantSDNode * isConstOrConstSplat(SDValue N, bool AllowUndefs=false, bool AllowTruncation=false)
Returns the SDNode if it is a constant splat BuildVector or constant int.
bool isAcquireOrStronger(AtomicOrdering AO)
constexpr int32_t SignExtend32(uint32_t X)
Sign-extend the number in the bottom B bits of X to a 32-bit integer.
Definition: MathExtras.h:563
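A standalone sketch of the documented semantics (not the LLVM code), where B is the number of meaningful low bits:
  #include <cstdint>
  // Sign-extend the low B bits of X to a 32-bit signed integer (requires 0 < B <= 32).
  static int32_t signExtend32(uint32_t X, unsigned B) {
    return int32_t(X << (32 - B)) >> (32 - B);
  }
  // e.g. signExtend32(0xFFFF, 16) == -1 and signExtend32(0x7FFF, 16) == 32767.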
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:217
bool isOneConstant(SDValue V)
Returns true if V is a constant integer one.
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Definition: Alignment.h:212
constexpr int64_t SignExtend64(uint64_t x)
Sign-extend the number in the bottom B bits of X to a 64-bit integer.
Definition: MathExtras.h:581
static bool isRunOfOnes(unsigned Val, unsigned &MB, unsigned &ME)
Returns true iff Val consists of one contiguous run of 1s with any number of 0s on either side.
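A standalone sketch of the contiguous-run test described above (the in-file helper also reports the run's start/end bit positions, and may additionally accept wrapped masks; both are omitted here):
  #include <bit>
  #include <cstdint>
  // Does Val consist of exactly one contiguous block of 1 bits?
  static bool isSingleRunOfOnes(uint32_t Val) {
    if (Val == 0)
      return false;
    uint32_t Shifted = Val >> std::countr_zero(Val); // drop trailing zeros
    return (Shifted & (Shifted + 1)) == 0;           // a single run is now 0b0...01...1
  }
  // e.g. 0x00FF0000 qualifies, 0x00FF00FF does not.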
T bit_floor(T Value)
Returns the largest integral power of two no greater than Value if Value is nonzero.
Definition: bit.h:327
bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
static const unsigned PerfectShuffleTable[6561+1]
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
#define N
This is used by foldLoadsRecursive() to capture a Root Load node which is of type or(load,...
static const fltSemantics & IEEEsingle() LLVM_READNONE
Definition: APFloat.cpp:265
static constexpr roundingMode rmNearestTiesToEven
Definition: APFloat.h:297
static const fltSemantics & PPCDoubleDouble() LLVM_READNONE
Definition: APFloat.cpp:268
static constexpr roundingMode rmTowardZero
Definition: APFloat.h:301
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
uint64_t value() const
This is a hole in the type system and should not be abused.
Definition: Alignment.h:85
Represent subnormal handling kind for floating point instruction inputs and outputs.
Extended Value Type.
Definition: ValueTypes.h:35
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
Definition: ValueTypes.h:94
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition: ValueTypes.h:390
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition: ValueTypes.h:137
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
Definition: ValueTypes.h:74
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
Definition: ValueTypes.h:279
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
Definition: ValueTypes.h:147
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition: ValueTypes.h:368
uint64_t getScalarSizeInBits() const
Definition: ValueTypes.h:380
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition: ValueTypes.h:311
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition: ValueTypes.h:65
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition: ValueTypes.h:376
bool isVector() const
Return true if this is a vector value type.
Definition: ValueTypes.h:168
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition: ValueTypes.h:318
Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
Definition: ValueTypes.cpp:210
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition: ValueTypes.h:323
bool isExtended() const
Test if the given EVT is extended (as opposed to being simple).
Definition: ValueTypes.h:142
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition: ValueTypes.h:157
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition: ValueTypes.h:331
EVT getHalfNumVectorElementsVT(LLVMContext &Context) const
Definition: ValueTypes.h:448
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition: ValueTypes.h:152
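An illustrative sketch (assumed usage, not code from this file) of building a vector EVT and querying the properties listed above:
  #include "llvm/CodeGen/ValueTypes.h"
  #include "llvm/IR/LLVMContext.h"
  // Build a v4i32 value type and inspect it.
  static void inspectV4I32(llvm::LLVMContext &Ctx) {
    llvm::EVT VT = llvm::EVT::getVectorVT(Ctx, llvm::MVT::i32, 4);
    (void)VT.isVector();             // true
    (void)VT.getVectorNumElements(); // 4
    (void)VT.getScalarSizeInBits();  // 32
    (void)VT.getSizeInBits();        // 128 bits (as a TypeSize)
  }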
unsigned getByValSize() const
void setByValSize(unsigned S)
Align getNonZeroByValAlign() const
InputArg - This struct carries flags and type information about a single incoming (formal) argument o...
OutputArg - This struct carries flags and a value for a single outgoing (actual) argument or outgoing...
bool isConstant() const
Returns true if we know the value of all bits.
Definition: KnownBits.h:53
void resetAll()
Resets the known state of all bits.
Definition: KnownBits.h:73
const APInt & getConstant() const
Returns the value when all bits have a known value.
Definition: KnownBits.h:59
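A short illustrative sketch (not from this file) showing how a fully-known value is represented and read back through these accessors:
  #include "llvm/Support/KnownBits.h"
  // Mark every bit of an 8-bit quantity as known, then read the constant back.
  static void knownBitsExample() {
    llvm::KnownBits Known(8);
    Known.One = llvm::APInt(8, 0x5A); // bits known to be one
    Known.Zero = ~Known.One;          // the remaining bits are known to be zero
    if (Known.isConstant())
      (void)Known.getConstant();      // yields APInt(8, 0x5A)
  }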
This class contains a discriminated union of information about pointers in memory operands,...
static MachinePointerInfo getStack(MachineFunction &MF, int64_t Offset, uint8_t ID=0)
Stack pointer relative access.
MachinePointerInfo getWithOffset(int64_t O) const
static MachinePointerInfo getGOT(MachineFunction &MF)
Return a MachinePointerInfo record that refers to a GOT entry.
static MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition: Alignment.h:117
Structure that collects some common arguments that get passed around between the functions for call l...
These are IR-level optimization flags that may be propagated to SDNodes.
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*...
This contains information for each constraint that we are lowering.
This structure contains all information that is necessary for lowering calls.
CallLoweringInfo & setIsPostTypeLegalization(bool Value=true)
CallLoweringInfo & setLibCallee(CallingConv::ID CC, Type *ResultType, SDValue Target, ArgListTy &&ArgsList)
SmallVector< ISD::InputArg, 32 > Ins
CallLoweringInfo & setZExtResult(bool Value=true)
CallLoweringInfo & setDebugLoc(const SDLoc &dl)
CallLoweringInfo & setSExtResult(bool Value=true)
SmallVector< ISD::OutputArg, 32 > Outs
SmallVector< SDValue, 32 > OutVals
SDValue CombineTo(SDNode *N, ArrayRef< SDValue > To, bool AddTo=true)