LLVM 23.0.0git
PPCISelLowering.cpp
Go to the documentation of this file.
1//===-- PPCISelLowering.cpp - PPC DAG Lowering Implementation -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the PPCISelLowering class.
10//
11//===----------------------------------------------------------------------===//
12
13#include "PPCISelLowering.h"
16#include "PPC.h"
17#include "PPCCallingConv.h"
18#include "PPCFrameLowering.h"
19#include "PPCInstrInfo.h"
21#include "PPCPerfectShuffle.h"
22#include "PPCRegisterInfo.h"
23#include "PPCSelectionDAGInfo.h"
24#include "PPCSubtarget.h"
25#include "PPCTargetMachine.h"
26#include "llvm/ADT/APFloat.h"
27#include "llvm/ADT/APInt.h"
28#include "llvm/ADT/APSInt.h"
29#include "llvm/ADT/ArrayRef.h"
30#include "llvm/ADT/DenseMap.h"
31#include "llvm/ADT/STLExtras.h"
34#include "llvm/ADT/Statistic.h"
35#include "llvm/ADT/StringRef.h"
58#include "llvm/IR/CallingConv.h"
59#include "llvm/IR/Constant.h"
60#include "llvm/IR/Constants.h"
61#include "llvm/IR/DataLayout.h"
62#include "llvm/IR/DebugLoc.h"
64#include "llvm/IR/Function.h"
65#include "llvm/IR/GlobalValue.h"
66#include "llvm/IR/IRBuilder.h"
68#include "llvm/IR/Intrinsics.h"
69#include "llvm/IR/IntrinsicsPowerPC.h"
70#include "llvm/IR/Module.h"
71#include "llvm/IR/Type.h"
72#include "llvm/IR/Use.h"
73#include "llvm/IR/Value.h"
74#include "llvm/MC/MCContext.h"
75#include "llvm/MC/MCExpr.h"
84#include "llvm/Support/Debug.h"
86#include "llvm/Support/Format.h"
92#include <algorithm>
93#include <cassert>
94#include <cstdint>
95#include <iterator>
96#include <list>
97#include <optional>
98#include <utility>
99#include <vector>
100
101using namespace llvm;
102
103#define DEBUG_TYPE "ppc-lowering"
104
106 "disable-p10-store-forward",
107 cl::desc("disable P10 store forward-friendly conversion"), cl::Hidden,
108 cl::init(false));
109
110static cl::opt<bool> DisablePPCPreinc("disable-ppc-preinc",
111cl::desc("disable preincrement load/store generation on PPC"), cl::Hidden);
112
113static cl::opt<bool> DisableILPPref("disable-ppc-ilp-pref",
114cl::desc("disable setting the node scheduling preference to ILP on PPC"), cl::Hidden);
115
116static cl::opt<bool> DisablePPCUnaligned("disable-ppc-unaligned",
117cl::desc("disable unaligned load/store generation on PPC"), cl::Hidden);
118
119static cl::opt<bool> DisableSCO("disable-ppc-sco",
120cl::desc("disable sibling call optimization on ppc"), cl::Hidden);
121
122static cl::opt<bool> DisableInnermostLoopAlign32("disable-ppc-innermost-loop-align32",
123cl::desc("don't always align innermost loop to 32 bytes on ppc"), cl::Hidden);
124
125static cl::opt<bool> UseAbsoluteJumpTables("ppc-use-absolute-jumptables",
126cl::desc("use absolute jump tables on ppc"), cl::Hidden);
127
128static cl::opt<bool>
129 DisablePerfectShuffle("ppc-disable-perfect-shuffle",
130 cl::desc("disable vector permute decomposition"),
131 cl::init(true), cl::Hidden);
132
134 "disable-auto-paired-vec-st",
135 cl::desc("disable automatically generated 32byte paired vector stores"),
136 cl::init(true), cl::Hidden);
137
139 "ppc-min-jump-table-entries", cl::init(64), cl::Hidden,
140 cl::desc("Set minimum number of entries to use a jump table on PPC"));
141
143 "ppc-min-bit-test-cmps", cl::init(3), cl::Hidden,
144 cl::desc("Set minimum of largest number of comparisons to use bit test for "
145 "switch on PPC."));
146
148 "ppc-gather-alias-max-depth", cl::init(18), cl::Hidden,
149 cl::desc("max depth when checking alias info in GatherAllAliases()"));
150
152 "ppc-aix-shared-lib-tls-model-opt-limit", cl::init(1), cl::Hidden,
153 cl::desc("Set inclusive limit count of TLS local-dynamic access(es) in a "
154 "function to use initial-exec"));
155
156STATISTIC(NumTailCalls, "Number of tail calls");
157STATISTIC(NumSiblingCalls, "Number of sibling calls");
158STATISTIC(ShufflesHandledWithVPERM,
159 "Number of shuffles lowered to a VPERM or XXPERM");
160STATISTIC(NumDynamicAllocaProbed, "Number of dynamic stack allocation probed");
161
162static bool isNByteElemShuffleMask(ShuffleVectorSDNode *, unsigned, int);
163
164static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl);
165
// A faster local-[exec|dynamic] TLS access sequence (enabled with the
// -maix-small-local-[exec|dynamic]-tls option) can be produced for TLS
// variables; consistent with the IBM XL compiler, we apply a max size of
// slightly under 32KB.
171
// FIXME: Remove this once the bug has been fixed!
174
176 const PPCSubtarget &STI)
177 : TargetLowering(TM, STI), Subtarget(STI) {
178 // Initialize map that relates the PPC addressing modes to the computed flags
179 // of a load/store instruction. The map is used to determine the optimal
180 // addressing mode when selecting load and stores.
181 initializeAddrModeMap();
182 // On PPC32/64, arguments smaller than 4/8 bytes are extended, so all
183 // arguments are at least 4/8 bytes aligned.
184 bool isPPC64 = Subtarget.isPPC64();
185 setMinStackArgumentAlignment(isPPC64 ? Align(8) : Align(4));
186 const MVT RegVT = Subtarget.getScalarIntVT();
187
188 // Set up the register classes.
189 addRegisterClass(MVT::i32, &PPC::GPRCRegClass);
190 if (!useSoftFloat()) {
191 if (hasSPE()) {
192 addRegisterClass(MVT::f32, &PPC::GPRCRegClass);
193 // EFPU2 APU only supports f32
194 if (!Subtarget.hasEFPU2())
195 addRegisterClass(MVT::f64, &PPC::SPERCRegClass);
196 } else {
197 addRegisterClass(MVT::f32, &PPC::F4RCRegClass);
198 addRegisterClass(MVT::f64, &PPC::F8RCRegClass);
199 }
200 }
201
204
205 // PowerPC uses addo_carry,subo_carry to propagate carry.
208
209 // On P10, the default lowering generates better code using the
210 // setbc instruction.
211 if (!Subtarget.hasP10Vector()) {
214 if (isPPC64) {
217 }
218 }
219
220 // Match BITREVERSE to customized fast code sequence in the td file.
223
224 // Sub-word ATOMIC_CMP_SWAP need to ensure that the input is zero-extended.
226
227 // Custom lower inline assembly to check for special registers.
230
231 // PowerPC has an i16 but no i8 (or i1) SEXTLOAD.
232 for (MVT VT : MVT::integer_valuetypes()) {
235 }
236
237 setTruncStoreAction(MVT::f128, MVT::f16, Expand);
239
240 if (Subtarget.isISA3_0()) {
241 setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f16, Legal);
242 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Legal);
243 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Legal);
244 setTruncStoreAction(MVT::f64, MVT::f16, Legal);
245 setTruncStoreAction(MVT::f32, MVT::f16, Legal);
246 } else {
247 // No extending loads from f16 or HW conversions back and forth.
248 setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f16, Expand);
250 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
253 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
256 setTruncStoreAction(MVT::f64, MVT::f16, Expand);
257 setTruncStoreAction(MVT::f32, MVT::f16, Expand);
258 }
259
260 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
261
262 // PowerPC has pre-inc load and store's.
273 if (!Subtarget.hasSPE()) {
278 }
279
280 if (Subtarget.useCRBits()) {
282
283 if (isPPC64 || Subtarget.hasFPCVT()) {
288
290 AddPromotedToType(ISD::SINT_TO_FP, MVT::i1, RegVT);
292 AddPromotedToType(ISD::UINT_TO_FP, MVT::i1, RegVT);
293
298
300 AddPromotedToType(ISD::FP_TO_SINT, MVT::i1, RegVT);
302 AddPromotedToType(ISD::FP_TO_UINT, MVT::i1, RegVT);
303 } else {
308 }
309
310 // PowerPC does not support direct load/store of condition registers.
313
314 // FIXME: Remove this once the ANDI glue bug is fixed:
315 if (ANDIGlueBug)
317
318 for (MVT VT : MVT::integer_valuetypes()) {
321 setTruncStoreAction(VT, MVT::i1, Expand);
322 }
323
324 addRegisterClass(MVT::i1, &PPC::CRBITRCRegClass);
325 }
326
327 // Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on
328 // PPC (the libcall is not available).
333
334 // We do not currently implement these libm ops for PowerPC.
335 setOperationAction(ISD::FFLOOR, MVT::ppcf128, Expand);
336 setOperationAction(ISD::FCEIL, MVT::ppcf128, Expand);
337 setOperationAction(ISD::FTRUNC, MVT::ppcf128, Expand);
338 setOperationAction(ISD::FRINT, MVT::ppcf128, Expand);
340 setOperationAction(ISD::FREM, MVT::ppcf128, LibCall);
341
342 // PowerPC has no SREM/UREM instructions unless we are on P9
343 // On P9 we may use a hardware instruction to compute the remainder.
344 // When the result of both the remainder and the division is required it is
345 // more efficient to compute the remainder from the result of the division
346 // rather than use the remainder instruction. The instructions are legalized
347 // directly because the DivRemPairsPass performs the transformation at the IR
348 // level.
349 if (Subtarget.isISA3_0()) {
354 } else {
359 }
360
361 // Don't use SMUL_LOHI/UMUL_LOHI or SDIVREM/UDIVREM to lower SREM/UREM.
370
371 // Handle constrained floating-point operations of scalar.
372 // TODO: Handle SPE specific operation.
378
383
384 if (!Subtarget.hasSPE()) {
387 }
388
389 if (Subtarget.hasVSX()) {
392 }
393
394 if (Subtarget.hasFSQRT()) {
397 }
398
399 if (Subtarget.hasFPRND()) {
404
409 }
410
411 // We don't support sin/cos/sqrt/fmod/pow
422
423 // MASS transformation for LLVM intrinsics with replicating fast-math flag
424 // to be consistent to PPCGenScalarMASSEntries pass
425 if (TM.getOptLevel() == CodeGenOptLevel::Aggressive) {
438 }
439
440 if (Subtarget.hasSPE()) {
443 } else {
444 setOperationAction(ISD::FMA , MVT::f64, Legal);
445 setOperationAction(ISD::FMA , MVT::f32, Legal);
448 }
449
450 if (Subtarget.hasSPE())
451 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
452
453 // If we're enabling GP optimizations, use hardware square root
454 if (!Subtarget.hasFSQRT() && !(Subtarget.hasFRSQRTE() && Subtarget.hasFRE()))
456
457 if (!Subtarget.hasFSQRT() &&
458 !(Subtarget.hasFRSQRTES() && Subtarget.hasFRES()))
460
461 if (Subtarget.hasFCPSGN()) {
464 } else {
467 }
468
469 if (Subtarget.hasFPRND()) {
474
479 }
480
481 // Prior to P10, PowerPC does not have BSWAP, but we can use vector BSWAP
482 // instruction xxbrd to speed up scalar BSWAP64.
483 if (Subtarget.isISA3_1()) {
486 } else {
489 (Subtarget.hasP9Vector() && isPPC64) ? Custom : Expand);
490 }
491
492 // CTPOP or CTTZ were introduced in P8/P9 respectively
493 if (Subtarget.isISA3_0()) {
494 setOperationAction(ISD::CTTZ , MVT::i32 , Legal);
495 setOperationAction(ISD::CTTZ , MVT::i64 , Legal);
496 } else {
497 setOperationAction(ISD::CTTZ , MVT::i32 , Expand);
498 setOperationAction(ISD::CTTZ , MVT::i64 , Expand);
499 }
500
501 if (Subtarget.hasPOPCNTD() == PPCSubtarget::POPCNTD_Fast) {
504 } else {
507 }
508
509 // PowerPC does not have ROTR
512
513 if (!Subtarget.useCRBits()) {
514 // PowerPC does not have Select
519 }
520
521 // PowerPC wants to turn select_cc of FP into fsel when possible.
524
525 // PowerPC wants to optimize integer setcc a bit
526 if (!Subtarget.useCRBits())
528
529 if (Subtarget.hasFPU()) {
533
537 }
538
539 // PowerPC does not have BRCOND which requires SetCC
540 if (!Subtarget.useCRBits())
542
544
545 if (Subtarget.hasSPE()) {
546 // SPE has built-in conversions
553
554 // SPE supports signaling compare of f32/f64.
557 } else {
558 // PowerPC turns FP_TO_SINT into FCTIWZ and some load/stores.
561
562 // PowerPC does not have [U|S]INT_TO_FP
567 }
568
569 if (Subtarget.hasDirectMove() && isPPC64) {
574
583 } else {
588 }
589
590 // We cannot sextinreg(i1). Expand to shifts.
592
593 // Custom handling for PowerPC ucmp instruction
595 setOperationAction(ISD::UCMP, MVT::i64, isPPC64 ? Custom : Expand);
596
597 // NOTE: EH_SJLJ_SETJMP/_LONGJMP supported here is NOT intended to support
598 // SjLj exception handling but a light-weight setjmp/longjmp replacement to
599 // support continuation, user-level threading, and etc.. As a result, no
600 // other SjLj exception interfaces are implemented and please don't build
601 // your own exception handling based on them.
602 // LLVM/Clang supports zero-cost DWARF exception handling.
605
606 // We want to legalize GlobalAddress and ConstantPool nodes into the
607 // appropriate instructions to materialize the address.
618
619 // TRAP is legal.
620 setOperationAction(ISD::TRAP, MVT::Other, Legal);
621
622 // TRAMPOLINE is custom lowered.
625
626 // VASTART needs to be custom lowered to use the VarArgsFrameIndex
628
629 if (Subtarget.is64BitELFABI()) {
630 // VAARG always uses double-word chunks, so promote anything smaller.
632 AddPromotedToType(ISD::VAARG, MVT::i1, MVT::i64);
634 AddPromotedToType(ISD::VAARG, MVT::i8, MVT::i64);
636 AddPromotedToType(ISD::VAARG, MVT::i16, MVT::i64);
638 AddPromotedToType(ISD::VAARG, MVT::i32, MVT::i64);
640 } else if (Subtarget.is32BitELFABI()) {
641 // VAARG is custom lowered with the 32-bit SVR4 ABI.
644 } else
646
647 // VACOPY is custom lowered with the 32-bit SVR4 ABI.
648 if (Subtarget.is32BitELFABI())
650 else
652
653 // Use the default implementation.
654 setOperationAction(ISD::VAEND , MVT::Other, Expand);
663
664 if (Subtarget.isISA3_0() && isPPC64) {
665 setOperationAction(ISD::VP_STORE, MVT::v16i1, Custom);
666 setOperationAction(ISD::VP_STORE, MVT::v8i1, Custom);
667 setOperationAction(ISD::VP_STORE, MVT::v4i1, Custom);
668 setOperationAction(ISD::VP_STORE, MVT::v2i1, Custom);
669 setOperationAction(ISD::VP_LOAD, MVT::v16i1, Custom);
670 setOperationAction(ISD::VP_LOAD, MVT::v8i1, Custom);
671 setOperationAction(ISD::VP_LOAD, MVT::v4i1, Custom);
672 setOperationAction(ISD::VP_LOAD, MVT::v2i1, Custom);
673 }
674
675 // We want to custom lower some of our intrinsics.
681
682 // To handle counter-based loop conditions.
685
690
691 // Comparisons that require checking two conditions.
692 if (Subtarget.hasSPE()) {
697 }
710
713
714 if (Subtarget.has64BitSupport()) {
715 // They also have instructions for converting between i64 and fp.
724 // This is just the low 32 bits of a (signed) fp->i64 conversion.
725 // We cannot do this with Promote because i64 is not a legal type.
728
729 if (Subtarget.hasLFIWAX() || isPPC64) {
732 }
733 } else {
734 // PowerPC does not have FP_TO_UINT on 32-bit implementations.
735 if (Subtarget.hasSPE()) {
738 } else {
741 }
742 }
743
744 // With the instructions enabled under FPCVT, we can do everything.
745 if (Subtarget.hasFPCVT()) {
746 if (Subtarget.has64BitSupport()) {
755 }
756
765 }
766
767 if (Subtarget.use64BitRegs()) {
768 // 64-bit PowerPC implementations can support i64 types directly
769 addRegisterClass(MVT::i64, &PPC::G8RCRegClass);
770 // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
772 // 64-bit PowerPC wants to expand i128 shifts itself.
776 } else {
777 // 32-bit PowerPC wants to expand i64 shifts itself.
781 }
782
783 // PowerPC has better expansions for funnel shifts than the generic
784 // TargetLowering::expandFunnelShift.
785 if (Subtarget.has64BitSupport()) {
788 }
791
792 if (Subtarget.hasVSX()) {
803 }
804
805 if (Subtarget.hasAltivec()) {
806 for (MVT VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32 }) {
813 }
814 // First set operation action for all vector types to expand. Then we
815 // will selectively turn on ones that can be effectively codegen'd.
817 // add/sub are legal for all supported vector VT's.
820
821 // For v2i64, these are only valid with P8Vector. This is corrected after
822 // the loop.
823 if (VT.getSizeInBits() <= 128 && VT.getScalarSizeInBits() <= 64) {
828 }
829 else {
834 }
835
836 if (Subtarget.hasVSX()) {
842 }
843
844 // Vector instructions introduced in P8
845 if (Subtarget.hasP8Altivec() && (VT.SimpleTy != MVT::v1i128)) {
848 }
849 else {
852 }
853
854 // Vector instructions introduced in P9
855 if (Subtarget.hasP9Altivec() && (VT.SimpleTy != MVT::v1i128))
857 else
859
860 // We promote all shuffles to v16i8.
862 AddPromotedToType (ISD::VECTOR_SHUFFLE, VT, MVT::v16i8);
863
864 // We promote all non-typed operations to v4i32.
866 AddPromotedToType (ISD::AND , VT, MVT::v4i32);
868 AddPromotedToType (ISD::OR , VT, MVT::v4i32);
870 AddPromotedToType (ISD::XOR , VT, MVT::v4i32);
872 AddPromotedToType (ISD::LOAD , VT, MVT::v4i32);
874 AddPromotedToType (ISD::SELECT, VT, MVT::v4i32);
877 AddPromotedToType (ISD::SELECT_CC, VT, MVT::v4i32);
879 AddPromotedToType (ISD::STORE, VT, MVT::v4i32);
880
881 // No other operations are legal.
920
921 for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
922 setTruncStoreAction(VT, InnerVT, Expand);
925 setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
926 }
927 }
929 if (!Subtarget.hasP8Vector()) {
930 setOperationAction(ISD::SMAX, MVT::v2i64, Expand);
931 setOperationAction(ISD::SMIN, MVT::v2i64, Expand);
932 setOperationAction(ISD::UMAX, MVT::v2i64, Expand);
933 setOperationAction(ISD::UMIN, MVT::v2i64, Expand);
934 }
935
936 // We can custom expand all VECTOR_SHUFFLEs to VPERM, others we can handle
937 // with merges, splats, etc.
939
940 // Vector truncates to sub-word integer that fit in an Altivec/VSX register
941 // are cheap, so handle them before they get expanded to scalar.
947
948 setOperationAction(ISD::AND , MVT::v4i32, Legal);
949 setOperationAction(ISD::OR , MVT::v4i32, Legal);
950 setOperationAction(ISD::XOR , MVT::v4i32, Legal);
951 setOperationAction(ISD::LOAD , MVT::v4i32, Legal);
953 Subtarget.useCRBits() ? Legal : Expand);
954 setOperationAction(ISD::STORE , MVT::v4i32, Legal);
964 setOperationAction(ISD::FCEIL, MVT::v4f32, Legal);
967
968 // Custom lowering ROTL v1i128 to VECTOR_SHUFFLE v16i8.
969 setOperationAction(ISD::ROTL, MVT::v1i128, Custom);
970 // With hasAltivec set, we can lower ISD::ROTL to vrl(b|h|w).
971 if (Subtarget.hasAltivec())
972 for (auto VT : {MVT::v4i32, MVT::v8i16, MVT::v16i8})
974 // With hasP8Altivec set, we can lower ISD::ROTL to vrld.
975 if (Subtarget.hasP8Altivec())
976 setOperationAction(ISD::ROTL, MVT::v2i64, Legal);
977
978 addRegisterClass(MVT::v4f32, &PPC::VRRCRegClass);
979 addRegisterClass(MVT::v4i32, &PPC::VRRCRegClass);
980 addRegisterClass(MVT::v8i16, &PPC::VRRCRegClass);
981 addRegisterClass(MVT::v16i8, &PPC::VRRCRegClass);
982
983 setOperationAction(ISD::MUL, MVT::v4f32, Legal);
984 setOperationAction(ISD::FMA, MVT::v4f32, Legal);
985
986 if (Subtarget.hasVSX()) {
987 setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
988 setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
990 }
991
992 if (Subtarget.hasP8Altivec())
993 setOperationAction(ISD::MUL, MVT::v4i32, Legal);
994 else
995 setOperationAction(ISD::MUL, MVT::v4i32, Custom);
996
997 if (Subtarget.isISA3_1()) {
998 setOperationAction(ISD::MUL, MVT::v2i64, Legal);
999 setOperationAction(ISD::MULHS, MVT::v2i64, Legal);
1000 setOperationAction(ISD::MULHU, MVT::v2i64, Legal);
1001 setOperationAction(ISD::MULHS, MVT::v4i32, Legal);
1002 setOperationAction(ISD::MULHU, MVT::v4i32, Legal);
1003 setOperationAction(ISD::UDIV, MVT::v2i64, Legal);
1004 setOperationAction(ISD::SDIV, MVT::v2i64, Legal);
1005 setOperationAction(ISD::UDIV, MVT::v4i32, Legal);
1006 setOperationAction(ISD::SDIV, MVT::v4i32, Legal);
1007 setOperationAction(ISD::UREM, MVT::v2i64, Legal);
1008 setOperationAction(ISD::SREM, MVT::v2i64, Legal);
1009 setOperationAction(ISD::UREM, MVT::v4i32, Legal);
1010 setOperationAction(ISD::SREM, MVT::v4i32, Legal);
1011 setOperationAction(ISD::UREM, MVT::v1i128, Legal);
1012 setOperationAction(ISD::SREM, MVT::v1i128, Legal);
1013 setOperationAction(ISD::UDIV, MVT::v1i128, Legal);
1014 setOperationAction(ISD::SDIV, MVT::v1i128, Legal);
1015 setOperationAction(ISD::ROTL, MVT::v1i128, Legal);
1016 }
1017
1018 setOperationAction(ISD::MUL, MVT::v8i16, Legal);
1019 setOperationAction(ISD::MUL, MVT::v16i8, Custom);
1020
1023 // LE is P8+/64-bit so direct moves are supported and these operations
1024 // are legal. The custom transformation requires 64-bit since we need a
1025 // pair of stores that will cover a 128-bit load for P10.
1026 if (!DisableP10StoreForward && isPPC64 && !Subtarget.isLittleEndian()) {
1030 }
1031
1036
1037 // Altivec does not contain unordered floating-point compare instructions
1038 setCondCodeAction(ISD::SETUO, MVT::v4f32, Expand);
1039 setCondCodeAction(ISD::SETUEQ, MVT::v4f32, Expand);
1040 setCondCodeAction(ISD::SETO, MVT::v4f32, Expand);
1041 setCondCodeAction(ISD::SETONE, MVT::v4f32, Expand);
1042
1043 if (Subtarget.hasVSX()) {
1046 if (Subtarget.hasP8Vector()) {
1049 }
1050 if (Subtarget.hasDirectMove() && isPPC64) {
1059 }
1061
1062 // The nearbyint variants are not allowed to raise the inexact exception
1063 // so we can only code-gen them with fpexcept.ignore.
1068
1069 setOperationAction(ISD::FFLOOR, MVT::v2f64, Legal);
1070 setOperationAction(ISD::FCEIL, MVT::v2f64, Legal);
1071 setOperationAction(ISD::FTRUNC, MVT::v2f64, Legal);
1072 setOperationAction(ISD::FRINT, MVT::v2f64, Legal);
1073 setOperationAction(ISD::FROUND, MVT::v2f64, Legal);
1076
1077 setOperationAction(ISD::FRINT, MVT::v4f32, Legal);
1078 setOperationAction(ISD::FROUND, MVT::v4f32, Legal);
1081
1082 setOperationAction(ISD::MUL, MVT::v2f64, Legal);
1083 setOperationAction(ISD::FMA, MVT::v2f64, Legal);
1084
1085 setOperationAction(ISD::FDIV, MVT::v2f64, Legal);
1086 setOperationAction(ISD::FSQRT, MVT::v2f64, Legal);
1087
1088 // Share the Altivec comparison restrictions.
1089 setCondCodeAction(ISD::SETUO, MVT::v2f64, Expand);
1090 setCondCodeAction(ISD::SETUEQ, MVT::v2f64, Expand);
1091 setCondCodeAction(ISD::SETO, MVT::v2f64, Expand);
1092 setCondCodeAction(ISD::SETONE, MVT::v2f64, Expand);
1093
1094 setOperationAction(ISD::LOAD, MVT::v2f64, Legal);
1095 setOperationAction(ISD::STORE, MVT::v2f64, Legal);
1096
1098
1099 if (Subtarget.hasP8Vector())
1100 addRegisterClass(MVT::f32, &PPC::VSSRCRegClass);
1101
1102 addRegisterClass(MVT::f64, &PPC::VSFRCRegClass);
1103
1104 addRegisterClass(MVT::v4i32, &PPC::VSRCRegClass);
1105 addRegisterClass(MVT::v4f32, &PPC::VSRCRegClass);
1106 addRegisterClass(MVT::v2f64, &PPC::VSRCRegClass);
1107
1108 if (Subtarget.hasP8Altivec()) {
1109 setOperationAction(ISD::SHL, MVT::v2i64, Legal);
1110 setOperationAction(ISD::SRA, MVT::v2i64, Legal);
1111 setOperationAction(ISD::SRL, MVT::v2i64, Legal);
1112
1113 // 128 bit shifts can be accomplished via 3 instructions for SHL and
1114 // SRL, but not for SRA because of the instructions available:
1115 // VS{RL} and VS{RL}O. However due to direct move costs, it's not worth
1116 // doing
1117 setOperationAction(ISD::SHL, MVT::v1i128, Expand);
1118 setOperationAction(ISD::SRL, MVT::v1i128, Expand);
1119 setOperationAction(ISD::SRA, MVT::v1i128, Expand);
1120
1121 setOperationAction(ISD::SETCC, MVT::v2i64, Legal);
1122 }
1123 else {
1124 setOperationAction(ISD::SHL, MVT::v2i64, Expand);
1125 setOperationAction(ISD::SRA, MVT::v2i64, Expand);
1126 setOperationAction(ISD::SRL, MVT::v2i64, Expand);
1127
1128 setOperationAction(ISD::SETCC, MVT::v2i64, Custom);
1129
1130 // VSX v2i64 only supports non-arithmetic operations.
1131 setOperationAction(ISD::ADD, MVT::v2i64, Expand);
1132 setOperationAction(ISD::SUB, MVT::v2i64, Expand);
1133 }
1134
1135 if (Subtarget.isISA3_1())
1136 setOperationAction(ISD::SETCC, MVT::v1i128, Legal);
1137 else
1138 setOperationAction(ISD::SETCC, MVT::v1i128, Expand);
1139
1140 setOperationAction(ISD::LOAD, MVT::v2i64, Promote);
1141 AddPromotedToType (ISD::LOAD, MVT::v2i64, MVT::v2f64);
1143 AddPromotedToType (ISD::STORE, MVT::v2i64, MVT::v2f64);
1144
1146
1155
1156 // Custom handling for partial vectors of integers converted to
1157 // floating point. We already have optimal handling for v2i32 through
1158 // the DAG combine, so those aren't necessary.
1175
1176 setOperationAction(ISD::FNEG, MVT::v4f32, Legal);
1177 setOperationAction(ISD::FNEG, MVT::v2f64, Legal);
1178 setOperationAction(ISD::FABS, MVT::v4f32, Legal);
1179 setOperationAction(ISD::FABS, MVT::v2f64, Legal);
1182
1185
1186 // Handle constrained floating-point operations of vector.
1187 // The predictor is `hasVSX` because altivec instruction has
1188 // no exception but VSX vector instruction has.
1202
1216
1217 addRegisterClass(MVT::v2i64, &PPC::VSRCRegClass);
1218 addRegisterClass(MVT::f128, &PPC::VRRCRegClass);
1219
1220 for (MVT FPT : MVT::fp_valuetypes())
1221 setLoadExtAction(ISD::EXTLOAD, MVT::f128, FPT, Expand);
1222
1223 // Expand the SELECT to SELECT_CC
1225
1226 setTruncStoreAction(MVT::f128, MVT::f64, Expand);
1227 setTruncStoreAction(MVT::f128, MVT::f32, Expand);
1228
1229 // No implementation for these ops for PowerPC.
1231 setOperationAction(ISD::FSIN, MVT::f128, Expand);
1232 setOperationAction(ISD::FCOS, MVT::f128, Expand);
1233 setOperationAction(ISD::FPOW, MVT::f128, Expand);
1236 }
1237
1238 if (Subtarget.hasP8Altivec()) {
1239 addRegisterClass(MVT::v2i64, &PPC::VRRCRegClass);
1240 addRegisterClass(MVT::v1i128, &PPC::VRRCRegClass);
1241 }
1242
1243 if (Subtarget.hasP9Vector()) {
1246
1247 // Test data class instructions store results in CR bits.
1248 if (Subtarget.useCRBits()) {
1253 }
1254
1255 // 128 bit shifts can be accomplished via 3 instructions for SHL and
1256 // SRL, but not for SRA because of the instructions available:
1257 // VS{RL} and VS{RL}O.
1258 setOperationAction(ISD::SHL, MVT::v1i128, Legal);
1259 setOperationAction(ISD::SRL, MVT::v1i128, Legal);
1260 setOperationAction(ISD::SRA, MVT::v1i128, Expand);
1261
1262 setOperationAction(ISD::FADD, MVT::f128, Legal);
1263 setOperationAction(ISD::FSUB, MVT::f128, Legal);
1264 setOperationAction(ISD::FDIV, MVT::f128, Legal);
1265 setOperationAction(ISD::FMUL, MVT::f128, Legal);
1267
1268 setOperationAction(ISD::FMA, MVT::f128, Legal);
1275
1277 setOperationAction(ISD::FRINT, MVT::f128, Legal);
1279 setOperationAction(ISD::FCEIL, MVT::f128, Legal);
1282
1286
1287 // Handle constrained floating-point operations of fp128
1304 setOperationAction(ISD::BSWAP, MVT::v8i16, Legal);
1305 setOperationAction(ISD::BSWAP, MVT::v4i32, Legal);
1306 setOperationAction(ISD::BSWAP, MVT::v2i64, Legal);
1307 setOperationAction(ISD::BSWAP, MVT::v1i128, Legal);
1308 } else if (Subtarget.hasVSX()) {
1311
1312 AddPromotedToType(ISD::LOAD, MVT::f128, MVT::v4i32);
1313 AddPromotedToType(ISD::STORE, MVT::f128, MVT::v4i32);
1314
1315 // Set FADD/FSUB as libcall to avoid the legalizer to expand the
1316 // fp_to_uint and int_to_fp.
1319
1320 setOperationAction(ISD::FMUL, MVT::f128, Expand);
1321 setOperationAction(ISD::FDIV, MVT::f128, Expand);
1322 setOperationAction(ISD::FNEG, MVT::f128, Expand);
1323 setOperationAction(ISD::FABS, MVT::f128, Expand);
1325 setOperationAction(ISD::FMA, MVT::f128, Expand);
1327
1328 // Expand the fp_extend if the target type is fp128.
1331
1332 // Expand the fp_round if the source type is fp128.
1333 for (MVT VT : {MVT::f32, MVT::f64}) {
1336 }
1337
1342
1343 // Lower following f128 select_cc pattern:
1344 // select_cc x, y, tv, fv, cc -> select_cc (setcc x, y, cc), 0, tv, fv, NE
1346
1347 // We need to handle f128 SELECT_CC with integer result type.
1349 setOperationAction(ISD::SELECT_CC, MVT::i64, isPPC64 ? Custom : Expand);
1350 }
1351
1352 if (Subtarget.hasP9Altivec()) {
1353 if (Subtarget.isISA3_1()) {
1358 } else {
1361 }
1369
1370 setOperationAction(ISD::ABDU, MVT::v16i8, Legal);
1371 setOperationAction(ISD::ABDU, MVT::v8i16, Legal);
1372 setOperationAction(ISD::ABDU, MVT::v4i32, Legal);
1373 setOperationAction(ISD::ABDS, MVT::v4i32, Legal);
1374 }
1375
1376 if (Subtarget.hasP10Vector()) {
1378 }
1379 }
1380
1381 if (Subtarget.pairedVectorMemops()) {
1382 addRegisterClass(MVT::v256i1, &PPC::VSRpRCRegClass);
1383 setOperationAction(ISD::LOAD, MVT::v256i1, Custom);
1384 setOperationAction(ISD::STORE, MVT::v256i1, Custom);
1385 }
1386 if (Subtarget.hasMMA()) {
1387 if (Subtarget.isISAFuture()) {
1388 addRegisterClass(MVT::v512i1, &PPC::WACCRCRegClass);
1389 addRegisterClass(MVT::v1024i1, &PPC::DMRRCRegClass);
1390 addRegisterClass(MVT::v2048i1, &PPC::DMRpRCRegClass);
1391 setOperationAction(ISD::LOAD, MVT::v1024i1, Custom);
1392 setOperationAction(ISD::STORE, MVT::v1024i1, Custom);
1393 setOperationAction(ISD::LOAD, MVT::v2048i1, Custom);
1394 setOperationAction(ISD::STORE, MVT::v2048i1, Custom);
1395 } else {
1396 addRegisterClass(MVT::v512i1, &PPC::UACCRCRegClass);
1397 }
1398 setOperationAction(ISD::LOAD, MVT::v512i1, Custom);
1399 setOperationAction(ISD::STORE, MVT::v512i1, Custom);
1401 }
1402
1403 if (Subtarget.has64BitSupport())
1405
1406 if (Subtarget.isISA3_1())
1407 setOperationAction(ISD::SRA, MVT::v1i128, Legal);
1408
1409 setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, isPPC64 ? Legal : Custom);
1410
1411 if (!isPPC64) {
1414 }
1415
1420 }
1421
1423
1424 if (Subtarget.hasAltivec()) {
1425 // Altivec instructions set fields to all zeros or all ones.
1427 }
1428
1431 else if (isPPC64)
1433 else
1435
1436 setStackPointerRegisterToSaveRestore(isPPC64 ? PPC::X1 : PPC::R1);
1437
1438 // We have target-specific dag combine patterns for the following nodes:
1442 if (Subtarget.hasFPCVT())
1445 if (Subtarget.useCRBits())
1449
1451
1453
1454 if (Subtarget.useCRBits()) {
1456 }
1457
1458 if (Subtarget.hasP8Vector())
1460
1461 // With 32 condition bits, we don't need to sink (and duplicate) compares
1462 // aggressively in CodeGenPrep.
1463 if (Subtarget.useCRBits()) {
1465 }
1466
1467 // TODO: The default entry number is set to 64. This stops most jump table
1468 // generation on PPC. But it is good for current PPC HWs because the indirect
1469 // branch instruction mtctr to the jump table may lead to bad branch predict.
1470 // Re-evaluate this value on future HWs that can do better with mtctr.
1472
1473 // The default minimum of largest number in a BitTest cluster is 3.
1475
1477 setMinCmpXchgSizeInBits(Subtarget.hasPartwordAtomics() ? 8 : 32);
1478
1479 auto CPUDirective = Subtarget.getCPUDirective();
1480 switch (CPUDirective) {
1481 default: break;
1482 case PPC::DIR_970:
1483 case PPC::DIR_A2:
1484 case PPC::DIR_E500:
1485 case PPC::DIR_E500mc:
1486 case PPC::DIR_E5500:
1487 case PPC::DIR_PWR4:
1488 case PPC::DIR_PWR5:
1489 case PPC::DIR_PWR5X:
1490 case PPC::DIR_PWR6:
1491 case PPC::DIR_PWR6X:
1492 case PPC::DIR_PWR7:
1493 case PPC::DIR_PWR8:
1494 case PPC::DIR_PWR9:
1495 case PPC::DIR_PWR10:
1496 case PPC::DIR_PWR11:
1500 break;
1501 }
1502
1503 if (Subtarget.enableMachineScheduler())
1505 else
1507
1509
1510 // The Freescale cores do better with aggressive inlining of memcpy and
1511 // friends. GCC uses same threshold of 128 bytes (= 32 word stores).
1512 if (CPUDirective == PPC::DIR_E500mc || CPUDirective == PPC::DIR_E5500) {
1513 MaxStoresPerMemset = 32;
1515 MaxStoresPerMemcpy = 32;
1519 } else if (CPUDirective == PPC::DIR_A2) {
1520 // The A2 also benefits from (very) aggressive inlining of memcpy and
1521 // friends. The overhead of a the function call, even when warm, can be
1522 // over one hundred cycles.
1523 MaxStoresPerMemset = 128;
1524 MaxStoresPerMemcpy = 128;
1525 MaxStoresPerMemmove = 128;
1526 MaxLoadsPerMemcmp = 128;
1527 } else {
1530 }
1531
1532 // Enable generation of STXVP instructions by default for mcpu=future.
1533 if (CPUDirective == PPC::DIR_PWR_FUTURE &&
1534 DisableAutoPairedVecSt.getNumOccurrences() == 0)
1535 DisableAutoPairedVecSt = false;
1536
1537 IsStrictFPEnabled = true;
1538
1539 // Let the subtarget (CPU) decide if a predictable select is more expensive
1540 // than the corresponding branch. This information is used in CGP to decide
1541 // when to convert selects into branches.
1542 PredictableSelectIsExpensive = Subtarget.isPredictableSelectIsExpensive();
1543
1545}
1546
// *********************************** NOTE ************************************
// For selecting load and store instructions, the addressing modes are defined
// as ComplexPatterns in PPCInstrInfo.td, which are then utilized in the TD
// patterns to match the load and store instructions.
//
// The TD definitions for the addressing modes correspond to their respective
// Select<AddrMode>Form() function in PPCISelDAGToDAG.cpp. These functions rely
// on SelectOptimalAddrMode(), which calls computeMOFlags() to compute the
// address mode flags of a particular node. Afterwards, the computed address
// flags are passed into getAddrModeForFlags() in order to retrieve the optimal
// addressing mode. SelectOptimalAddrMode() then sets the Base and Displacement
// accordingly, based on the preferred addressing mode.
//
// Within PPCISelLowering.h, there are two enums: MemOpFlags and AddrMode.
// MemOpFlags contains all the possible flags that can be used to compute the
// optimal addressing mode for load and store instructions.
// AddrMode contains all the possible load and store addressing modes available
// on Power (such as DForm, DSForm, DQForm, XForm, etc.)
//
// When adding new load and store instructions, it is possible that new address
// flags may need to be added into MemOpFlags, and a new addressing mode will
// need to be added to AddrMode. An entry of the new addressing mode (consisting
// of the minimal and main distinguishing address flags for the new load/store
// instructions) will need to be added into initializeAddrModeMap() below.
// Finally, when adding new addressing modes, the getAddrModeForFlags() will
// need to be updated to account for selecting the optimal addressing mode.
// *****************************************************************************
/// Initialize the map that relates the different addressing modes of the load
/// and store instructions to a set of flags. This ensures the load/store
/// instruction is correctly matched during instruction selection.
1577void PPCTargetLowering::initializeAddrModeMap() {
1578 AddrModesMap[PPC::AM_DForm] = {
1579 // LWZ, STW
1584 // LBZ, LHZ, STB, STH
1589 // LHA
1594 // LFS, LFD, STFS, STFD
1599 };
1600 AddrModesMap[PPC::AM_DSForm] = {
1601 // LWA
1605 // LD, STD
1609 // DFLOADf32, DFLOADf64, DSTOREf32, DSTOREf64
1613 };
1614 AddrModesMap[PPC::AM_DQForm] = {
1615 // LXV, STXV
1619 };
1620 AddrModesMap[PPC::AM_PrefixDForm] = {PPC::MOF_RPlusSImm34 |
1622 // TODO: Add mapping for quadword load/store.
1623}
1624
1625/// getMaxByValAlign - Helper for getByValTypeAlignment to determine
1626/// the desired ByVal argument alignment.
1627static void getMaxByValAlign(Type *Ty, Align &MaxAlign, Align MaxMaxAlign) {
1628 if (MaxAlign == MaxMaxAlign)
1629 return;
1630 if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
1631 if (MaxMaxAlign >= 32 &&
1632 VTy->getPrimitiveSizeInBits().getFixedValue() >= 256)
1633 MaxAlign = Align(32);
1634 else if (VTy->getPrimitiveSizeInBits().getFixedValue() >= 128 &&
1635 MaxAlign < 16)
1636 MaxAlign = Align(16);
1637 } else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
1638 Align EltAlign;
1639 getMaxByValAlign(ATy->getElementType(), EltAlign, MaxMaxAlign);
1640 if (EltAlign > MaxAlign)
1641 MaxAlign = EltAlign;
1642 } else if (StructType *STy = dyn_cast<StructType>(Ty)) {
1643 for (auto *EltTy : STy->elements()) {
1644 Align EltAlign;
1645 getMaxByValAlign(EltTy, EltAlign, MaxMaxAlign);
1646 if (EltAlign > MaxAlign)
1647 MaxAlign = EltAlign;
1648 if (MaxAlign == MaxMaxAlign)
1649 break;
1650 }
1651 }
1652}
1653
/// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
/// function arguments in the caller parameter area.
/// \param DL the module data layout (not consulted directly here; alignment
///        is derived from the subtarget and, with Altivec, the type contents)
    const DataLayout &DL) const {
  // 16byte and wider vectors are passed on 16byte boundary.
  // The rest is 8 on PPC64 and 4 on PPC32 boundary.
  Align Alignment = Subtarget.isPPC64() ? Align(8) : Align(4);
  // With Altivec, aggregates containing vectors may need to be raised to a
  // 16-byte boundary; getMaxByValAlign walks the type to find the maximum.
  if (Subtarget.hasAltivec())
    getMaxByValAlign(Ty, Alignment, Align(16));
  return Alignment;
}
1665
1667 return Subtarget.useSoftFloat();
1668}
1669
1671 return Subtarget.hasSPE();
1672}
1673
1675 return VT.isScalarInteger();
1676}
1677
    Type *VectorTy, unsigned ElemSizeInBits, unsigned &Index) const {
  // Requires a 64-bit subtarget with VSX.
  if (!Subtarget.isPPC64() || !Subtarget.hasVSX())
    return false;

  if (auto *VTy = dyn_cast<VectorType>(VectorTy)) {
    if (VTy->getScalarType()->isIntegerTy()) {
      // ElemSizeInBits 8/16 can fit in immediate field, not needed here.
      // NOTE(review): Index appears to be the fixed lane that can be
      // extracted cheaply, differing by endianness -- confirm with callers.
      if (ElemSizeInBits == 32) {
        Index = Subtarget.isLittleEndian() ? 2 : 1;
        return true;
      }
      if (ElemSizeInBits == 64) {
        Index = Subtarget.isLittleEndian() ? 1 : 0;
        return true;
      }
    }
  }
  // Non-integer vectors and other element sizes are not handled.
  return false;
}
1698
1700 EVT VT) const {
1701 if (!VT.isVector())
1702 return Subtarget.useCRBits() ? MVT::i1 : MVT::i32;
1703
1705}
1706
1708 assert(VT.isFloatingPoint() && "Non-floating-point FMA?");
1709 return true;
1710}
1711
1712//===----------------------------------------------------------------------===//
1713// Node matching predicates, for use by the tblgen matching code.
1714//===----------------------------------------------------------------------===//
1715
1716/// isFloatingPointZero - Return true if this is 0.0 or -0.0.
1719 return CFP->getValueAPF().isZero();
1720 else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) {
1721 // Maybe this has already been legalized into the constant pool?
1722 if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op.getOperand(1)))
1723 if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
1724 return CFP->getValueAPF().isZero();
1725 }
1726 return false;
1727}
1728
/// isConstantOrUndef - Op is either an undef mask entry (negative) or a
/// concrete index. Return true if Op is undef or matches \p Val.
static bool isConstantOrUndef(int Op, int Val) {
  if (Op < 0)
    return true; // Undef entries match anything.
  return Op == Val;
}
1734
/// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a
/// VPKUHUM instruction.
/// The ShuffleKind distinguishes between big-endian operations with
/// two different inputs (0), either-endian operations with two identical
/// inputs (1), and little-endian operations with two different inputs (2).
/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
                               SelectionDAG &DAG) {
  bool IsLE = DAG.getDataLayout().isLittleEndian();
  if (ShuffleKind == 0) {
    // Two-input BE form: every result byte is the odd-numbered (low) byte
    // of the corresponding halfword across both inputs.
    if (IsLE)
      return false;
    for (unsigned i = 0; i != 16; ++i)
      if (!isConstantOrUndef(N->getMaskElt(i), i*2+1))
        return false;
  } else if (ShuffleKind == 2) {
    // Two-input LE form: the even-numbered bytes are selected (the operands
    // are swapped relative to BE, see PPCInstrAltivec.td).
    if (!IsLE)
      return false;
    for (unsigned i = 0; i != 16; ++i)
      if (!isConstantOrUndef(N->getMaskElt(i), i*2))
        return false;
  } else if (ShuffleKind == 1) {
    // Single-input form: both halves of the mask must select the same bytes;
    // j picks even (LE) or odd (BE) bytes of each halfword.
    unsigned j = IsLE ? 0 : 1;
    for (unsigned i = 0; i != 8; ++i)
      if (!isConstantOrUndef(N->getMaskElt(i), i*2+j) ||
          !isConstantOrUndef(N->getMaskElt(i+8), i*2+j))
        return false;
  }
  // NOTE(review): a ShuffleKind outside 0-2 falls through to "return true";
  // callers are presumably restricted to 0/1/2.
  return true;
}
1765
/// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a
/// VPKUWUM instruction.
/// The ShuffleKind distinguishes between big-endian operations with
/// two different inputs (0), either-endian operations with two identical
/// inputs (1), and little-endian operations with two different inputs (2).
/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
                               SelectionDAG &DAG) {
  bool IsLE = DAG.getDataLayout().isLittleEndian();
  if (ShuffleKind == 0) {
    // Two-input BE form: each result halfword is bytes 2i+2..2i+3 -- the low
    // halfword of the corresponding word across both inputs.
    if (IsLE)
      return false;
    for (unsigned i = 0; i != 16; i += 2)
      if (!isConstantOrUndef(N->getMaskElt(i  ),  i*2+2) ||
          !isConstantOrUndef(N->getMaskElt(i+1),  i*2+3))
        return false;
  } else if (ShuffleKind == 2) {
    // Two-input LE form: bytes 2i..2i+1 are selected (operands swapped
    // relative to BE, see PPCInstrAltivec.td).
    if (!IsLE)
      return false;
    for (unsigned i = 0; i != 16; i += 2)
      if (!isConstantOrUndef(N->getMaskElt(i  ),  i*2) ||
          !isConstantOrUndef(N->getMaskElt(i+1),  i*2+1))
        return false;
  } else if (ShuffleKind == 1) {
    // Single-input form: both result halves must select the same halfwords;
    // j picks the even (LE) or odd (BE) halfword of each word.
    unsigned j = IsLE ? 0 : 2;
    for (unsigned i = 0; i != 8; i += 2)
      if (!isConstantOrUndef(N->getMaskElt(i  ),  i*2+j)   ||
          !isConstantOrUndef(N->getMaskElt(i+1),  i*2+j+1) ||
          !isConstantOrUndef(N->getMaskElt(i+8),  i*2+j)   ||
          !isConstantOrUndef(N->getMaskElt(i+9),  i*2+j+1))
        return false;
  }
  // NOTE(review): a ShuffleKind outside 0-2 falls through to "return true";
  // callers are presumably restricted to 0/1/2.
  return true;
}
1800
/// isVPKUDUMShuffleMask - Return true if this is the shuffle mask for a
/// VPKUDUM instruction, AND the VPKUDUM instruction exists for the
/// current subtarget.
///
/// The ShuffleKind distinguishes between big-endian operations with
/// two different inputs (0), either-endian operations with two identical
/// inputs (1), and little-endian operations with two different inputs (2).
/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
                               SelectionDAG &DAG) {
  const PPCSubtarget &Subtarget = DAG.getSubtarget<PPCSubtarget>();
  // VPKUDUM requires POWER8 vector support.
  if (!Subtarget.hasP8Vector())
    return false;

  bool IsLE = DAG.getDataLayout().isLittleEndian();
  if (ShuffleKind == 0) {
    // Two-input BE form: each result word is bytes 2i+4..2i+7 -- the low
    // word of the corresponding doubleword across both inputs.
    if (IsLE)
      return false;
    for (unsigned i = 0; i != 16; i += 4)
      if (!isConstantOrUndef(N->getMaskElt(i  ),  i*2+4) ||
          !isConstantOrUndef(N->getMaskElt(i+1),  i*2+5) ||
          !isConstantOrUndef(N->getMaskElt(i+2),  i*2+6) ||
          !isConstantOrUndef(N->getMaskElt(i+3),  i*2+7))
        return false;
  } else if (ShuffleKind == 2) {
    // Two-input LE form: bytes 2i..2i+3 (operands swapped relative to BE,
    // see PPCInstrAltivec.td).
    if (!IsLE)
      return false;
    for (unsigned i = 0; i != 16; i += 4)
      if (!isConstantOrUndef(N->getMaskElt(i  ),  i*2)   ||
          !isConstantOrUndef(N->getMaskElt(i+1),  i*2+1) ||
          !isConstantOrUndef(N->getMaskElt(i+2),  i*2+2) ||
          !isConstantOrUndef(N->getMaskElt(i+3),  i*2+3))
        return false;
  } else if (ShuffleKind == 1) {
    // Single-input form: both result halves must select the same words;
    // j picks the even (LE) or odd (BE) word of each doubleword.
    unsigned j = IsLE ? 0 : 4;
    for (unsigned i = 0; i != 8; i += 4)
      if (!isConstantOrUndef(N->getMaskElt(i  ),  i*2+j)   ||
          !isConstantOrUndef(N->getMaskElt(i+1),  i*2+j+1) ||
          !isConstantOrUndef(N->getMaskElt(i+2),  i*2+j+2) ||
          !isConstantOrUndef(N->getMaskElt(i+3),  i*2+j+3) ||
          !isConstantOrUndef(N->getMaskElt(i+8),  i*2+j)   ||
          !isConstantOrUndef(N->getMaskElt(i+9),  i*2+j+1) ||
          !isConstantOrUndef(N->getMaskElt(i+10),  i*2+j+2) ||
          !isConstantOrUndef(N->getMaskElt(i+11),  i*2+j+3))
        return false;
  }
  // NOTE(review): a ShuffleKind outside 0-2 falls through to "return true";
  // callers are presumably restricted to 0/1/2.
  return true;
}
1849
1850/// isVMerge - Common function, used to match vmrg* shuffles.
1851///
1852static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize,
1853 unsigned LHSStart, unsigned RHSStart) {
1854 if (N->getValueType(0) != MVT::v16i8)
1855 return false;
1856 assert((UnitSize == 1 || UnitSize == 2 || UnitSize == 4) &&
1857 "Unsupported merge size!");
1858
1859 for (unsigned i = 0; i != 8/UnitSize; ++i) // Step over units
1860 for (unsigned j = 0; j != UnitSize; ++j) { // Step over bytes within unit
1861 if (!isConstantOrUndef(N->getMaskElt(i*UnitSize*2+j),
1862 LHSStart+j+i*UnitSize) ||
1863 !isConstantOrUndef(N->getMaskElt(i*UnitSize*2+UnitSize+j),
1864 RHSStart+j+i*UnitSize))
1865 return false;
1866 }
1867 return true;
1868}
1869
/// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for
/// a VMRGL* instruction with the specified unit size (1,2 or 4 bytes).
/// The ShuffleKind distinguishes between big-endian merges with two
/// different inputs (0), either-endian merges with two identical inputs (1),
/// and little-endian merges with two different inputs (2). For the latter,
/// the input operands are swapped (see PPCInstrAltivec.td).
                             unsigned ShuffleKind, SelectionDAG &DAG) {
  if (DAG.getDataLayout().isLittleEndian()) {
    // On LE the merged half starts at byte 0 of each input.
    if (ShuffleKind == 1) // unary
      return isVMerge(N, UnitSize, 0, 0);
    else if (ShuffleKind == 2) // swapped
      return isVMerge(N, UnitSize, 0, 16);
    else
      return false;
  } else {
    // On BE the merged half starts at byte 8 of each input.
    if (ShuffleKind == 1) // unary
      return isVMerge(N, UnitSize, 8, 8);
    else if (ShuffleKind == 0) // normal
      return isVMerge(N, UnitSize, 8, 24);
    else
      return false;
  }
}
1894
/// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for
/// a VMRGH* instruction with the specified unit size (1,2 or 4 bytes).
/// The ShuffleKind distinguishes between big-endian merges with two
/// different inputs (0), either-endian merges with two identical inputs (1),
/// and little-endian merges with two different inputs (2). For the latter,
/// the input operands are swapped (see PPCInstrAltivec.td).
                             unsigned ShuffleKind, SelectionDAG &DAG) {
  if (DAG.getDataLayout().isLittleEndian()) {
    // On LE the merged half starts at byte 8 of each input.
    if (ShuffleKind == 1) // unary
      return isVMerge(N, UnitSize, 8, 8);
    else if (ShuffleKind == 2) // swapped
      return isVMerge(N, UnitSize, 8, 24);
    else
      return false;
  } else {
    // On BE the merged half starts at byte 0 of each input.
    if (ShuffleKind == 1) // unary
      return isVMerge(N, UnitSize, 0, 0);
    else if (ShuffleKind == 0) // normal
      return isVMerge(N, UnitSize, 0, 16);
    else
      return false;
  }
}
1919
1920/**
1921 * Common function used to match vmrgew and vmrgow shuffles
1922 *
1923 * The indexOffset determines whether to look for even or odd words in
 * the shuffle mask. This is based on the endianness of the target
1925 * machine.
1926 * - Little Endian:
1927 * - Use offset of 0 to check for odd elements
1928 * - Use offset of 4 to check for even elements
1929 * - Big Endian:
1930 * - Use offset of 0 to check for even elements
1931 * - Use offset of 4 to check for odd elements
1932 * A detailed description of the vector element ordering for little endian and
1933 * big endian can be found at
1934 * http://www.ibm.com/developerworks/library/l-ibm-xl-c-cpp-compiler/index.html
1935 * Targeting your applications - what little endian and big endian IBM XL C/C++
1936 * compiler differences mean to you
1937 *
1938 * The mask to the shuffle vector instruction specifies the indices of the
1939 * elements from the two input vectors to place in the result. The elements are
1940 * numbered in array-access order, starting with the first vector. These vectors
1941 * are always of type v16i8, thus each vector will contain 16 elements of size
1942 * 8. More info on the shuffle vector can be found in the
1943 * http://llvm.org/docs/LangRef.html#shufflevector-instruction
1944 * Language Reference.
1945 *
1946 * The RHSStartValue indicates whether the same input vectors are used (unary)
1947 * or two different input vectors are used, based on the following:
1948 * - If the instruction uses the same vector for both inputs, the range of the
1949 * indices will be 0 to 15. In this case, the RHSStart value passed should
1950 * be 0.
1951 * - If the instruction has two different vectors then the range of the
1952 * indices will be 0 to 31. In this case, the RHSStart value passed should
1953 * be 16 (indices 0-15 specify elements in the first vector while indices 16
1954 * to 31 specify elements in the second vector).
1955 *
1956 * \param[in] N The shuffle vector SD Node to analyze
1957 * \param[in] IndexOffset Specifies whether to look for even or odd elements
1958 * \param[in] RHSStartValue Specifies the starting index for the righthand input
1959 * vector to the shuffle_vector instruction
1960 * \return true iff this shuffle vector represents an even or odd word merge
1961 */
1962static bool isVMerge(ShuffleVectorSDNode *N, unsigned IndexOffset,
1963 unsigned RHSStartValue) {
1964 if (N->getValueType(0) != MVT::v16i8)
1965 return false;
1966
1967 for (unsigned i = 0; i < 2; ++i)
1968 for (unsigned j = 0; j < 4; ++j)
1969 if (!isConstantOrUndef(N->getMaskElt(i*4+j),
1970 i*RHSStartValue+j+IndexOffset) ||
1971 !isConstantOrUndef(N->getMaskElt(i*4+j+8),
1972 i*RHSStartValue+j+IndexOffset+8))
1973 return false;
1974 return true;
1975}
1976
1977/**
1978 * Determine if the specified shuffle mask is suitable for the vmrgew or
1979 * vmrgow instructions.
1980 *
1981 * \param[in] N The shuffle vector SD Node to analyze
1982 * \param[in] CheckEven Check for an even merge (true) or an odd merge (false)
1983 * \param[in] ShuffleKind Identify the type of merge:
1984 * - 0 = big-endian merge with two different inputs;
1985 * - 1 = either-endian merge with two identical inputs;
1986 * - 2 = little-endian merge with two different inputs (inputs are swapped for
1987 * little-endian merges).
1988 * \param[in] DAG The current SelectionDAG
1989 * \return true iff this shuffle mask
1990 */
1992 unsigned ShuffleKind, SelectionDAG &DAG) {
1993 if (DAG.getDataLayout().isLittleEndian()) {
1994 unsigned indexOffset = CheckEven ? 4 : 0;
1995 if (ShuffleKind == 1) // Unary
1996 return isVMerge(N, indexOffset, 0);
1997 else if (ShuffleKind == 2) // swapped
1998 return isVMerge(N, indexOffset, 16);
1999 else
2000 return false;
2001 }
2002 else {
2003 unsigned indexOffset = CheckEven ? 0 : 4;
2004 if (ShuffleKind == 1) // Unary
2005 return isVMerge(N, indexOffset, 0);
2006 else if (ShuffleKind == 0) // Normal
2007 return isVMerge(N, indexOffset, 16);
2008 else
2009 return false;
2010 }
2011 return false;
2012}
2013
/// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift
/// amount, otherwise return -1.
/// The ShuffleKind distinguishes between big-endian operations with two
/// different inputs (0), either-endian operations with two identical inputs
/// (1), and little-endian operations with two different inputs (2). For the
/// latter, the input operands are swapped (see PPCInstrAltivec.td).
int PPC::isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind,
                             SelectionDAG &DAG) {
  if (N->getValueType(0) != MVT::v16i8)
    return -1;

  // Find the first non-undef value in the shuffle mask.
  unsigned i;
  for (i = 0; i != 16 && SVOp->getMaskElt(i) < 0; ++i)
    /*search*/;

  if (i == 16) return -1; // all undef.

  // Otherwise, check to see if the rest of the elements are consecutively
  // numbered from this value.
  unsigned ShiftAmt = SVOp->getMaskElt(i);
  if (ShiftAmt < i) return -1;

  // Rebase onto element 0 so a leading run of undefs does not change the
  // computed shift.
  ShiftAmt -= i;
  bool isLE = DAG.getDataLayout().isLittleEndian();

  if ((ShuffleKind == 0 && !isLE) || (ShuffleKind == 2 && isLE)) {
    // Check the rest of the elements to see if they are consecutive.
    for (++i; i != 16; ++i)
      if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt+i))
        return -1;
  } else if (ShuffleKind == 1) {
    // Check the rest of the elements to see if they are consecutive.
    // Unary form: both inputs are identical, so indices wrap modulo 16.
    for (++i; i != 16; ++i)
      if (!isConstantOrUndef(SVOp->getMaskElt(i), (ShiftAmt+i) & 15))
        return -1;
  } else
    return -1;

  // vsldoi counts from the left of the register, so the amount is mirrored
  // on little-endian targets.
  if (isLE)
    ShiftAmt = 16 - ShiftAmt;

  return ShiftAmt;
}
2060
/// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a splat of a single element that is suitable for input to
/// one of the splat operations (VSPLTB/VSPLTH/VSPLTW/XXSPLTW/LXVDSX/etc.).
  EVT VT = N->getValueType(0);
  // For 64-bit element vectors a splat only needs both lanes to agree.
  if (VT == MVT::v2i64 || VT == MVT::v2f64)
    return EltSize == 8 && N->getMaskElt(0) == N->getMaskElt(1);

  assert(VT == MVT::v16i8 && isPowerOf2_32(EltSize) &&
         EltSize <= 8 && "Can only handle 1,2,4,8 byte element sizes");

  // The consecutive indices need to specify an element, not part of two
  // different elements. So abandon ship early if this isn't the case.
  if (N->getMaskElt(0) % EltSize != 0)
    return false;

  // This is a splat operation if each element of the permute is the same, and
  // if the value doesn't reference the second vector.
  unsigned ElementBase = N->getMaskElt(0);

  // FIXME: Handle UNDEF elements too!
  if (ElementBase >= 16)
    return false;

  // Check that the indices are consecutive, in the case of a multi-byte element
  // splatted with a v16i8 mask.
  for (unsigned i = 1; i != EltSize; ++i)
    if (N->getMaskElt(i) < 0 || N->getMaskElt(i) != (int)(i+ElementBase))
      return false;

  // Every subsequent element must repeat the first element's bytes exactly,
  // or be entirely undef.
  for (unsigned i = EltSize, e = 16; i != e; i += EltSize) {
    // An UNDEF element is a sequence of UNDEF bytes.
    if (N->getMaskElt(i) < 0) {
      // Partially-undef elements are rejected: every byte must be undef.
      for (unsigned j = 1; j != EltSize; ++j)
        if (N->getMaskElt(i + j) >= 0)
          return false;
    } else
      for (unsigned j = 0; j != EltSize; ++j)
        if (N->getMaskElt(i + j) != N->getMaskElt(j))
          return false;
  }
  return true;
}
2104
2105/// Check that the mask is shuffling N byte elements. Within each N byte
2106/// element of the mask, the indices could be either in increasing or
2107/// decreasing order as long as they are consecutive.
2108/// \param[in] N the shuffle vector SD Node to analyze
2109/// \param[in] Width the element width in bytes, could be 2/4/8/16 (HalfWord/
2110/// Word/DoubleWord/QuadWord).
2111/// \param[in] StepLen the delta indices number among the N byte element, if
2112/// the mask is in increasing/decreasing order then it is 1/-1.
2113/// \return true iff the mask is shuffling N byte elements.
2114static bool isNByteElemShuffleMask(ShuffleVectorSDNode *N, unsigned Width,
2115 int StepLen) {
2116 assert((Width == 2 || Width == 4 || Width == 8 || Width == 16) &&
2117 "Unexpected element width.");
2118 assert((StepLen == 1 || StepLen == -1) && "Unexpected element width.");
2119
2120 unsigned NumOfElem = 16 / Width;
2121 unsigned MaskVal[16]; // Width is never greater than 16
2122 for (unsigned i = 0; i < NumOfElem; ++i) {
2123 MaskVal[0] = N->getMaskElt(i * Width);
2124 if ((StepLen == 1) && (MaskVal[0] % Width)) {
2125 return false;
2126 } else if ((StepLen == -1) && ((MaskVal[0] + 1) % Width)) {
2127 return false;
2128 }
2129
2130 for (unsigned int j = 1; j < Width; ++j) {
2131 MaskVal[j] = N->getMaskElt(i * Width + j);
2132 if (MaskVal[j] != MaskVal[j-1] + StepLen) {
2133 return false;
2134 }
2135 }
2136 }
2137
2138 return true;
2139}
2140
2141bool PPC::isXXINSERTWMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
2142 unsigned &InsertAtByte, bool &Swap, bool IsLE) {
2143 if (!isNByteElemShuffleMask(N, 4, 1))
2144 return false;
2145
2146 // Now we look at mask elements 0,4,8,12
2147 unsigned M0 = N->getMaskElt(0) / 4;
2148 unsigned M1 = N->getMaskElt(4) / 4;
2149 unsigned M2 = N->getMaskElt(8) / 4;
2150 unsigned M3 = N->getMaskElt(12) / 4;
2151 unsigned LittleEndianShifts[] = { 2, 1, 0, 3 };
2152 unsigned BigEndianShifts[] = { 3, 0, 1, 2 };
2153
2154 // Below, let H and L be arbitrary elements of the shuffle mask
2155 // where H is in the range [4,7] and L is in the range [0,3].
2156 // H, 1, 2, 3 or L, 5, 6, 7
2157 if ((M0 > 3 && M1 == 1 && M2 == 2 && M3 == 3) ||
2158 (M0 < 4 && M1 == 5 && M2 == 6 && M3 == 7)) {
2159 ShiftElts = IsLE ? LittleEndianShifts[M0 & 0x3] : BigEndianShifts[M0 & 0x3];
2160 InsertAtByte = IsLE ? 12 : 0;
2161 Swap = M0 < 4;
2162 return true;
2163 }
2164 // 0, H, 2, 3 or 4, L, 6, 7
2165 if ((M1 > 3 && M0 == 0 && M2 == 2 && M3 == 3) ||
2166 (M1 < 4 && M0 == 4 && M2 == 6 && M3 == 7)) {
2167 ShiftElts = IsLE ? LittleEndianShifts[M1 & 0x3] : BigEndianShifts[M1 & 0x3];
2168 InsertAtByte = IsLE ? 8 : 4;
2169 Swap = M1 < 4;
2170 return true;
2171 }
2172 // 0, 1, H, 3 or 4, 5, L, 7
2173 if ((M2 > 3 && M0 == 0 && M1 == 1 && M3 == 3) ||
2174 (M2 < 4 && M0 == 4 && M1 == 5 && M3 == 7)) {
2175 ShiftElts = IsLE ? LittleEndianShifts[M2 & 0x3] : BigEndianShifts[M2 & 0x3];
2176 InsertAtByte = IsLE ? 4 : 8;
2177 Swap = M2 < 4;
2178 return true;
2179 }
2180 // 0, 1, 2, H or 4, 5, 6, L
2181 if ((M3 > 3 && M0 == 0 && M1 == 1 && M2 == 2) ||
2182 (M3 < 4 && M0 == 4 && M1 == 5 && M2 == 6)) {
2183 ShiftElts = IsLE ? LittleEndianShifts[M3 & 0x3] : BigEndianShifts[M3 & 0x3];
2184 InsertAtByte = IsLE ? 0 : 12;
2185 Swap = M3 < 4;
2186 return true;
2187 }
2188
2189 // If both vector operands for the shuffle are the same vector, the mask will
2190 // contain only elements from the first one and the second one will be undef.
2191 if (N->getOperand(1).isUndef()) {
2192 ShiftElts = 0;
2193 Swap = true;
2194 unsigned XXINSERTWSrcElem = IsLE ? 2 : 1;
2195 if (M0 == XXINSERTWSrcElem && M1 == 1 && M2 == 2 && M3 == 3) {
2196 InsertAtByte = IsLE ? 12 : 0;
2197 return true;
2198 }
2199 if (M0 == 0 && M1 == XXINSERTWSrcElem && M2 == 2 && M3 == 3) {
2200 InsertAtByte = IsLE ? 8 : 4;
2201 return true;
2202 }
2203 if (M0 == 0 && M1 == 1 && M2 == XXINSERTWSrcElem && M3 == 3) {
2204 InsertAtByte = IsLE ? 4 : 8;
2205 return true;
2206 }
2207 if (M0 == 0 && M1 == 1 && M2 == 2 && M3 == XXINSERTWSrcElem) {
2208 InsertAtByte = IsLE ? 0 : 12;
2209 return true;
2210 }
2211 }
2212
2213 return false;
2214}
2215
                              bool &Swap, bool IsLE) {
  assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
  // Ensure each byte index of the word is consecutive.
  if (!isNByteElemShuffleMask(N, 4, 1))
    return false;

  // Now we look at mask elements 0,4,8,12, which are the beginning of words.
  unsigned M0 = N->getMaskElt(0) / 4;
  unsigned M1 = N->getMaskElt(4) / 4;
  unsigned M2 = N->getMaskElt(8) / 4;
  unsigned M3 = N->getMaskElt(12) / 4;

  // If both vector operands for the shuffle are the same vector, the mask will
  // contain only elements from the first one and the second one will be undef.
  if (N->getOperand(1).isUndef()) {
    assert(M0 < 4 && "Indexing into an undef vector?");
    // The words must rotate consecutively within the single input.
    if (M1 != (M0 + 1) % 4 || M2 != (M1 + 1) % 4 || M3 != (M2 + 1) % 4)
      return false;

    ShiftElts = IsLE ? (4 - M0) % 4 : M0;
    Swap = false;
    return true;
  }

  // Ensure each word index of the ShuffleVector Mask is consecutive.
  if (M1 != (M0 + 1) % 8 || M2 != (M1 + 1) % 8 || M3 != (M2 + 1) % 8)
    return false;

  if (IsLE) {
    if (M0 == 0 || M0 == 7 || M0 == 6 || M0 == 5) {
      // Input vectors don't need to be swapped if the leading element
      // of the result is one of the 3 left elements of the second vector
      // (or if there is no shift to be done at all).
      Swap = false;
      ShiftElts = (8 - M0) % 8;
    } else if (M0 == 4 || M0 == 3 || M0 == 2 || M0 == 1) {
      // Input vectors need to be swapped if the leading element
      // of the result is one of the 3 left elements of the first vector
      // (or if we're shifting by 4 - thereby simply swapping the vectors).
      Swap = true;
      ShiftElts = (4 - M0) % 4;
    }

    // M0 is in [0,7], so exactly one of the two branches above fired.
    return true;
  } else { // BE
    if (M0 == 0 || M0 == 1 || M0 == 2 || M0 == 3) {
      // Input vectors don't need to be swapped if the leading element
      // of the result is one of the 4 elements of the first vector.
      Swap = false;
      ShiftElts = M0;
    } else if (M0 == 4 || M0 == 5 || M0 == 6 || M0 == 7) {
      // Input vectors need to be swapped if the leading element
      // of the result is one of the 4 elements of the right vector.
      Swap = true;
      ShiftElts = M0 - 4;
    }

    // M0 is in [0,7], so exactly one of the two branches above fired.
    return true;
  }
}
2277
// NOTE(review): looks like the byte-reverse (XXBR*) mask predicate -- each
// Width-byte element must select its source bytes in decreasing, consecutive
// order starting from the element's last byte; confirm against callers.
  assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");

  // Within each element the mask must be consecutive and decreasing.
  if (!isNByteElemShuffleMask(N, Width, -1))
    return false;

  // Element i must begin at byte i + Width - 1 (its own last byte).
  for (int i = 0; i < 16; i += Width)
    if (N->getMaskElt(i) != i + Width - 1)
      return false;

  return true;
}
2290
2294
2298
2302
2306
/// Can node \p N be lowered to an XXPERMDI instruction? If so, set \p Swap
/// if the inputs to the instruction should be swapped and set \p DM to the
/// value for the immediate.
/// Specifically, set \p Swap to true only if \p N can be lowered to XXPERMDI
/// AND element 0 of the result comes from the first input (LE) or second input
/// (BE). Set \p DM to the calculated result (0-3) only if \p N can be lowered.
/// \return true iff the given mask of shuffle node \p N is a XXPERMDI shuffle
/// mask.
                               bool &Swap, bool IsLE) {
  assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");

  // Ensure each byte index of the double word is consecutive.
  if (!isNByteElemShuffleMask(N, 8, 1))
    return false;

  // M0 and M1 are the doubleword indices (0-3 across the two inputs)
  // selected for the two halves of the result.
  unsigned M0 = N->getMaskElt(0) / 8;
  unsigned M1 = N->getMaskElt(8) / 8;
  assert(((M0 | M1) < 4) && "A mask element out of bounds?");

  // If both vector operands for the shuffle are the same vector, the mask will
  // contain only elements from the first one and the second one will be undef.
  if (N->getOperand(1).isUndef()) {
    if ((M0 | M1) < 2) {
      // On LE the immediate bits are complemented and reversed vs. BE.
      DM = IsLE ? (((~M1) & 1) << 1) + ((~M0) & 1) : (M0 << 1) + (M1 & 1);
      Swap = false;
      return true;
    } else
      return false;
  }

  if (IsLE) {
    if (M0 > 1 && M1 < 2) {
      Swap = false;
    } else if (M0 < 2 && M1 > 1) {
      // Element 0 of the result comes from the first input: swap the
      // operands and renumber the doubleword selections accordingly.
      M0 = (M0 + 2) % 4;
      M1 = (M1 + 2) % 4;
      Swap = true;
    } else
      return false;

    // Note: if control flow comes here that means Swap is already set above
    DM = (((~M1) & 1) << 1) + ((~M0) & 1);
    return true;
  } else { // BE
    if (M0 < 2 && M1 > 1) {
      Swap = false;
    } else if (M0 > 1 && M1 < 2) {
      // Element 0 of the result comes from the second input: swap the
      // operands and renumber the doubleword selections accordingly.
      M0 = (M0 + 2) % 4;
      M1 = (M1 + 2) % 4;
      Swap = true;
    } else
      return false;

    // Note: if control flow comes here that means Swap is already set above
    DM = (M0 << 1) + (M1 & 1);
    return true;
  }
}
2366
2367
/// getSplatIdxForPPCMnemonics - Return the splat index as a value that is
/// appropriate for PPC mnemonics (which have a big endian bias - namely
/// elements are counted from the left of the vector register).
unsigned PPC::getSplatIdxForPPCMnemonics(SDNode *N, unsigned EltSize,
                                         SelectionDAG &DAG) {
  assert(isSplatShuffleMask(SVOp, EltSize));
  EVT VT = SVOp->getValueType(0);

  // For 64-bit element vectors the lane number simply flips on LE.
  if (VT == MVT::v2i64 || VT == MVT::v2f64)
    return DAG.getDataLayout().isLittleEndian() ? 1 - SVOp->getMaskElt(0)
                                                : SVOp->getMaskElt(0);

  // Convert the splatted byte index into an element index, counted from the
  // left of the register -- so mirrored on little-endian targets.
  if (DAG.getDataLayout().isLittleEndian())
    return (16 / EltSize) - 1 - (SVOp->getMaskElt(0) / EltSize);
  else
    return SVOp->getMaskElt(0) / EltSize;
}
2386
2387/// get_VSPLTI_elt - If this is a build_vector of constants which can be formed
2388/// by using a vspltis[bhw] instruction of the specified element size, return
2389/// the constant being splatted. The ByteSize field indicates the number of
2390/// bytes of each element [124] -> [bhw].
2392 SDValue OpVal;
2393
2394 // If ByteSize of the splat is bigger than the element size of the
2395 // build_vector, then we have a case where we are checking for a splat where
2396 // multiple elements of the buildvector are folded together into a single
2397 // logical element of the splat (e.g. "vsplish 1" to splat {0,1}*8).
2398 unsigned EltSize = 16/N->getNumOperands();
2399 if (EltSize < ByteSize) {
2400 unsigned Multiple = ByteSize/EltSize; // Number of BV entries per spltval.
2401 SDValue UniquedVals[4];
2402 assert(Multiple > 1 && Multiple <= 4 && "How can this happen?");
2403
2404 // See if all of the elements in the buildvector agree across.
2405 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
2406 if (N->getOperand(i).isUndef()) continue;
2407 // If the element isn't a constant, bail fully out.
2408 if (!isa<ConstantSDNode>(N->getOperand(i))) return SDValue();
2409
2410 if (!UniquedVals[i&(Multiple-1)].getNode())
2411 UniquedVals[i&(Multiple-1)] = N->getOperand(i);
2412 else if (UniquedVals[i&(Multiple-1)] != N->getOperand(i))
2413 return SDValue(); // no match.
2414 }
2415
2416 // Okay, if we reached this point, UniquedVals[0..Multiple-1] contains
2417 // either constant or undef values that are identical for each chunk. See
2418 // if these chunks can form into a larger vspltis*.
2419
2420 // Check to see if all of the leading entries are either 0 or -1. If
2421 // neither, then this won't fit into the immediate field.
2422 bool LeadingZero = true;
2423 bool LeadingOnes = true;
2424 for (unsigned i = 0; i != Multiple-1; ++i) {
2425 if (!UniquedVals[i].getNode()) continue; // Must have been undefs.
2426
2427 LeadingZero &= isNullConstant(UniquedVals[i]);
2428 LeadingOnes &= isAllOnesConstant(UniquedVals[i]);
2429 }
2430 // Finally, check the least significant entry.
2431 if (LeadingZero) {
2432 if (!UniquedVals[Multiple-1].getNode())
2433 return DAG.getTargetConstant(0, SDLoc(N), MVT::i32); // 0,0,0,undef
2434 int Val = UniquedVals[Multiple - 1]->getAsZExtVal();
2435 if (Val < 16) // 0,0,0,4 -> vspltisw(4)
2436 return DAG.getTargetConstant(Val, SDLoc(N), MVT::i32);
2437 }
2438 if (LeadingOnes) {
2439 if (!UniquedVals[Multiple-1].getNode())
2440 return DAG.getTargetConstant(~0U, SDLoc(N), MVT::i32); // -1,-1,-1,undef
2441 int Val =cast<ConstantSDNode>(UniquedVals[Multiple-1])->getSExtValue();
2442 if (Val >= -16) // -1,-1,-1,-2 -> vspltisw(-2)
2443 return DAG.getTargetConstant(Val, SDLoc(N), MVT::i32);
2444 }
2445
2446 return SDValue();
2447 }
2448
2449 // Check to see if this buildvec has a single non-undef value in its elements.
2450 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
2451 if (N->getOperand(i).isUndef()) continue;
2452 if (!OpVal.getNode())
2453 OpVal = N->getOperand(i);
2454 else if (OpVal != N->getOperand(i))
2455 return SDValue();
2456 }
2457
2458 if (!OpVal.getNode()) return SDValue(); // All UNDEF: use implicit def.
2459
2460 unsigned ValSizeInBytes = EltSize;
2461 uint64_t Value = 0;
2462 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
2463 Value = CN->getZExtValue();
2464 } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
2465 assert(CN->getValueType(0) == MVT::f32 && "Only one legal FP vector type!");
2466 Value = llvm::bit_cast<uint32_t>(CN->getValueAPF().convertToFloat());
2467 }
2468
2469 // If the splat value is larger than the element value, then we can never do
2470 // this splat. The only case that we could fit the replicated bits into our
2471 // immediate field for would be zero, and we prefer to use vxor for it.
2472 if (ValSizeInBytes < ByteSize) return SDValue();
2473
2474 // If the element value is larger than the splat value, check if it consists
2475 // of a repeated bit pattern of size ByteSize.
2476 if (!APInt(ValSizeInBytes * 8, Value).isSplat(ByteSize * 8))
2477 return SDValue();
2478
2479 // Properly sign extend the value.
2480 int MaskVal = SignExtend32(Value, ByteSize * 8);
2481
2482 // If this is zero, don't match, zero matches ISD::isBuildVectorAllZeros.
2483 if (MaskVal == 0) return SDValue();
2484
2485 // Finally, if this value fits in a 5 bit sext field, return it
2486 if (SignExtend32<5>(MaskVal) == MaskVal)
2487 return DAG.getSignedTargetConstant(MaskVal, SDLoc(N), MVT::i32);
2488 return SDValue();
2489}
2490
2491//===----------------------------------------------------------------------===//
2492// Addressing Mode Selection
2493//===----------------------------------------------------------------------===//
2494
2495/// isIntS16Immediate - This method tests to see if the node is either a 32-bit
2496/// or 64-bit immediate, and if the value can be accurately represented as a
2497/// sign extension from a 16-bit value. If so, this returns true and the
2498/// immediate.
2499bool llvm::isIntS16Immediate(SDNode *N, int16_t &Imm) {
2500 if (!isa<ConstantSDNode>(N))
2501 return false;
2502
2503 Imm = (int16_t)N->getAsZExtVal();
2504 if (N->getValueType(0) == MVT::i32)
2505 return Imm == (int32_t)N->getAsZExtVal();
2506 else
2507 return Imm == (int64_t)N->getAsZExtVal();
2508}
2510 return isIntS16Immediate(Op.getNode(), Imm);
2511}
2512
2513/// Used when computing address flags for selecting loads and stores.
2514/// If we have an OR, check if the LHS and RHS are provably disjoint.
2515/// An OR of two provably disjoint values is equivalent to an ADD.
2516/// Most PPC load/store instructions compute the effective address as a sum,
2517/// so doing this conversion is useful.
2518static bool provablyDisjointOr(SelectionDAG &DAG, const SDValue &N) {
2519 if (N.getOpcode() != ISD::OR)
2520 return false;
2521 KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));
2522 if (!LHSKnown.Zero.getBoolValue())
2523 return false;
2524 KnownBits RHSKnown = DAG.computeKnownBits(N.getOperand(1));
2525 return (~(LHSKnown.Zero | RHSKnown.Zero) == 0);
2526}
2527
2528/// SelectAddressEVXRegReg - Given the specified address, check to see if it can
2529/// be represented as an indexed [r+r] operation.
2531 SDValue &Index,
2532 SelectionDAG &DAG) const {
2533 for (SDNode *U : N->users()) {
2534 if (MemSDNode *Memop = dyn_cast<MemSDNode>(U)) {
2535 if (Memop->getMemoryVT() == MVT::f64) {
2536 Base = N.getOperand(0);
2537 Index = N.getOperand(1);
2538 return true;
2539 }
2540 }
2541 }
2542 return false;
2543}
2544
2545/// isIntS34Immediate - This method tests if value of node given can be
2546/// accurately represented as a sign extension from a 34-bit value. If so,
2547/// this returns true and the immediate.
2548bool llvm::isIntS34Immediate(SDNode *N, int64_t &Imm) {
2549 if (!isa<ConstantSDNode>(N))
2550 return false;
2551
2552 Imm = cast<ConstantSDNode>(N)->getSExtValue();
2553 return isInt<34>(Imm);
2554}
2556 return isIntS34Immediate(Op.getNode(), Imm);
2557}
2558
2559/// SelectAddressRegReg - Given the specified addressed, check to see if it
2560/// can be represented as an indexed [r+r] operation. Returns false if it
2561/// can be more efficiently represented as [r+imm]. If \p EncodingAlignment is
2562/// non-zero and N can be represented by a base register plus a signed 16-bit
2563/// displacement, make a more precise judgement by checking (displacement % \p
2564/// EncodingAlignment).
2566 SDValue N, SDValue &Base, SDValue &Index, SelectionDAG &DAG,
2567 MaybeAlign EncodingAlignment) const {
2568 // If we have a PC Relative target flag don't select as [reg+reg]. It will be
2569 // a [pc+imm].
2571 return false;
2572
2573 int16_t Imm = 0;
2574 if (N.getOpcode() == ISD::ADD) {
2575 // Is there any SPE load/store (f64), which can't handle 16bit offset?
2576 // SPE load/store can only handle 8-bit offsets.
2577 if (hasSPE() && SelectAddressEVXRegReg(N, Base, Index, DAG))
2578 return true;
2579 if (isIntS16Immediate(N.getOperand(1), Imm) &&
2580 (!EncodingAlignment || isAligned(*EncodingAlignment, Imm)))
2581 return false; // r+i
2582 if (N.getOperand(1).getOpcode() == PPCISD::Lo)
2583 return false; // r+i
2584
2585 Base = N.getOperand(0);
2586 Index = N.getOperand(1);
2587 return true;
2588 } else if (N.getOpcode() == ISD::OR) {
2589 if (isIntS16Immediate(N.getOperand(1), Imm) &&
2590 (!EncodingAlignment || isAligned(*EncodingAlignment, Imm)))
2591 return false; // r+i can fold it if we can.
2592
2593 // If this is an or of disjoint bitfields, we can codegen this as an add
2594 // (for better address arithmetic) if the LHS and RHS of the OR are provably
2595 // disjoint.
2596 KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));
2597
2598 if (LHSKnown.Zero.getBoolValue()) {
2599 KnownBits RHSKnown = DAG.computeKnownBits(N.getOperand(1));
2600 // If all of the bits are known zero on the LHS or RHS, the add won't
2601 // carry.
2602 if (~(LHSKnown.Zero | RHSKnown.Zero) == 0) {
2603 Base = N.getOperand(0);
2604 Index = N.getOperand(1);
2605 return true;
2606 }
2607 }
2608 }
2609
2610 return false;
2611}
2612
2613// If we happen to be doing an i64 load or store into a stack slot that has
2614// less than a 4-byte alignment, then the frame-index elimination may need to
2615// use an indexed load or store instruction (because the offset may not be a
2616// multiple of 4). The extra register needed to hold the offset comes from the
2617// register scavenger, and it is possible that the scavenger will need to use
2618// an emergency spill slot. As a result, we need to make sure that a spill slot
2619// is allocated when doing an i64 load/store into a less-than-4-byte-aligned
2620// stack slot.
2621static void fixupFuncForFI(SelectionDAG &DAG, int FrameIdx, EVT VT) {
2622 // FIXME: This does not handle the LWA case.
2623 if (VT != MVT::i64)
2624 return;
2625
2626 // NOTE: We'll exclude negative FIs here, which come from argument
2627 // lowering, because there are no known test cases triggering this problem
2628 // using packed structures (or similar). We can remove this exclusion if
2629 // we find such a test case. The reason why this is so test-case driven is
2630 // because this entire 'fixup' is only to prevent crashes (from the
2631 // register scavenger) on not-really-valid inputs. For example, if we have:
2632 // %a = alloca i1
2633 // %b = bitcast i1* %a to i64*
2634 // store i64* a, i64 b
2635 // then the store should really be marked as 'align 1', but is not. If it
2636 // were marked as 'align 1' then the indexed form would have been
2637 // instruction-selected initially, and the problem this 'fixup' is preventing
2638 // won't happen regardless.
2639 if (FrameIdx < 0)
2640 return;
2641
2643 MachineFrameInfo &MFI = MF.getFrameInfo();
2644
2645 if (MFI.getObjectAlign(FrameIdx) >= Align(4))
2646 return;
2647
2648 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
2649 FuncInfo->setHasNonRISpills();
2650}
2651
2652/// Returns true if the address N can be represented by a base register plus
2653/// a signed 16-bit displacement [r+imm], and if it is not better
2654/// represented as reg+reg. If \p EncodingAlignment is non-zero, only accept
2655/// displacements that are multiples of that value.
2657 SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG,
2658 MaybeAlign EncodingAlignment) const {
2659 // FIXME dl should come from parent load or store, not from address
2660 SDLoc dl(N);
2661
2662 // If we have a PC Relative target flag don't select as [reg+imm]. It will be
2663 // a [pc+imm].
2665 return false;
2666
2667 // If this can be more profitably realized as r+r, fail.
2668 if (SelectAddressRegReg(N, Disp, Base, DAG, EncodingAlignment))
2669 return false;
2670
2671 if (N.getOpcode() == ISD::ADD) {
2672 int16_t imm = 0;
2673 if (isIntS16Immediate(N.getOperand(1), imm) &&
2674 (!EncodingAlignment || isAligned(*EncodingAlignment, imm))) {
2675 Disp = DAG.getSignedTargetConstant(imm, dl, N.getValueType());
2676 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
2677 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2678 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
2679 } else {
2680 Base = N.getOperand(0);
2681 }
2682 return true; // [r+i]
2683 } else if (N.getOperand(1).getOpcode() == PPCISD::Lo) {
2684 // Match LOAD (ADD (X, Lo(G))).
2685 assert(!N.getOperand(1).getConstantOperandVal(1) &&
2686 "Cannot handle constant offsets yet!");
2687 Disp = N.getOperand(1).getOperand(0); // The global address.
2692 Base = N.getOperand(0);
2693 return true; // [&g+r]
2694 }
2695 } else if (N.getOpcode() == ISD::OR) {
2696 int16_t imm = 0;
2697 if (isIntS16Immediate(N.getOperand(1), imm) &&
2698 (!EncodingAlignment || isAligned(*EncodingAlignment, imm))) {
2699 // If this is an or of disjoint bitfields, we can codegen this as an add
2700 // (for better address arithmetic) if the LHS and RHS of the OR are
2701 // provably disjoint.
2702 KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));
2703
2704 if ((LHSKnown.Zero.getZExtValue()|~(uint64_t)imm) == ~0ULL) {
2705 // If all of the bits are known zero on the LHS or RHS, the add won't
2706 // carry.
2707 if (FrameIndexSDNode *FI =
2708 dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
2709 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2710 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
2711 } else {
2712 Base = N.getOperand(0);
2713 }
2714 Disp = DAG.getTargetConstant(imm, dl, N.getValueType());
2715 return true;
2716 }
2717 }
2718 } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) {
2719 // Loading from a constant address.
2720
2721 // If this address fits entirely in a 16-bit sext immediate field, codegen
2722 // this as "d, 0"
2723 int16_t Imm;
2724 if (isIntS16Immediate(CN, Imm) &&
2725 (!EncodingAlignment || isAligned(*EncodingAlignment, Imm))) {
2726 Disp = DAG.getTargetConstant(Imm, dl, CN->getValueType(0));
2727 Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
2728 CN->getValueType(0));
2729 return true;
2730 }
2731
2732 // Handle 32-bit sext immediates with LIS + addr mode.
2733 if ((CN->getValueType(0) == MVT::i32 ||
2734 (int64_t)CN->getZExtValue() == (int)CN->getZExtValue()) &&
2735 (!EncodingAlignment ||
2736 isAligned(*EncodingAlignment, CN->getZExtValue()))) {
2737 int Addr = (int)CN->getZExtValue();
2738
2739 // Otherwise, break this down into an LIS + disp.
2740 Disp = DAG.getTargetConstant((short)Addr, dl, MVT::i32);
2741
2742 Base = DAG.getTargetConstant((Addr - (signed short)Addr) >> 16, dl,
2743 MVT::i32);
2744 unsigned Opc = CN->getValueType(0) == MVT::i32 ? PPC::LIS : PPC::LIS8;
2745 Base = SDValue(DAG.getMachineNode(Opc, dl, CN->getValueType(0), Base), 0);
2746 return true;
2747 }
2748 }
2749
2750 Disp = DAG.getTargetConstant(0, dl, getPointerTy(DAG.getDataLayout()));
2752 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2753 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
2754 } else
2755 Base = N;
2756 return true; // [r+0]
2757}
2758
2759/// Similar to the 16-bit case but for instructions that take a 34-bit
2760/// displacement field (prefixed loads/stores).
2762 SDValue &Base,
2763 SelectionDAG &DAG) const {
2764 // Only on 64-bit targets.
2765 if (N.getValueType() != MVT::i64)
2766 return false;
2767
2768 SDLoc dl(N);
2769 int64_t Imm = 0;
2770
2771 if (N.getOpcode() == ISD::ADD) {
2772 if (!isIntS34Immediate(N.getOperand(1), Imm))
2773 return false;
2774 Disp = DAG.getSignedTargetConstant(Imm, dl, N.getValueType());
2775 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0)))
2776 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2777 else
2778 Base = N.getOperand(0);
2779 return true;
2780 }
2781
2782 if (N.getOpcode() == ISD::OR) {
2783 if (!isIntS34Immediate(N.getOperand(1), Imm))
2784 return false;
2785 // If this is an or of disjoint bitfields, we can codegen this as an add
2786 // (for better address arithmetic) if the LHS and RHS of the OR are
2787 // provably disjoint.
2788 KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));
2789 if ((LHSKnown.Zero.getZExtValue() | ~(uint64_t)Imm) != ~0ULL)
2790 return false;
2791 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0)))
2792 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2793 else
2794 Base = N.getOperand(0);
2795 Disp = DAG.getSignedTargetConstant(Imm, dl, N.getValueType());
2796 return true;
2797 }
2798
2799 if (isIntS34Immediate(N, Imm)) { // If the address is a 34-bit const.
2800 Disp = DAG.getSignedTargetConstant(Imm, dl, N.getValueType());
2801 Base = DAG.getRegister(PPC::ZERO8, N.getValueType());
2802 return true;
2803 }
2804
2805 return false;
2806}
2807
2808/// SelectAddressRegRegOnly - Given the specified addressed, force it to be
2809/// represented as an indexed [r+r] operation.
2811 SDValue &Index,
2812 SelectionDAG &DAG) const {
2813 // Check to see if we can easily represent this as an [r+r] address. This
2814 // will fail if it thinks that the address is more profitably represented as
2815 // reg+imm, e.g. where imm = 0.
2816 if (SelectAddressRegReg(N, Base, Index, DAG))
2817 return true;
2818
2819 // If the address is the result of an add, we will utilize the fact that the
2820 // address calculation includes an implicit add. However, we can reduce
2821 // register pressure if we do not materialize a constant just for use as the
2822 // index register. We only get rid of the add if it is not an add of a
2823 // value and a 16-bit signed constant and both have a single use.
2824 int16_t imm = 0;
2825 if (N.getOpcode() == ISD::ADD &&
2826 (!isIntS16Immediate(N.getOperand(1), imm) ||
2827 !N.getOperand(1).hasOneUse() || !N.getOperand(0).hasOneUse())) {
2828 Base = N.getOperand(0);
2829 Index = N.getOperand(1);
2830 return true;
2831 }
2832
2833 // Otherwise, do it the hard way, using R0 as the base register.
2834 Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
2835 N.getValueType());
2836 Index = N;
2837 return true;
2838}
2839
2840template <typename Ty> static bool isValidPCRelNode(SDValue N) {
2841 Ty *PCRelCand = dyn_cast<Ty>(N);
2842 return PCRelCand && (PPCInstrInfo::hasPCRelFlag(PCRelCand->getTargetFlags()));
2843}
2844
2845/// Returns true if this address is a PC Relative address.
2846/// PC Relative addresses are marked with the flag PPCII::MO_PCREL_FLAG
2847/// or if the node opcode is PPCISD::MAT_PCREL_ADDR.
2849 // This is a materialize PC Relative node. Always select this as PC Relative.
2850 Base = N;
2851 if (N.getOpcode() == PPCISD::MAT_PCREL_ADDR)
2852 return true;
2857 return true;
2858 return false;
2859}
2860
2861/// Returns true if we should use a direct load into vector instruction
2862/// (such as lxsd or lfd), instead of a load into gpr + direct move sequence.
2863static bool usePartialVectorLoads(SDNode *N, const PPCSubtarget& ST) {
2864
2865 // If there are any other uses other than scalar to vector, then we should
2866 // keep it as a scalar load -> direct move pattern to prevent multiple
2867 // loads.
2869 if (!LD)
2870 return false;
2871
2872 EVT MemVT = LD->getMemoryVT();
2873 if (!MemVT.isSimple())
2874 return false;
2875 switch(MemVT.getSimpleVT().SimpleTy) {
2876 case MVT::i64:
2877 break;
2878 case MVT::i32:
2879 if (!ST.hasP8Vector())
2880 return false;
2881 break;
2882 case MVT::i16:
2883 case MVT::i8:
2884 if (!ST.hasP9Vector())
2885 return false;
2886 break;
2887 default:
2888 return false;
2889 }
2890
2891 SDValue LoadedVal(N, 0);
2892 if (!LoadedVal.hasOneUse())
2893 return false;
2894
2895 for (SDUse &Use : LD->uses())
2896 if (Use.getResNo() == 0 &&
2897 Use.getUser()->getOpcode() != ISD::SCALAR_TO_VECTOR &&
2898 Use.getUser()->getOpcode() != PPCISD::SCALAR_TO_VECTOR_PERMUTED)
2899 return false;
2900
2901 return true;
2902}
2903
2904/// getPreIndexedAddressParts - returns true by value, base pointer and
2905/// offset pointer and addressing mode by reference if the node's address
2906/// can be legally represented as pre-indexed load / store address.
2908 SDValue &Offset,
2910 SelectionDAG &DAG) const {
2911 if (DisablePPCPreinc) return false;
2912
2913 bool isLoad = true;
2914 SDValue Ptr;
2915 EVT VT;
2916 Align Alignment;
2917 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
2918 Ptr = LD->getBasePtr();
2919 VT = LD->getMemoryVT();
2920 Alignment = LD->getAlign();
2921 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
2922 Ptr = ST->getBasePtr();
2923 VT = ST->getMemoryVT();
2924 Alignment = ST->getAlign();
2925 isLoad = false;
2926 } else
2927 return false;
2928
2929 // Do not generate pre-inc forms for specific loads that feed scalar_to_vector
2930 // instructions because we can fold these into a more efficient instruction
2931 // instead, (such as LXSD).
2932 if (isLoad && usePartialVectorLoads(N, Subtarget)) {
2933 return false;
2934 }
2935
2936 // PowerPC doesn't have preinc load/store instructions for vectors
2937 if (VT.isVector())
2938 return false;
2939
2940 if (SelectAddressRegReg(Ptr, Base, Offset, DAG)) {
2941 // Common code will reject creating a pre-inc form if the base pointer
2942 // is a frame index, or if N is a store and the base pointer is either
2943 // the same as or a predecessor of the value being stored. Check for
2944 // those situations here, and try with swapped Base/Offset instead.
2945 bool Swap = false;
2946
2948 Swap = true;
2949 else if (!isLoad) {
2950 SDValue Val = cast<StoreSDNode>(N)->getValue();
2951 if (Val == Base || Base.getNode()->isPredecessorOf(Val.getNode()))
2952 Swap = true;
2953 }
2954
2955 if (Swap)
2957
2958 AM = ISD::PRE_INC;
2959 return true;
2960 }
2961
2962 // LDU/STU can only handle immediates that are a multiple of 4.
2963 if (VT != MVT::i64) {
2964 if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, std::nullopt))
2965 return false;
2966 } else {
2967 // LDU/STU need an address with at least 4-byte alignment.
2968 if (Alignment < Align(4))
2969 return false;
2970
2971 if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, Align(4)))
2972 return false;
2973 }
2974
2975 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
2976 // PPC64 doesn't have lwau, but it does have lwaux. Reject preinc load of
2977 // sext i32 to i64 when addr mode is r+i.
2978 if (LD->getValueType(0) == MVT::i64 && LD->getMemoryVT() == MVT::i32 &&
2979 LD->getExtensionType() == ISD::SEXTLOAD &&
2981 return false;
2982 }
2983
2984 AM = ISD::PRE_INC;
2985 return true;
2986}
2987
2988//===----------------------------------------------------------------------===//
2989// LowerOperation implementation
2990//===----------------------------------------------------------------------===//
2991
2992/// Return true if we should reference labels using a PICBase, set the HiOpFlags
2993/// and LoOpFlags to the target MO flags.
2994static void getLabelAccessInfo(bool IsPIC, const PPCSubtarget &Subtarget,
2995 unsigned &HiOpFlags, unsigned &LoOpFlags,
2996 const GlobalValue *GV = nullptr) {
2997 HiOpFlags = PPCII::MO_HA;
2998 LoOpFlags = PPCII::MO_LO;
2999
3000 // Don't use the pic base if not in PIC relocation model.
3001 if (IsPIC) {
3002 HiOpFlags = PPCII::MO_PIC_HA_FLAG;
3003 LoOpFlags = PPCII::MO_PIC_LO_FLAG;
3004 }
3005}
3006
3007static SDValue LowerLabelRef(SDValue HiPart, SDValue LoPart, bool isPIC,
3008 SelectionDAG &DAG) {
3009 SDLoc DL(HiPart);
3010 EVT PtrVT = HiPart.getValueType();
3011 SDValue Zero = DAG.getConstant(0, DL, PtrVT);
3012
3013 SDValue Hi = DAG.getNode(PPCISD::Hi, DL, PtrVT, HiPart, Zero);
3014 SDValue Lo = DAG.getNode(PPCISD::Lo, DL, PtrVT, LoPart, Zero);
3015
3016 // With PIC, the first instruction is actually "GR+hi(&G)".
3017 if (isPIC)
3018 Hi = DAG.getNode(ISD::ADD, DL, PtrVT,
3019 DAG.getNode(PPCISD::GlobalBaseReg, DL, PtrVT), Hi);
3020
3021 // Generate non-pic code that has direct accesses to the constant pool.
3022 // The address of the global is just (hi(&g)+lo(&g)).
3023 return DAG.getNode(ISD::ADD, DL, PtrVT, Hi, Lo);
3024}
3025
3027 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
3028 FuncInfo->setUsesTOCBasePtr();
3029}
3030
3034
3035SDValue PPCTargetLowering::getTOCEntry(SelectionDAG &DAG, const SDLoc &dl,
3036 SDValue GA) const {
3037 EVT VT = Subtarget.getScalarIntVT();
3038 SDValue Reg = Subtarget.isPPC64() ? DAG.getRegister(PPC::X2, VT)
3039 : Subtarget.isAIXABI()
3040 ? DAG.getRegister(PPC::R2, VT)
3041 : DAG.getNode(PPCISD::GlobalBaseReg, dl, VT);
3042 SDValue Ops[] = { GA, Reg };
3043 return DAG.getMemIntrinsicNode(
3044 PPCISD::TOC_ENTRY, dl, DAG.getVTList(VT, MVT::Other), Ops, VT,
3047}
3048
3049SDValue PPCTargetLowering::LowerConstantPool(SDValue Op,
3050 SelectionDAG &DAG) const {
3051 EVT PtrVT = Op.getValueType();
3052 ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
3053 const Constant *C = CP->getConstVal();
3054
3055 // 64-bit SVR4 ABI and AIX ABI code are always position-independent.
3056 // The actual address of the GlobalValue is stored in the TOC.
3057 if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
3058 if (Subtarget.isUsingPCRelativeCalls()) {
3059 SDLoc DL(CP);
3060 EVT Ty = getPointerTy(DAG.getDataLayout());
3061 SDValue ConstPool = DAG.getTargetConstantPool(
3062 C, Ty, CP->getAlign(), CP->getOffset(), PPCII::MO_PCREL_FLAG);
3063 return DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, ConstPool);
3064 }
3065 setUsesTOCBasePtr(DAG);
3066 SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), 0);
3067 return getTOCEntry(DAG, SDLoc(CP), GA);
3068 }
3069
3070 unsigned MOHiFlag, MOLoFlag;
3071 bool IsPIC = isPositionIndependent();
3072 getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);
3073
3074 if (IsPIC && Subtarget.isSVR4ABI()) {
3075 SDValue GA =
3077 return getTOCEntry(DAG, SDLoc(CP), GA);
3078 }
3079
3080 SDValue CPIHi =
3081 DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), 0, MOHiFlag);
3082 SDValue CPILo =
3083 DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), 0, MOLoFlag);
3084 return LowerLabelRef(CPIHi, CPILo, IsPIC, DAG);
3085}
3086
3087// For 64-bit PowerPC, prefer the more compact relative encodings.
3088// This trades 32 bits per jump table entry for one or two instructions
3089// on the jump site.
3096
3099 return false;
3100 if (Subtarget.isPPC64() || Subtarget.isAIXABI())
3101 return true;
3103}
3104
3106 SelectionDAG &DAG) const {
3107 if (!Subtarget.isPPC64() || Subtarget.isAIXABI())
3109
3110 switch (getTargetMachine().getCodeModel()) {
3111 case CodeModel::Small:
3112 case CodeModel::Medium:
3114 default:
3115 return DAG.getNode(PPCISD::GlobalBaseReg, SDLoc(),
3117 }
3118}
3119
3120const MCExpr *
3122 unsigned JTI,
3123 MCContext &Ctx) const {
3124 if (!Subtarget.isPPC64() || Subtarget.isAIXABI())
3126
3127 switch (getTargetMachine().getCodeModel()) {
3128 case CodeModel::Small:
3129 case CodeModel::Medium:
3131 default:
3132 return MCSymbolRefExpr::create(MF->getPICBaseSymbol(), Ctx);
3133 }
3134}
3135
3136SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
3137 EVT PtrVT = Op.getValueType();
3139
3140 // isUsingPCRelativeCalls() returns true when PCRelative is enabled
3141 if (Subtarget.isUsingPCRelativeCalls()) {
3142 SDLoc DL(JT);
3143 EVT Ty = getPointerTy(DAG.getDataLayout());
3144 SDValue GA =
3146 SDValue MatAddr = DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
3147 return MatAddr;
3148 }
3149
3150 // 64-bit SVR4 ABI and AIX ABI code are always position-independent.
3151 // The actual address of the GlobalValue is stored in the TOC.
3152 if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
3153 setUsesTOCBasePtr(DAG);
3154 SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
3155 return getTOCEntry(DAG, SDLoc(JT), GA);
3156 }
3157
3158 unsigned MOHiFlag, MOLoFlag;
3159 bool IsPIC = isPositionIndependent();
3160 getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);
3161
3162 if (IsPIC && Subtarget.isSVR4ABI()) {
3163 SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT,
3165 return getTOCEntry(DAG, SDLoc(GA), GA);
3166 }
3167
3168 SDValue JTIHi = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOHiFlag);
3169 SDValue JTILo = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOLoFlag);
3170 return LowerLabelRef(JTIHi, JTILo, IsPIC, DAG);
3171}
3172
3173SDValue PPCTargetLowering::LowerBlockAddress(SDValue Op,
3174 SelectionDAG &DAG) const {
3175 EVT PtrVT = Op.getValueType();
3176 BlockAddressSDNode *BASDN = cast<BlockAddressSDNode>(Op);
3177 const BlockAddress *BA = BASDN->getBlockAddress();
3178
3179 // isUsingPCRelativeCalls() returns true when PCRelative is enabled
3180 if (Subtarget.isUsingPCRelativeCalls()) {
3181 SDLoc DL(BASDN);
3182 EVT Ty = getPointerTy(DAG.getDataLayout());
3183 SDValue GA = DAG.getTargetBlockAddress(BA, Ty, BASDN->getOffset(),
3185 SDValue MatAddr = DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
3186 return MatAddr;
3187 }
3188
3189 // 64-bit SVR4 ABI and AIX ABI code are always position-independent.
3190 // The actual BlockAddress is stored in the TOC.
3191 if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
3192 setUsesTOCBasePtr(DAG);
3193 SDValue GA = DAG.getTargetBlockAddress(BA, PtrVT, BASDN->getOffset());
3194 return getTOCEntry(DAG, SDLoc(BASDN), GA);
3195 }
3196
3197 // 32-bit position-independent ELF stores the BlockAddress in the .got.
3198 if (Subtarget.is32BitELFABI() && isPositionIndependent())
3199 return getTOCEntry(
3200 DAG, SDLoc(BASDN),
3201 DAG.getTargetBlockAddress(BA, PtrVT, BASDN->getOffset()));
3202
3203 unsigned MOHiFlag, MOLoFlag;
3204 bool IsPIC = isPositionIndependent();
3205 getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);
3206 SDValue TgtBAHi = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOHiFlag);
3207 SDValue TgtBALo = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOLoFlag);
3208 return LowerLabelRef(TgtBAHi, TgtBALo, IsPIC, DAG);
3209}
3210
3211SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
3212 SelectionDAG &DAG) const {
3213 if (Subtarget.isAIXABI())
3214 return LowerGlobalTLSAddressAIX(Op, DAG);
3215
3216 return LowerGlobalTLSAddressLinux(Op, DAG);
3217}
3218
3219/// updateForAIXShLibTLSModelOpt - Helper to initialize TLS model opt settings,
3220/// and then apply the update.
3222 SelectionDAG &DAG,
3223 const TargetMachine &TM) {
3224 // Initialize TLS model opt setting lazily:
3225 // (1) Use initial-exec for single TLS var references within current function.
3226 // (2) Use local-dynamic for multiple TLS var references within current
3227 // function.
3228 PPCFunctionInfo *FuncInfo =
3230 if (!FuncInfo->isAIXFuncTLSModelOptInitDone()) {
3232 // Iterate over all instructions within current function, collect all TLS
3233 // global variables (global variables taken as the first parameter to
3234 // Intrinsic::threadlocal_address).
3235 const Function &Func = DAG.getMachineFunction().getFunction();
3236 for (const BasicBlock &BB : Func)
3237 for (const Instruction &I : BB)
3238 if (I.getOpcode() == Instruction::Call)
3239 if (const CallInst *CI = dyn_cast<const CallInst>(&I))
3240 if (Function *CF = CI->getCalledFunction())
3241 if (CF->isDeclaration() &&
3242 CF->getIntrinsicID() == Intrinsic::threadlocal_address)
3243 if (const GlobalValue *GV =
3244 dyn_cast<GlobalValue>(I.getOperand(0))) {
3245 TLSModel::Model GVModel = TM.getTLSModel(GV);
3246 if (GVModel == TLSModel::LocalDynamic)
3247 TLSGV.insert(GV);
3248 }
3249
3250 unsigned TLSGVCnt = TLSGV.size();
3251 LLVM_DEBUG(dbgs() << format("LocalDynamic TLSGV count:%d\n", TLSGVCnt));
3252 if (TLSGVCnt <= PPCAIXTLSModelOptUseIEForLDLimit)
3253 FuncInfo->setAIXFuncUseTLSIEForLD();
3255 }
3256
3257 if (FuncInfo->isAIXFuncUseTLSIEForLD()) {
3258 LLVM_DEBUG(
3259 dbgs() << DAG.getMachineFunction().getName()
3260 << " function is using the TLS-IE model for TLS-LD access.\n");
3261 Model = TLSModel::InitialExec;
3262 }
3263}
3264
3265SDValue PPCTargetLowering::LowerGlobalTLSAddressAIX(SDValue Op,
3266 SelectionDAG &DAG) const {
3267 GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
3268
3269 if (DAG.getTarget().useEmulatedTLS())
3270 report_fatal_error("Emulated TLS is not yet supported on AIX");
3271
3272 SDLoc dl(GA);
3273 const GlobalValue *GV = GA->getGlobal();
3274 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3275 bool Is64Bit = Subtarget.isPPC64();
3277
3278 // Apply update to the TLS model.
3279 if (Subtarget.hasAIXShLibTLSModelOpt())
3281
3282 // TLS variables are accessed through TOC entries.
3283 // To support this, set the DAG to use the TOC base pointer.
3284 setUsesTOCBasePtr(DAG);
3285
3286 bool IsTLSLocalExecModel = Model == TLSModel::LocalExec;
3287
3288 if (IsTLSLocalExecModel || Model == TLSModel::InitialExec) {
3289 bool HasAIXSmallLocalExecTLS = Subtarget.hasAIXSmallLocalExecTLS();
3290 bool HasAIXSmallTLSGlobalAttr = false;
3291 SDValue VariableOffsetTGA =
3292 DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, PPCII::MO_TPREL_FLAG);
3293 SDValue VariableOffset = getTOCEntry(DAG, dl, VariableOffsetTGA);
3294 SDValue TLSReg;
3295
3296 if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV))
3297 if (GVar->hasAttribute("aix-small-tls"))
3298 HasAIXSmallTLSGlobalAttr = true;
3299
3300 if (Is64Bit) {
3301 // For local-exec and initial-exec on AIX (64-bit), the sequence generated
3302 // involves a load of the variable offset (from the TOC), followed by an
3303 // add of the loaded variable offset to R13 (the thread pointer).
3304 // This code sequence looks like:
3305 // ld reg1,var[TC](2)
3306 // add reg2, reg1, r13 // r13 contains the thread pointer
3307 TLSReg = DAG.getRegister(PPC::X13, MVT::i64);
3308
3309 // With the -maix-small-local-exec-tls option, or with the "aix-small-tls"
3310 // global variable attribute, produce a faster access sequence for
3311 // local-exec TLS variables where the offset from the TLS base is encoded
3312 // as an immediate operand.
3313 //
3314 // We only utilize the faster local-exec access sequence when the TLS
3315 // variable has a size within the policy limit. We treat types that are
3316 // not sized or are empty as being over the policy size limit.
3317 if ((HasAIXSmallLocalExecTLS || HasAIXSmallTLSGlobalAttr) &&
3318 IsTLSLocalExecModel) {
3319 Type *GVType = GV->getValueType();
3320 if (GVType->isSized() && !GVType->isEmptyTy() &&
3321 GV->getDataLayout().getTypeAllocSize(GVType) <=
3323 return DAG.getNode(PPCISD::Lo, dl, PtrVT, VariableOffsetTGA, TLSReg);
3324 }
3325 } else {
3326 // For local-exec and initial-exec on AIX (32-bit), the sequence generated
3327 // involves loading the variable offset from the TOC, generating a call to
3328 // .__get_tpointer to get the thread pointer (which will be in R3), and
3329 // adding the two together:
3330 // lwz reg1,var[TC](2)
3331 // bla .__get_tpointer
3332 // add reg2, reg1, r3
3333 TLSReg = DAG.getNode(PPCISD::GET_TPOINTER, dl, PtrVT);
3334
3335 // We do not implement the 32-bit version of the faster access sequence
3336 // for local-exec that is controlled by the -maix-small-local-exec-tls
3337 // option, or the "aix-small-tls" global variable attribute.
3338 if (HasAIXSmallLocalExecTLS || HasAIXSmallTLSGlobalAttr)
3339 report_fatal_error("The small-local-exec TLS access sequence is "
3340 "currently only supported on AIX (64-bit mode).");
3341 }
3342 return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TLSReg, VariableOffset);
3343 }
3344
3345 if (Model == TLSModel::LocalDynamic) {
3346 bool HasAIXSmallLocalDynamicTLS = Subtarget.hasAIXSmallLocalDynamicTLS();
3347
3348 // We do not implement the 32-bit version of the faster access sequence
3349 // for local-dynamic that is controlled by -maix-small-local-dynamic-tls.
3350 if (!Is64Bit && HasAIXSmallLocalDynamicTLS)
3351 report_fatal_error("The small-local-dynamic TLS access sequence is "
3352 "currently only supported on AIX (64-bit mode).");
3353
3354 // For local-dynamic on AIX, we need to generate one TOC entry for each
3355 // variable offset, and a single module-handle TOC entry for the entire
3356 // file.
3357
3358 SDValue VariableOffsetTGA =
3359 DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, PPCII::MO_TLSLD_FLAG);
3360 SDValue VariableOffset = getTOCEntry(DAG, dl, VariableOffsetTGA);
3361
3363 GlobalVariable *TLSGV =
3364 dyn_cast_or_null<GlobalVariable>(M->getOrInsertGlobal(
3365 StringRef("_$TLSML"), PointerType::getUnqual(*DAG.getContext())));
3367 assert(TLSGV && "Not able to create GV for _$TLSML.");
3368 SDValue ModuleHandleTGA =
3369 DAG.getTargetGlobalAddress(TLSGV, dl, PtrVT, 0, PPCII::MO_TLSLDM_FLAG);
3370 SDValue ModuleHandleTOC = getTOCEntry(DAG, dl, ModuleHandleTGA);
3371 SDValue ModuleHandle =
3372 DAG.getNode(PPCISD::TLSLD_AIX, dl, PtrVT, ModuleHandleTOC);
3373
3374 // With the -maix-small-local-dynamic-tls option, produce a faster access
3375 // sequence for local-dynamic TLS variables where the offset from the
3376 // module-handle is encoded as an immediate operand.
3377 //
3378 // We only utilize the faster local-dynamic access sequence when the TLS
3379 // variable has a size within the policy limit. We treat types that are
3380 // not sized or are empty as being over the policy size limit.
3381 if (HasAIXSmallLocalDynamicTLS) {
3382 Type *GVType = GV->getValueType();
3383 if (GVType->isSized() && !GVType->isEmptyTy() &&
3384 GV->getDataLayout().getTypeAllocSize(GVType) <=
3386 return DAG.getNode(PPCISD::Lo, dl, PtrVT, VariableOffsetTGA,
3387 ModuleHandle);
3388 }
3389
3390 return DAG.getNode(ISD::ADD, dl, PtrVT, ModuleHandle, VariableOffset);
3391 }
3392
3393 // If Local- or Initial-exec or Local-dynamic is not possible or specified,
3394 // all GlobalTLSAddress nodes are lowered using the general-dynamic model. We
3395 // need to generate two TOC entries, one for the variable offset, one for the
3396 // region handle. The global address for the TOC entry of the region handle is
3397 // created with the MO_TLSGDM_FLAG flag and the global address for the TOC
3398 // entry of the variable offset is created with MO_TLSGD_FLAG.
3399 SDValue VariableOffsetTGA =
3400 DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, PPCII::MO_TLSGD_FLAG);
3401 SDValue RegionHandleTGA =
3402 DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, PPCII::MO_TLSGDM_FLAG);
3403 SDValue VariableOffset = getTOCEntry(DAG, dl, VariableOffsetTGA);
3404 SDValue RegionHandle = getTOCEntry(DAG, dl, RegionHandleTGA);
3405 return DAG.getNode(PPCISD::TLSGD_AIX, dl, PtrVT, VariableOffset,
3406 RegionHandle);
3407}
3408
3409SDValue PPCTargetLowering::LowerGlobalTLSAddressLinux(SDValue Op,
3410 SelectionDAG &DAG) const {
3411 // FIXME: TLS addresses currently use medium model code sequences,
3412 // which is the most useful form. Eventually support for small and
3413 // large models could be added if users need it, at the cost of
3414 // additional complexity.
3415 GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
3416 if (DAG.getTarget().useEmulatedTLS())
3417 return LowerToTLSEmulatedModel(GA, DAG);
3418
3419 SDLoc dl(GA);
3420 const GlobalValue *GV = GA->getGlobal();
3421 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3422 bool is64bit = Subtarget.isPPC64();
3423 const Module *M = DAG.getMachineFunction().getFunction().getParent();
3424 PICLevel::Level picLevel = M->getPICLevel();
3425
3426 const TargetMachine &TM = getTargetMachine();
3427 TLSModel::Model Model = TM.getTLSModel(GV);
3428
3429 if (Model == TLSModel::LocalExec) {
3430 if (Subtarget.isUsingPCRelativeCalls()) {
3431 SDValue TLSReg = DAG.getRegister(PPC::X13, MVT::i64);
3432 SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3434 SDValue MatAddr =
3435 DAG.getNode(PPCISD::TLS_LOCAL_EXEC_MAT_ADDR, dl, PtrVT, TGA);
3436 return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TLSReg, MatAddr);
3437 }
3438
3439 SDValue TGAHi = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3441 SDValue TGALo = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3443 SDValue TLSReg = is64bit ? DAG.getRegister(PPC::X13, MVT::i64)
3444 : DAG.getRegister(PPC::R2, MVT::i32);
3445
3446 SDValue Hi = DAG.getNode(PPCISD::Hi, dl, PtrVT, TGAHi, TLSReg);
3447 return DAG.getNode(PPCISD::Lo, dl, PtrVT, TGALo, Hi);
3448 }
3449
3450 if (Model == TLSModel::InitialExec) {
3451 bool IsPCRel = Subtarget.isUsingPCRelativeCalls();
3453 GV, dl, PtrVT, 0, IsPCRel ? PPCII::MO_GOT_TPREL_PCREL_FLAG : 0);
3454 SDValue TGATLS = DAG.getTargetGlobalAddress(
3455 GV, dl, PtrVT, 0, IsPCRel ? PPCII::MO_TLS_PCREL_FLAG : PPCII::MO_TLS);
3456 SDValue TPOffset;
3457 if (IsPCRel) {
3458 SDValue MatPCRel = DAG.getNode(PPCISD::MAT_PCREL_ADDR, dl, PtrVT, TGA);
3459 TPOffset = DAG.getLoad(MVT::i64, dl, DAG.getEntryNode(), MatPCRel,
3460 MachinePointerInfo());
3461 } else {
3462 SDValue GOTPtr;
3463 if (is64bit) {
3464 setUsesTOCBasePtr(DAG);
3465 SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
3466 GOTPtr =
3467 DAG.getNode(PPCISD::ADDIS_GOT_TPREL_HA, dl, PtrVT, GOTReg, TGA);
3468 } else {
3469 if (!TM.isPositionIndependent())
3470 GOTPtr = DAG.getNode(PPCISD::PPC32_GOT, dl, PtrVT);
3471 else if (picLevel == PICLevel::SmallPIC)
3472 GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
3473 else
3474 GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
3475 }
3476 TPOffset = DAG.getNode(PPCISD::LD_GOT_TPREL_L, dl, PtrVT, TGA, GOTPtr);
3477 }
3478 return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TPOffset, TGATLS);
3479 }
3480
3481 if (Model == TLSModel::GeneralDynamic) {
3482 if (Subtarget.isUsingPCRelativeCalls()) {
3483 SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3485 return DAG.getNode(PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR, dl, PtrVT, TGA);
3486 }
3487
3488 SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
3489 SDValue GOTPtr;
3490 if (is64bit) {
3491 setUsesTOCBasePtr(DAG);
3492 SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
3493 GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSGD_HA, dl, PtrVT,
3494 GOTReg, TGA);
3495 } else {
3496 if (picLevel == PICLevel::SmallPIC)
3497 GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
3498 else
3499 GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
3500 }
3501 return DAG.getNode(PPCISD::ADDI_TLSGD_L_ADDR, dl, PtrVT,
3502 GOTPtr, TGA, TGA);
3503 }
3504
3505 if (Model == TLSModel::LocalDynamic) {
3506 if (Subtarget.isUsingPCRelativeCalls()) {
3507 SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3509 SDValue MatPCRel =
3510 DAG.getNode(PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR, dl, PtrVT, TGA);
3511 return DAG.getNode(PPCISD::PADDI_DTPREL, dl, PtrVT, MatPCRel, TGA);
3512 }
3513
3514 SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
3515 SDValue GOTPtr;
3516 if (is64bit) {
3517 setUsesTOCBasePtr(DAG);
3518 SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
3519 GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSLD_HA, dl, PtrVT,
3520 GOTReg, TGA);
3521 } else {
3522 if (picLevel == PICLevel::SmallPIC)
3523 GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
3524 else
3525 GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
3526 }
3527 SDValue TLSAddr = DAG.getNode(PPCISD::ADDI_TLSLD_L_ADDR, dl,
3528 PtrVT, GOTPtr, TGA, TGA);
3529 SDValue DtvOffsetHi = DAG.getNode(PPCISD::ADDIS_DTPREL_HA, dl,
3530 PtrVT, TLSAddr, TGA);
3531 return DAG.getNode(PPCISD::ADDI_DTPREL_L, dl, PtrVT, DtvOffsetHi, TGA);
3532 }
3533
3534 llvm_unreachable("Unknown TLS model!");
3535}
3536
3537SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op,
3538 SelectionDAG &DAG) const {
3539 EVT PtrVT = Op.getValueType();
3540 GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
3541 SDLoc DL(GSDN);
3542 const GlobalValue *GV = GSDN->getGlobal();
3543
3544 // 64-bit SVR4 ABI & AIX ABI code is always position-independent.
3545 // The actual address of the GlobalValue is stored in the TOC.
3546 if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
3547 if (Subtarget.isUsingPCRelativeCalls()) {
3548 EVT Ty = getPointerTy(DAG.getDataLayout());
3550 SDValue GA = DAG.getTargetGlobalAddress(GV, DL, Ty, GSDN->getOffset(),
3552 SDValue MatPCRel = DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
3553 SDValue Load = DAG.getLoad(MVT::i64, DL, DAG.getEntryNode(), MatPCRel,
3554 MachinePointerInfo());
3555 return Load;
3556 } else {
3557 SDValue GA = DAG.getTargetGlobalAddress(GV, DL, Ty, GSDN->getOffset(),
3559 return DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
3560 }
3561 }
3562 setUsesTOCBasePtr(DAG);
3563 SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset());
3564 return getTOCEntry(DAG, DL, GA);
3565 }
3566
3567 unsigned MOHiFlag, MOLoFlag;
3568 bool IsPIC = isPositionIndependent();
3569 getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag, GV);
3570
3571 if (IsPIC && Subtarget.isSVR4ABI()) {
3572 SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT,
3573 GSDN->getOffset(),
3575 return getTOCEntry(DAG, DL, GA);
3576 }
3577
3578 SDValue GAHi =
3579 DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOHiFlag);
3580 SDValue GALo =
3581 DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOLoFlag);
3582
3583 return LowerLabelRef(GAHi, GALo, IsPIC, DAG);
3584}
3585
3586SDValue PPCTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
3587 bool IsStrict = Op->isStrictFPOpcode();
3588 ISD::CondCode CC =
3589 cast<CondCodeSDNode>(Op.getOperand(IsStrict ? 3 : 2))->get();
3590 SDValue LHS = Op.getOperand(IsStrict ? 1 : 0);
3591 SDValue RHS = Op.getOperand(IsStrict ? 2 : 1);
3592 SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
3593 EVT LHSVT = LHS.getValueType();
3594 SDLoc dl(Op);
3595
3596 // Soften the setcc with libcall if it is fp128.
3597 if (LHSVT == MVT::f128) {
3598 assert(!Subtarget.hasP9Vector() &&
3599 "SETCC for f128 is already legal under Power9!");
3600 softenSetCCOperands(DAG, LHSVT, LHS, RHS, CC, dl, LHS, RHS, Chain,
3601 Op->getOpcode() == ISD::STRICT_FSETCCS);
3602 if (RHS.getNode())
3603 LHS = DAG.getNode(ISD::SETCC, dl, Op.getValueType(), LHS, RHS,
3604 DAG.getCondCode(CC));
3605 if (IsStrict)
3606 return DAG.getMergeValues({LHS, Chain}, dl);
3607 return LHS;
3608 }
3609
3610 assert(!IsStrict && "Don't know how to handle STRICT_FSETCC!");
3611
3612 if (Op.getValueType() == MVT::v2i64) {
3613 // When the operands themselves are v2i64 values, we need to do something
3614 // special because VSX has no underlying comparison operations for these.
3615 if (LHS.getValueType() == MVT::v2i64) {
3616 // Equality can be handled by casting to the legal type for Altivec
3617 // comparisons, everything else needs to be expanded.
3618 if (CC != ISD::SETEQ && CC != ISD::SETNE)
3619 return SDValue();
3620 SDValue SetCC32 = DAG.getSetCC(
3621 dl, MVT::v4i32, DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, LHS),
3622 DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, RHS), CC);
3623 int ShuffV[] = {1, 0, 3, 2};
3624 SDValue Shuff =
3625 DAG.getVectorShuffle(MVT::v4i32, dl, SetCC32, SetCC32, ShuffV);
3626 return DAG.getBitcast(MVT::v2i64,
3627 DAG.getNode(CC == ISD::SETEQ ? ISD::AND : ISD::OR,
3628 dl, MVT::v4i32, Shuff, SetCC32));
3629 }
3630
3631 // We handle most of these in the usual way.
3632 return Op;
3633 }
3634
3635 // If we're comparing for equality to zero, expose the fact that this is
3636 // implemented as a ctlz/srl pair on ppc, so that the dag combiner can
3637 // fold the new nodes.
3638 if (SDValue V = lowerCmpEqZeroToCtlzSrl(Op, DAG))
3639 return V;
3640
3641 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS)) {
3642 // Leave comparisons against 0 and -1 alone for now, since they're usually
3643 // optimized. FIXME: revisit this when we can custom lower all setcc
3644 // optimizations.
3645 if (C->isAllOnes() || C->isZero())
3646 return SDValue();
3647 }
3648
3649 // If we have an integer seteq/setne, turn it into a compare against zero
3650 // by xor'ing the rhs with the lhs, which is faster than setting a
3651 // condition register, reading it back out, and masking the correct bit. The
3652 // normal approach here uses sub to do this instead of xor. Using xor exposes
3653 // the result to other bit-twiddling opportunities.
3654 if (LHSVT.isInteger() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
3655 EVT VT = Op.getValueType();
3656 SDValue Sub = DAG.getNode(ISD::XOR, dl, LHSVT, LHS, RHS);
3657 return DAG.getSetCC(dl, VT, Sub, DAG.getConstant(0, dl, LHSVT), CC);
3658 }
3659 return SDValue();
3660}
3661
SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
  // Custom lowering of ISD::VAARG for 32-bit SVR4.  The va_list layout used
  // here (see LowerVASTART's offsets): byte 0 = gpr index, byte 1 = fpr
  // index, bytes 4..7 = overflow (stack) argument area pointer, bytes
  // 8..11 = register save area pointer.
  SDNode *Node = Op.getNode();
  EVT VT = Node->getValueType(0);
  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  SDValue InChain = Node->getOperand(0);
  SDValue VAListPtr = Node->getOperand(1);
  const Value *SV = cast<SrcValueSDNode>(Node->getOperand(2))->getValue();
  SDLoc dl(Node);

  assert(!Subtarget.isPPC64() && "LowerVAARG is PPC32 only");

  // gpr_index: zero-extended byte load from the start of the va_list.
  SDValue GprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain,
                                    VAListPtr, MachinePointerInfo(SV), MVT::i8);
  InChain = GprIndex.getValue(1);

  if (VT == MVT::i64) {
    // Check if GprIndex is even
    SDValue GprAnd = DAG.getNode(ISD::AND, dl, MVT::i32, GprIndex,
                                 DAG.getConstant(1, dl, MVT::i32));
    SDValue CC64 = DAG.getSetCC(dl, MVT::i32, GprAnd,
                                DAG.getConstant(0, dl, MVT::i32), ISD::SETNE);
    SDValue GprIndexPlusOne = DAG.getNode(ISD::ADD, dl, MVT::i32, GprIndex,
                                          DAG.getConstant(1, dl, MVT::i32));
    // Align GprIndex to be even if it isn't (i64 values occupy an aligned
    // GPR pair).
    GprIndex = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC64, GprIndexPlusOne,
                           GprIndex);
  }

  // fpr index is 1 byte after gpr
  SDValue FprPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
                               DAG.getConstant(1, dl, MVT::i32));

  // fpr
  SDValue FprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain,
                                    FprPtr, MachinePointerInfo(SV), MVT::i8);
  InChain = FprIndex.getValue(1);

  SDValue RegSaveAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
                                       DAG.getConstant(8, dl, MVT::i32));

  SDValue OverflowAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
                                        DAG.getConstant(4, dl, MVT::i32));

  // areas
  SDValue OverflowArea =
      DAG.getLoad(MVT::i32, dl, InChain, OverflowAreaPtr, MachinePointerInfo());
  InChain = OverflowArea.getValue(1);

  SDValue RegSaveArea =
      DAG.getLoad(MVT::i32, dl, InChain, RegSaveAreaPtr, MachinePointerInfo());
  InChain = RegSaveArea.getValue(1);

  // select overflow_area if index > 8
  SDValue CC = DAG.getSetCC(dl, MVT::i32, VT.isInteger() ? GprIndex : FprIndex,
                            DAG.getConstant(8, dl, MVT::i32), ISD::SETLT);

  // adjustment constant gpr_index * 4/8
  SDValue RegConstant = DAG.getNode(ISD::MUL, dl, MVT::i32,
                                    VT.isInteger() ? GprIndex : FprIndex,
                                    DAG.getConstant(VT.isInteger() ? 4 : 8, dl,
                                                    MVT::i32));

  // OurReg = RegSaveArea + RegConstant
  SDValue OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, RegSaveArea,
                               RegConstant);

  // Floating types are 32 bytes into RegSaveArea (after the 8 x 4-byte GPRs)
  if (VT.isFloatingPoint())
    OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, OurReg,
                         DAG.getConstant(32, dl, MVT::i32));

  // increase {f,g}pr_index by 1 (or 2 if VT is i64)
  SDValue IndexPlus1 = DAG.getNode(ISD::ADD, dl, MVT::i32,
                                   VT.isInteger() ? GprIndex : FprIndex,
                                   DAG.getConstant(VT == MVT::i64 ? 2 : 1, dl,
                                                   MVT::i32));

  // Store the updated index back into the va_list (byte 0 for gpr,
  // byte 1 for fpr).
  InChain = DAG.getTruncStore(InChain, dl, IndexPlus1,
                              VT.isInteger() ? VAListPtr : FprPtr,
                              MachinePointerInfo(SV), MVT::i8);

  // determine if we should load from reg_save_area or overflow_area
  SDValue Result = DAG.getNode(ISD::SELECT, dl, PtrVT, CC, OurReg, OverflowArea);

  // increase overflow_area by 4/8 if gpr/fpr > 8
  SDValue OverflowAreaPlusN = DAG.getNode(ISD::ADD, dl, PtrVT, OverflowArea,
                                          DAG.getConstant(VT.isInteger() ? 4 : 8,
                                                          dl, MVT::i32));

  OverflowArea = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC, OverflowArea,
                             OverflowAreaPlusN);

  InChain = DAG.getTruncStore(InChain, dl, OverflowArea, OverflowAreaPtr,
                              MachinePointerInfo(), MVT::i32);

  // Finally load the argument itself from whichever area was selected.
  return DAG.getLoad(VT, dl, InChain, Result, MachinePointerInfo());
}
3760
3761SDValue PPCTargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG) const {
3762 assert(!Subtarget.isPPC64() && "LowerVACOPY is PPC32 only");
3763
3764 // We have to copy the entire va_list struct:
3765 // 2*sizeof(char) + 2 Byte alignment + 2*sizeof(char*) = 12 Byte
3766 return DAG.getMemcpy(Op.getOperand(0), Op, Op.getOperand(1), Op.getOperand(2),
3767 DAG.getConstant(12, SDLoc(Op), MVT::i32), Align(8),
3768 false, true, /*CI=*/nullptr, std::nullopt,
3769 MachinePointerInfo(), MachinePointerInfo());
3770}
3771
3772SDValue PPCTargetLowering::LowerADJUST_TRAMPOLINE(SDValue Op,
3773 SelectionDAG &DAG) const {
3774 return Op.getOperand(0);
3775}
3776
3777SDValue PPCTargetLowering::LowerINLINEASM(SDValue Op, SelectionDAG &DAG) const {
3778 MachineFunction &MF = DAG.getMachineFunction();
3779 PPCFunctionInfo &MFI = *MF.getInfo<PPCFunctionInfo>();
3780
3781 assert((Op.getOpcode() == ISD::INLINEASM ||
3782 Op.getOpcode() == ISD::INLINEASM_BR) &&
3783 "Expecting Inline ASM node.");
3784
3785 // If an LR store is already known to be required then there is not point in
3786 // checking this ASM as well.
3787 if (MFI.isLRStoreRequired())
3788 return Op;
3789
3790 // Inline ASM nodes have an optional last operand that is an incoming Flag of
3791 // type MVT::Glue. We want to ignore this last operand if that is the case.
3792 unsigned NumOps = Op.getNumOperands();
3793 if (Op.getOperand(NumOps - 1).getValueType() == MVT::Glue)
3794 --NumOps;
3795
3796 // Check all operands that may contain the LR.
3797 for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
3798 const InlineAsm::Flag Flags(Op.getConstantOperandVal(i));
3799 unsigned NumVals = Flags.getNumOperandRegisters();
3800 ++i; // Skip the ID value.
3801
3802 switch (Flags.getKind()) {
3803 default:
3804 llvm_unreachable("Bad flags!");
3808 i += NumVals;
3809 break;
3813 for (; NumVals; --NumVals, ++i) {
3814 Register Reg = cast<RegisterSDNode>(Op.getOperand(i))->getReg();
3815 if (Reg != PPC::LR && Reg != PPC::LR8)
3816 continue;
3817 MFI.setLRStoreRequired();
3818 return Op;
3819 }
3820 break;
3821 }
3822 }
3823 }
3824
3825 return Op;
3826}
3827
3828SDValue PPCTargetLowering::LowerINIT_TRAMPOLINE(SDValue Op,
3829 SelectionDAG &DAG) const {
3830 SDValue Chain = Op.getOperand(0);
3831 SDValue Trmp = Op.getOperand(1); // trampoline
3832 SDValue FPtr = Op.getOperand(2); // nested function
3833 SDValue Nest = Op.getOperand(3); // 'nest' parameter value
3834 SDLoc dl(Op);
3835
3836 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3837
3838 if (Subtarget.isAIXABI()) {
3839 // On AIX we create a trampoline descriptor by combining the
3840 // entry point and TOC from the global descriptor (FPtr) with the
3841 // nest argument as the environment pointer.
3842 uint64_t PointerSize = Subtarget.isPPC64() ? 8 : 4;
3843 MaybeAlign PointerAlign(PointerSize);
3844 auto MMOFlags = Subtarget.hasInvariantFunctionDescriptors()
3847 : MachineMemOperand::MONone;
3848
3849 uint64_t TOCPointerOffset = 1 * PointerSize;
3850 uint64_t EnvPointerOffset = 2 * PointerSize;
3851 SDValue SDTOCPtrOffset = DAG.getConstant(TOCPointerOffset, dl, PtrVT);
3852 SDValue SDEnvPtrOffset = DAG.getConstant(EnvPointerOffset, dl, PtrVT);
3853
3854 const Value *TrampolineAddr =
3855 cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
3856 const Function *Func =
3857 cast<Function>(cast<SrcValueSDNode>(Op.getOperand(5))->getValue());
3858
3859 SDValue OutChains[3];
3860
3861 // Copy the entry point address from the global descriptor to the
3862 // trampoline buffer.
3863 SDValue LoadEntryPoint =
3864 DAG.getLoad(PtrVT, dl, Chain, FPtr, MachinePointerInfo(Func, 0),
3865 PointerAlign, MMOFlags);
3866 SDValue EPLoadChain = LoadEntryPoint.getValue(1);
3867 OutChains[0] = DAG.getStore(EPLoadChain, dl, LoadEntryPoint, Trmp,
3868 MachinePointerInfo(TrampolineAddr, 0));
3869
3870 // Copy the TOC pointer from the global descriptor to the trampoline
3871 // buffer.
3872 SDValue TOCFromDescriptorPtr =
3873 DAG.getNode(ISD::ADD, dl, PtrVT, FPtr, SDTOCPtrOffset);
3874 SDValue TOCReg = DAG.getLoad(PtrVT, dl, Chain, TOCFromDescriptorPtr,
3875 MachinePointerInfo(Func, TOCPointerOffset),
3876 PointerAlign, MMOFlags);
3877 SDValue TrampolineTOCPointer =
3878 DAG.getNode(ISD::ADD, dl, PtrVT, Trmp, SDTOCPtrOffset);
3879 SDValue TOCLoadChain = TOCReg.getValue(1);
3880 OutChains[1] =
3881 DAG.getStore(TOCLoadChain, dl, TOCReg, TrampolineTOCPointer,
3882 MachinePointerInfo(TrampolineAddr, TOCPointerOffset));
3883
3884 // Store the nest argument into the environment pointer in the trampoline
3885 // buffer.
3886 SDValue EnvPointer = DAG.getNode(ISD::ADD, dl, PtrVT, Trmp, SDEnvPtrOffset);
3887 OutChains[2] =
3888 DAG.getStore(Chain, dl, Nest, EnvPointer,
3889 MachinePointerInfo(TrampolineAddr, EnvPointerOffset));
3890
3892 DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains);
3893 return TokenFactor;
3894 }
3895
3896 bool isPPC64 = (PtrVT == MVT::i64);
3897 Type *IntPtrTy = DAG.getDataLayout().getIntPtrType(*DAG.getContext());
3898
3900 Args.emplace_back(Trmp, IntPtrTy);
3901 // TrampSize == (isPPC64 ? 48 : 40);
3902 Args.emplace_back(
3903 DAG.getConstant(isPPC64 ? 48 : 40, dl, Subtarget.getScalarIntVT()),
3904 IntPtrTy);
3905 Args.emplace_back(FPtr, IntPtrTy);
3906 Args.emplace_back(Nest, IntPtrTy);
3907
3908 // Lower to a call to __trampoline_setup(Trmp, TrampSize, FPtr, ctx_reg)
3909 TargetLowering::CallLoweringInfo CLI(DAG);
3910 CLI.setDebugLoc(dl).setChain(Chain).setLibCallee(
3912 DAG.getExternalSymbol("__trampoline_setup", PtrVT), std::move(Args));
3913
3914 std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
3915 return CallResult.second;
3916}
3917
SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
  // Lower ISD::VASTART.  On 64-bit and AIX targets va_list is a simple
  // pointer; on 32-bit SVR4 it is the four-field struct described below,
  // initialized here with four chained stores.
  MachineFunction &MF = DAG.getMachineFunction();
  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
  EVT PtrVT = getPointerTy(MF.getDataLayout());

  SDLoc dl(Op);

  if (Subtarget.isPPC64() || Subtarget.isAIXABI()) {
    // vastart just stores the address of the VarArgsFrameIndex slot into the
    // memory location argument.
    SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
    const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
    return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1),
                        MachinePointerInfo(SV));
  }

  // For the 32-bit SVR4 ABI we follow the layout of the va_list struct.
  // We suppose the given va_list is already allocated.
  //
  // typedef struct {
  //  char gpr;     /* index into the array of 8 GPRs
  //                 * stored in the register save area
  //                 * gpr=0 corresponds to r3,
  //                 * gpr=1 to r4, etc.
  //                 */
  //  char fpr;     /* index into the array of 8 FPRs
  //                 * stored in the register save area
  //                 * fpr=0 corresponds to f1,
  //                 * fpr=1 to f2, etc.
  //                 */
  //  char *overflow_arg_area;
  //                /* location on stack that holds
  //                 * the next overflow argument
  //                 */
  //  char *reg_save_area;
  //                /* where r3:r10 and f1:f8 (if saved)
  //                 * are stored
  //                 */
  // } va_list[1];

  SDValue ArgGPR = DAG.getConstant(FuncInfo->getVarArgsNumGPR(), dl, MVT::i32);
  SDValue ArgFPR = DAG.getConstant(FuncInfo->getVarArgsNumFPR(), dl, MVT::i32);
  SDValue StackOffsetFI = DAG.getFrameIndex(FuncInfo->getVarArgsStackOffset(),
                                            PtrVT);
  SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
                                 PtrVT);

  // Offsets between the four struct fields, derived from the pointer size
  // (4 bytes on 32-bit SVR4).
  uint64_t FrameOffset = PtrVT.getSizeInBits()/8;
  SDValue ConstFrameOffset = DAG.getConstant(FrameOffset, dl, PtrVT);

  uint64_t StackOffset = PtrVT.getSizeInBits()/8 - 1;
  SDValue ConstStackOffset = DAG.getConstant(StackOffset, dl, PtrVT);

  uint64_t FPROffset = 1;
  SDValue ConstFPROffset = DAG.getConstant(FPROffset, dl, PtrVT);

  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();

  // Store first byte : number of int regs
  SDValue firstStore =
      DAG.getTruncStore(Op.getOperand(0), dl, ArgGPR, Op.getOperand(1),
                        MachinePointerInfo(SV), MVT::i8);
  uint64_t nextOffset = FPROffset;
  SDValue nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, Op.getOperand(1),
                                ConstFPROffset);

  // Store second byte : number of float regs
  SDValue secondStore =
      DAG.getTruncStore(firstStore, dl, ArgFPR, nextPtr,
                        MachinePointerInfo(SV, nextOffset), MVT::i8);
  nextOffset += StackOffset;
  nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstStackOffset);

  // Store second word : arguments given on stack
  SDValue thirdStore = DAG.getStore(secondStore, dl, StackOffsetFI, nextPtr,
                                    MachinePointerInfo(SV, nextOffset));
  nextOffset += FrameOffset;
  nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstFrameOffset);

  // Store third word : arguments given in registers
  return DAG.getStore(thirdStore, dl, FR, nextPtr,
                      MachinePointerInfo(SV, nextOffset));
}
4001
/// FPR - The set of FP registers that should be allocated for arguments
/// on Darwin and AIX (the 13 parameter-passing FPRs, F1 through F13).
static const MCPhysReg FPR[] = {PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5,
                                PPC::F6, PPC::F7, PPC::F8, PPC::F9, PPC::F10,
                                PPC::F11, PPC::F12, PPC::F13};
4007
4008/// CalculateStackSlotSize - Calculates the size reserved for this argument on
4009/// the stack.
4010static unsigned CalculateStackSlotSize(EVT ArgVT, ISD::ArgFlagsTy Flags,
4011 unsigned PtrByteSize) {
4012 unsigned ArgSize = ArgVT.getStoreSize();
4013 if (Flags.isByVal())
4014 ArgSize = Flags.getByValSize();
4015
4016 // Round up to multiples of the pointer size, except for array members,
4017 // which are always packed.
4018 if (!Flags.isInConsecutiveRegs())
4019 ArgSize = ((ArgSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
4020
4021 return ArgSize;
4022}
4023
4024/// CalculateStackSlotAlignment - Calculates the alignment of this argument
4025/// on the stack.
4027 ISD::ArgFlagsTy Flags,
4028 unsigned PtrByteSize) {
4029 Align Alignment(PtrByteSize);
4030
4031 // Altivec parameters are padded to a 16 byte boundary.
4032 if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
4033 ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
4034 ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64 ||
4035 ArgVT == MVT::v1i128 || ArgVT == MVT::f128)
4036 Alignment = Align(16);
4037
4038 // ByVal parameters are aligned as requested.
4039 if (Flags.isByVal()) {
4040 auto BVAlign = Flags.getNonZeroByValAlign();
4041 if (BVAlign > PtrByteSize) {
4042 if (BVAlign.value() % PtrByteSize != 0)
4044 "ByVal alignment is not a multiple of the pointer size");
4045
4046 Alignment = BVAlign;
4047 }
4048 }
4049
4050 // Array members are always packed to their original alignment.
4051 if (Flags.isInConsecutiveRegs()) {
4052 // If the array member was split into multiple registers, the first
4053 // needs to be aligned to the size of the full type. (Except for
4054 // ppcf128, which is only aligned as its f64 components.)
4055 if (Flags.isSplit() && OrigVT != MVT::ppcf128)
4056 Alignment = Align(OrigVT.getStoreSize());
4057 else
4058 Alignment = Align(ArgVT.getStoreSize());
4059 }
4060
4061 return Alignment;
4062}
4063
/// CalculateStackSlotUsed - Return whether this argument will use its
/// stack slot (instead of being passed in registers). ArgOffset,
/// AvailableFPRs, and AvailableVRs must hold the current argument
/// position, and will be updated to account for this argument.
static bool CalculateStackSlotUsed(EVT ArgVT, EVT OrigVT, ISD::ArgFlagsTy Flags,
                                   unsigned PtrByteSize, unsigned LinkageSize,
                                   unsigned ParamAreaSize, unsigned &ArgOffset,
                                   unsigned &AvailableFPRs,
                                   unsigned &AvailableVRs) {
  bool UseMemory = false;

  // Respect alignment of argument on the stack.
  Align Alignment =
      CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
  ArgOffset = alignTo(ArgOffset, Alignment);
  // If there's no space left in the argument save area, we must
  // use memory (this check also catches zero-sized arguments).
  if (ArgOffset >= LinkageSize + ParamAreaSize)
    UseMemory = true;

  // Allocate argument on the stack.  Note that ArgOffset is advanced
  // unconditionally: a slot is reserved even for register-passed arguments.
  ArgOffset += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
  if (Flags.isInConsecutiveRegsLast())
    ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
  // If we overran the argument save area, we must use memory
  // (this check catches arguments passed partially in memory)
  if (ArgOffset > LinkageSize + ParamAreaSize)
    UseMemory = true;

  // However, if the argument is actually passed in an FPR or a VR,
  // we don't use memory after all.
  if (!Flags.isByVal()) {
    if (ArgVT == MVT::f32 || ArgVT == MVT::f64)
      if (AvailableFPRs > 0) {
        --AvailableFPRs;
        return false;
      }
    if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
        ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
        ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64 ||
        ArgVT == MVT::v1i128 || ArgVT == MVT::f128)
      if (AvailableVRs > 0) {
        --AvailableVRs;
        return false;
      }
  }

  return UseMemory;
}
4113
4114/// EnsureStackAlignment - Round stack frame size up from NumBytes to
4115/// ensure minimum alignment required for target.
4117 unsigned NumBytes) {
4118 return alignTo(NumBytes, Lowering->getStackAlign());
4119}
4120
SDValue PPCTargetLowering::LowerFormalArguments(
    SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
  // Dispatch to the ABI-specific implementation: AIX, 64-bit ELF, or
  // (asserted fallback) 32-bit ELF.
  if (Subtarget.isAIXABI())
    return LowerFormalArguments_AIX(Chain, CallConv, isVarArg, Ins, dl, DAG,
                                    InVals);
  if (Subtarget.is64BitELFABI())
    return LowerFormalArguments_64SVR4(Chain, CallConv, isVarArg, Ins, dl, DAG,
                                       InVals);
  assert(Subtarget.is32BitELFABI());
  return LowerFormalArguments_32SVR4(Chain, CallConv, isVarArg, Ins, dl, DAG,
                                     InVals);
}
4135
/// Lower incoming formal arguments for the 32-bit SVR4 (ELF) ABI: copy
/// register-passed arguments out of their physical registers into virtual
/// registers, load stack-passed arguments from fixed frame objects, and, for
/// variadic functions, spill the unnamed-capable argument registers so
/// va_start/va_arg can find them.
SDValue PPCTargetLowering::LowerFormalArguments_32SVR4(
    SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {

  // 32-bit SVR4 ABI Stack Frame Layout:
  //              +-----------------------------------+
  //        +-->  |            Back chain             |
  //        |     +-----------------------------------+
  //        |     | Floating-point register save area |
  //        |     +-----------------------------------+
  //        |     |    General register save area     |
  //        |     +-----------------------------------+
  //        |     |          CR save word             |
  //        |     +-----------------------------------+
  //        |     |         VRSAVE save word          |
  //        |     +-----------------------------------+
  //        |     |         Alignment padding         |
  //        |     +-----------------------------------+
  //        |     |     Vector register save area     |
  //        |     +-----------------------------------+
  //        |     |       Local variable space        |
  //        |     +-----------------------------------+
  //        |     |        Parameter list area        |
  //        |     +-----------------------------------+
  //        |     |           LR save word            |
  //        |     +-----------------------------------+
  // SP-->  +---  |            Back chain             |
  //              +-----------------------------------+
  //
  // Specifications:
  //   System V Application Binary Interface PowerPC Processor Supplement
  //   AltiVec Technology Programming Interface Manual

  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();

  EVT PtrVT = getPointerTy(MF.getDataLayout());
  // Potential tail calls could cause overwriting of argument stack slots.
  bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
                       (CallConv == CallingConv::Fast));
  // Pointers are 4 bytes / 4-byte aligned on PPC32.
  const Align PtrAlign(4);

  // Assign locations to all of the incoming arguments.
  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
                 *DAG.getContext());

  // Reserve space for the linkage area on the stack.
  unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
  CCInfo.AllocateStack(LinkageSize, PtrAlign);
  CCInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4);

  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];

    // Arguments stored in registers.
    if (VA.isRegLoc()) {
      // Pick the register class the value type needs, taking the
      // floating-point/vector feature set of the subtarget into account.
      const TargetRegisterClass *RC;
      EVT ValVT = VA.getValVT();

      switch (ValVT.getSimpleVT().SimpleTy) {
        default:
          llvm_unreachable("ValVT not supported by formal arguments Lowering");
        case MVT::i1:
        case MVT::i32:
          RC = &PPC::GPRCRegClass;
          break;
        case MVT::f32:
          if (Subtarget.hasP8Vector())
            RC = &PPC::VSSRCRegClass;
          else if (Subtarget.hasSPE())
            RC = &PPC::GPRCRegClass;
          else
            RC = &PPC::F4RCRegClass;
          break;
        case MVT::f64:
          if (Subtarget.hasVSX())
            RC = &PPC::VSFRCRegClass;
          else if (Subtarget.hasSPE())
            // SPE passes doubles in GPR pairs.
            RC = &PPC::GPRCRegClass;
          else
            RC = &PPC::F8RCRegClass;
          break;
        case MVT::v16i8:
        case MVT::v8i16:
        case MVT::v4i32:
          RC = &PPC::VRRCRegClass;
          break;
        case MVT::v4f32:
          RC = &PPC::VRRCRegClass;
          break;
        case MVT::v2f64:
        case MVT::v2i64:
          RC = &PPC::VRRCRegClass;
          break;
      }

      SDValue ArgValue;
      // Transform the arguments stored in physical registers into
      // virtual ones.
      if (VA.getLocVT() == MVT::f64 && Subtarget.hasSPE()) {
        // SPE f64: the value arrives as two consecutive i32 register halves;
        // reassemble them with BUILD_SPE64 (halves swapped on big-endian).
        assert(i + 1 < e && "No second half of double precision argument");
        Register RegLo = MF.addLiveIn(VA.getLocReg(), RC);
        Register RegHi = MF.addLiveIn(ArgLocs[++i].getLocReg(), RC);
        SDValue ArgValueLo = DAG.getCopyFromReg(Chain, dl, RegLo, MVT::i32);
        SDValue ArgValueHi = DAG.getCopyFromReg(Chain, dl, RegHi, MVT::i32);
        if (!Subtarget.isLittleEndian())
          std::swap (ArgValueLo, ArgValueHi);
        ArgValue = DAG.getNode(PPCISD::BUILD_SPE64, dl, MVT::f64, ArgValueLo,
                               ArgValueHi);
      } else {
        Register Reg = MF.addLiveIn(VA.getLocReg(), RC);
        // i1 arrives widened to i32; copy it at i32 and truncate back.
        ArgValue = DAG.getCopyFromReg(Chain, dl, Reg,
                                      ValVT == MVT::i1 ? MVT::i32 : ValVT);
        if (ValVT == MVT::i1)
          ArgValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, ArgValue);
      }

      InVals.push_back(ArgValue);
    } else {
      // Argument stored in memory.
      assert(VA.isMemLoc());

      // Get the extended size of the argument type in stack
      unsigned ArgSize = VA.getLocVT().getStoreSize();
      // Get the actual size of the argument type
      unsigned ObjSize = VA.getValVT().getStoreSize();
      unsigned ArgOffset = VA.getLocMemOffset();
      // Stack objects in PPC32 are right justified.
      ArgOffset += ArgSize - ObjSize;
      int FI = MFI.CreateFixedObject(ArgSize, ArgOffset, isImmutable);

      // Create load nodes to retrieve arguments from the stack.
      SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
      InVals.push_back(
          DAG.getLoad(VA.getValVT(), dl, Chain, FIN, MachinePointerInfo()));
    }
  }

  // Assign locations to all of the incoming aggregate by value arguments.
  // Aggregates passed by value are stored in the local variable space of the
  // caller's stack frame, right above the parameter list area.
  SmallVector<CCValAssign, 16> ByValArgLocs;
  CCState CCByValInfo(CallConv, isVarArg, DAG.getMachineFunction(),
                      ByValArgLocs, *DAG.getContext());

  // Reserve stack space for the allocations in CCInfo.
  CCByValInfo.AllocateStack(CCInfo.getStackSize(), PtrAlign);

  CCByValInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4_ByVal);

  // Area that is at least reserved in the caller of this function.
  unsigned MinReservedArea = CCByValInfo.getStackSize();
  MinReservedArea = std::max(MinReservedArea, LinkageSize);

  // Set the size that is at least reserved in caller of this function. Tail
  // call optimized function's reserved stack space needs to be aligned so that
  // taking the difference between two stack areas will result in an aligned
  // stack.
  MinReservedArea =
      EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
  FuncInfo->setMinReservedArea(MinReservedArea);

  // If the function takes variable number of arguments, make a frame index for
  // the start of the first vararg value... for expansion of llvm.va_start.
  if (isVarArg) {
    static const MCPhysReg GPArgRegs[] = {
      PPC::R3, PPC::R4, PPC::R5, PPC::R6,
      PPC::R7, PPC::R8, PPC::R9, PPC::R10,
    };
    const unsigned NumGPArgRegs = std::size(GPArgRegs);

    static const MCPhysReg FPArgRegs[] = {
      PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
      PPC::F8
    };
    unsigned NumFPArgRegs = std::size(FPArgRegs);

    // No hardware FP argument registers to spill under soft-float or SPE.
    if (useSoftFloat() || hasSPE())
      NumFPArgRegs = 0;

    FuncInfo->setVarArgsNumGPR(CCInfo.getFirstUnallocated(GPArgRegs));
    FuncInfo->setVarArgsNumFPR(CCInfo.getFirstUnallocated(FPArgRegs));

    // Make room for NumGPArgRegs and NumFPArgRegs.
    int Depth = NumGPArgRegs * PtrVT.getSizeInBits()/8 +
                NumFPArgRegs * MVT(MVT::f64).getSizeInBits()/8;

        PtrVT.getSizeInBits() / 8, CCInfo.getStackSize(), true));

    FuncInfo->setVarArgsFrameIndex(
        MFI.CreateStackObject(Depth, Align(8), false));
    SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);

    // The fixed integer arguments of a variadic function are stored to the
    // VarArgsFrameIndex on the stack so that they may be loaded by
    // dereferencing the result of va_next.
    for (MCPhysReg GPArgReg : GPArgRegs) {
      // Get an existing live-in vreg, or add a new one.
      Register VReg = MF.getRegInfo().getLiveInVirtReg(GPArgReg);
      if (!VReg)
        VReg = MF.addLiveIn(GPArgReg, &PPC::GPRCRegClass);

      SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
      SDValue Store =
          DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
      MemOps.push_back(Store);
      // Increment the address by four for the next argument to store
      SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, dl, PtrVT);
      FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
    }

    // FIXME 32-bit SVR4: We only need to save FP argument registers if CR bit 6
    // is set.
    // The double arguments are stored to the VarArgsFrameIndex
    // on the stack.
    for (unsigned FPRIndex = 0; FPRIndex != NumFPArgRegs; ++FPRIndex) {
      // Get an existing live-in vreg, or add a new one.
      Register VReg = MF.getRegInfo().getLiveInVirtReg(FPArgRegs[FPRIndex]);
      if (!VReg)
        VReg = MF.addLiveIn(FPArgRegs[FPRIndex], &PPC::F8RCRegClass);

      SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::f64);
      SDValue Store =
          DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
      MemOps.push_back(Store);
      // Increment the address by eight for the next argument to store
      SDValue PtrOff = DAG.getConstant(MVT(MVT::f64).getSizeInBits()/8, dl,
                                       PtrVT);
      FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
    }
  }

  // Glue all the vararg spill stores into the returned chain.
  if (!MemOps.empty())
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);

  return Chain;
}
4380
4381// PPC64 passes i8, i16, and i32 values in i64 registers. Promote
4382// value to MVT::i64 and then truncate to the correct register size.
4383SDValue PPCTargetLowering::extendArgForPPC64(ISD::ArgFlagsTy Flags,
4384 EVT ObjectVT, SelectionDAG &DAG,
4385 SDValue ArgVal,
4386 const SDLoc &dl) const {
4387 if (Flags.isSExt())
4388 ArgVal = DAG.getNode(ISD::AssertSext, dl, MVT::i64, ArgVal,
4389 DAG.getValueType(ObjectVT));
4390 else if (Flags.isZExt())
4391 ArgVal = DAG.getNode(ISD::AssertZext, dl, MVT::i64, ArgVal,
4392 DAG.getValueType(ObjectVT));
4393
4394 return DAG.getNode(ISD::TRUNCATE, dl, ObjectVT, ArgVal);
4395}
4396
/// Lower incoming formal arguments for the 64-bit ELF (SVR4) ABIs, ELFv1 and
/// ELFv2: decide for each argument whether it arrives in a GPR/FPR/VR or in
/// the parameter save area, copy register arguments into virtual registers,
/// materialize byval aggregates into frame objects, and record the minimum
/// reserved stack area plus vararg spill slots.
SDValue PPCTargetLowering::LowerFormalArguments_64SVR4(
    SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
  // TODO: add description of PPC stack frame format, or at least some docs.
  //
  bool isELFv2ABI = Subtarget.isELFv2ABI();
  bool isLittleEndian = Subtarget.isLittleEndian();
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();

  assert(!(CallConv == CallingConv::Fast && isVarArg) &&
         "fastcc not supported on varargs functions");

  EVT PtrVT = getPointerTy(MF.getDataLayout());
  // Potential tail calls could cause overwriting of argument stack slots.
  bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
                       (CallConv == CallingConv::Fast));
  unsigned PtrByteSize = 8;
  unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();

  static const MCPhysReg GPR[] = {
    PPC::X3, PPC::X4, PPC::X5, PPC::X6,
    PPC::X7, PPC::X8, PPC::X9, PPC::X10,
  };
  static const MCPhysReg VR[] = {
    PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
    PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
  };

  const unsigned Num_GPR_Regs = std::size(GPR);
  const unsigned Num_FPR_Regs = useSoftFloat() ? 0 : 13;
  const unsigned Num_VR_Regs = std::size(VR);

  // Do a first pass over the arguments to determine whether the ABI
  // guarantees that our caller has allocated the parameter save area
  // on its stack frame. In the ELFv1 ABI, this is always the case;
  // in the ELFv2 ABI, it is true if this is a vararg function or if
  // any parameter is located in a stack slot.

  bool HasParameterArea = !isELFv2ABI || isVarArg;
  unsigned ParamAreaSize = Num_GPR_Regs * PtrByteSize;
  unsigned NumBytes = LinkageSize;
  unsigned AvailableFPRs = Num_FPR_Regs;
  unsigned AvailableVRs = Num_VR_Regs;
  for (const ISD::InputArg &In : Ins) {
    // The 'nest' parameter never consumes a parameter slot (it uses R11).
    if (In.Flags.isNest())
      continue;

    if (CalculateStackSlotUsed(In.VT, In.ArgVT, In.Flags, PtrByteSize,
                               LinkageSize, ParamAreaSize, NumBytes,
                               AvailableFPRs, AvailableVRs))
      HasParameterArea = true;
  }

  // Add DAG nodes to load the arguments or copy them out of registers.  On
  // entry to a function on PPC, the arguments start after the linkage area,
  // although the first ones are often in registers.

  unsigned ArgOffset = LinkageSize;
  unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
  unsigned CurArgIdx = 0;
  for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
    SDValue ArgVal;
    bool needsLoad = false;
    EVT ObjectVT = Ins[ArgNo].VT;
    EVT OrigVT = Ins[ArgNo].ArgVT;
    unsigned ObjSize = ObjectVT.getStoreSize();
    unsigned ArgSize = ObjSize;
    ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
    if (Ins[ArgNo].isOrigArg()) {
      // Keep FuncArg in sync with the IR argument this lowered piece came
      // from (several Ins entries may map to one original argument).
      std::advance(FuncArg, Ins[ArgNo].getOrigArgIndex() - CurArgIdx);
      CurArgIdx = Ins[ArgNo].getOrigArgIndex();
    }
    // We re-align the argument offset for each argument, except when using the
    // fast calling convention, when we need to make sure we do that only when
    // we'll actually use a stack slot.
    unsigned CurArgOffset;
    Align Alignment;
    auto ComputeArgOffset = [&]() {
      /* Respect alignment of argument on the stack.  */
      Alignment =
          CalculateStackSlotAlignment(ObjectVT, OrigVT, Flags, PtrByteSize);
      ArgOffset = alignTo(ArgOffset, Alignment);
      CurArgOffset = ArgOffset;
    };

    if (CallConv != CallingConv::Fast) {
      ComputeArgOffset();

      /* Compute GPR index associated with argument offset.  */
      GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
      GPR_idx = std::min(GPR_idx, Num_GPR_Regs);
    }

    // FIXME the codegen can be much improved in some cases.
    // We do not have to keep everything in memory.
    if (Flags.isByVal()) {
      assert(Ins[ArgNo].isOrigArg() && "Byval arguments cannot be implicit");

      if (CallConv == CallingConv::Fast)
        ComputeArgOffset();

      // ObjSize is the true size, ArgSize rounded up to multiple of registers.
      ObjSize = Flags.getByValSize();
      ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
      // Empty aggregate parameters do not take up registers.  Examples:
      //   struct { } a;
      //   union  { } b;
      //   int c[0];
      // etc.  However, we have to provide a place-holder in InVals, so
      // pretend we have an 8-byte item at the current address for that
      // purpose.
      if (!ObjSize) {
        int FI = MFI.CreateFixedObject(PtrByteSize, ArgOffset, true);
        SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
        InVals.push_back(FIN);
        continue;
      }

      // Create a stack object covering all stack doublewords occupied
      // by the argument.  If the argument is (fully or partially) on
      // the stack, or if the argument is fully in registers but the
      // caller has allocated the parameter save anyway, we can refer
      // directly to the caller's stack frame.  Otherwise, create a
      // local copy in our own frame.
      int FI;
      if (HasParameterArea ||
          ArgSize + ArgOffset > LinkageSize + Num_GPR_Regs * PtrByteSize)
        FI = MFI.CreateFixedObject(ArgSize, ArgOffset, false, true);
      else
        FI = MFI.CreateStackObject(ArgSize, Alignment, false);
      SDValue FIN = DAG.getFrameIndex(FI, PtrVT);

      // Handle aggregates smaller than 8 bytes.
      if (ObjSize < PtrByteSize) {
        // The value of the object is its address, which differs from the
        // address of the enclosing doubleword on big-endian systems.
        SDValue Arg = FIN;
        if (!isLittleEndian) {
          SDValue ArgOff = DAG.getConstant(PtrByteSize - ObjSize, dl, PtrVT);
          Arg = DAG.getNode(ISD::ADD, dl, ArgOff.getValueType(), Arg, ArgOff);
        }
        InVals.push_back(Arg);

        if (GPR_idx != Num_GPR_Regs) {
          // The aggregate arrived in a GPR; spill just its ObjSize bytes.
          Register VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
          FuncInfo->addLiveInAttr(VReg, Flags);
          SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
          EVT ObjType = EVT::getIntegerVT(*DAG.getContext(), ObjSize * 8);
          SDValue Store =
              DAG.getTruncStore(Val.getValue(1), dl, Val, Arg,
                                MachinePointerInfo(&*FuncArg), ObjType);
          MemOps.push_back(Store);
        }
        // Whether we copied from a register or not, advance the offset
        // into the parameter save area by a full doubleword.
        ArgOffset += PtrByteSize;
        continue;
      }

      // The value of the object is its address, which is the address of
      // its first stack doubleword.
      InVals.push_back(FIN);

      // Store whatever pieces of the object are in registers to memory.
      for (unsigned j = 0; j < ArgSize; j += PtrByteSize) {
        if (GPR_idx == Num_GPR_Regs)
          break;

        Register VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
        FuncInfo->addLiveInAttr(VReg, Flags);
        SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
        SDValue Addr = FIN;
        if (j) {
          SDValue Off = DAG.getConstant(j, dl, PtrVT);
          Addr = DAG.getNode(ISD::ADD, dl, Off.getValueType(), Addr, Off);
        }
        // The final doubleword may be partially occupied; store only the
        // bytes that belong to the object.
        unsigned StoreSizeInBits = std::min(PtrByteSize, (ObjSize - j)) * 8;
        EVT ObjType = EVT::getIntegerVT(*DAG.getContext(), StoreSizeInBits);
        SDValue Store =
            DAG.getTruncStore(Val.getValue(1), dl, Val, Addr,
                              MachinePointerInfo(&*FuncArg, j), ObjType);
        MemOps.push_back(Store);
        ++GPR_idx;
      }
      ArgOffset += ArgSize;
      continue;
    }

    switch (ObjectVT.getSimpleVT().SimpleTy) {
    default: llvm_unreachable("Unhandled argument type!");
    case MVT::i1:
    case MVT::i32:
    case MVT::i64:
      if (Flags.isNest()) {
        // The 'nest' parameter, if any, is passed in R11.
        Register VReg = MF.addLiveIn(PPC::X11, &PPC::G8RCRegClass);
        ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);

        if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
          ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);

        break;
      }

      // These can be scalar arguments or elements of an integer array type
      // passed directly.  Clang may use those instead of "byval" aggregate
      // types to avoid forcing arguments to memory unnecessarily.
      if (GPR_idx != Num_GPR_Regs) {
        Register VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
        FuncInfo->addLiveInAttr(VReg, Flags);
        ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);

        if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
          // PPC64 passes i8, i16, and i32 values in i64 registers. Promote
          // value to MVT::i64 and then truncate to the correct register size.
          ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
      } else {
        if (CallConv == CallingConv::Fast)
          ComputeArgOffset();

        needsLoad = true;
        ArgSize = PtrByteSize;
      }
      // fastcc only advances the offset when a stack slot is actually used.
      if (CallConv != CallingConv::Fast || needsLoad)
        ArgOffset += 8;
      break;

    case MVT::f32:
    case MVT::f64:
      // These can be scalar arguments or elements of a float array type
      // passed directly.  The latter are used to implement ELFv2 homogenous
      // float aggregates.
      if (FPR_idx != Num_FPR_Regs) {
        unsigned VReg;

        if (ObjectVT == MVT::f32)
          VReg = MF.addLiveIn(FPR[FPR_idx],
                              Subtarget.hasP8Vector()
                                  ? &PPC::VSSRCRegClass
                                  : &PPC::F4RCRegClass);
        else
          VReg = MF.addLiveIn(FPR[FPR_idx], Subtarget.hasVSX()
                                                ? &PPC::VSFRCRegClass
                                                : &PPC::F8RCRegClass);

        ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
        ++FPR_idx;
      } else if (GPR_idx != Num_GPR_Regs && CallConv != CallingConv::Fast) {
        // FIXME: We may want to re-enable this for CallingConv::Fast on the P8
        // once we support fp <-> gpr moves.

        // This can only ever happen in the presence of f32 array types,
        // since otherwise we never run out of FPRs before running out
        // of GPRs.
        Register VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
        FuncInfo->addLiveInAttr(VReg, Flags);
        ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);

        if (ObjectVT == MVT::f32) {
          // Select the half of the i64 GPR that holds this f32 element,
          // which depends on endianness and the element's offset.
          if ((ArgOffset % PtrByteSize) == (isLittleEndian ? 4 : 0))
            ArgVal = DAG.getNode(ISD::SRL, dl, MVT::i64, ArgVal,
                                 DAG.getConstant(32, dl, MVT::i32));
          ArgVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, ArgVal);
        }

        ArgVal = DAG.getNode(ISD::BITCAST, dl, ObjectVT, ArgVal);
      } else {
        if (CallConv == CallingConv::Fast)
          ComputeArgOffset();

        needsLoad = true;
      }

      // When passing an array of floats, the array occupies consecutive
      // space in the argument area; only round up to the next doubleword
      // at the end of the array.  Otherwise, each float takes 8 bytes.
      if (CallConv != CallingConv::Fast || needsLoad) {
        ArgSize = Flags.isInConsecutiveRegs() ? ObjSize : PtrByteSize;
        ArgOffset += ArgSize;
        if (Flags.isInConsecutiveRegsLast())
          ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
      }
      break;
    case MVT::v4f32:
    case MVT::v4i32:
    case MVT::v8i16:
    case MVT::v16i8:
    case MVT::v2f64:
    case MVT::v2i64:
    case MVT::v1i128:
    case MVT::f128:
      // These can be scalar arguments or elements of a vector array type
      // passed directly.  The latter are used to implement ELFv2 homogenous
      // vector aggregates.
      if (VR_idx != Num_VR_Regs) {
        Register VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass);
        ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
        ++VR_idx;
      } else {
        if (CallConv == CallingConv::Fast)
          ComputeArgOffset();
        needsLoad = true;
      }
      if (CallConv != CallingConv::Fast || needsLoad)
        ArgOffset += 16;
      break;
    }

    // We need to load the argument to a virtual register if we determined
    // above that we ran out of physical registers of the appropriate type.
    if (needsLoad) {
      // Big-endian: small objects are right-justified within their slot.
      if (ObjSize < ArgSize && !isLittleEndian)
        CurArgOffset += ArgSize - ObjSize;
      int FI = MFI.CreateFixedObject(ObjSize, CurArgOffset, isImmutable);
      SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
      ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo());
    }

    InVals.push_back(ArgVal);
  }

  // Area that is at least reserved in the caller of this function.
  unsigned MinReservedArea;
  if (HasParameterArea)
    MinReservedArea = std::max(ArgOffset, LinkageSize + 8 * PtrByteSize);
  else
    MinReservedArea = LinkageSize;

  // Set the size that is at least reserved in caller of this function. Tail
  // call optimized functions' reserved stack space needs to be aligned so that
  // taking the difference between two stack areas will result in an aligned
  // stack.
  MinReservedArea =
      EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
  FuncInfo->setMinReservedArea(MinReservedArea);

  // If the function takes variable number of arguments, make a frame index for
  // the start of the first vararg value... for expansion of llvm.va_start.
  // On ELFv2ABI spec, it writes:
  // C programs that are intended to be *portable* across different compilers
  // and architectures must use the header file <stdarg.h> to deal with variable
  // argument lists.
  if (isVarArg && MFI.hasVAStart()) {
    int Depth = ArgOffset;

    FuncInfo->setVarArgsFrameIndex(
        MFI.CreateFixedObject(PtrByteSize, Depth, true));
    SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);

    // If this function is vararg, store any remaining integer argument regs
    // to their spots on the stack so that they may be loaded by dereferencing
    // the result of va_next.
    for (GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
         GPR_idx < Num_GPR_Regs; ++GPR_idx) {
      Register VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
      SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
      SDValue Store =
          DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
      MemOps.push_back(Store);
      // Increment the address by four for the next argument to store
      SDValue PtrOff = DAG.getConstant(PtrByteSize, dl, PtrVT);
      FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
    }
  }

  // Glue all byval/vararg spill stores into the returned chain.
  if (!MemOps.empty())
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);

  return Chain;
}
4772
/// CalculateTailCallSPDiff - Get the amount the stack pointer has to be
/// adjusted to accommodate the arguments for the tailcall.
static int CalculateTailCallSPDiff(SelectionDAG& DAG, bool isTailCall,
                                   unsigned ParamSize) {

  // Non-tail calls never need an adjustment.
  if (!isTailCall) return 0;

  unsigned CallerMinReservedArea = FI->getMinReservedArea();
  // Negative when the callee needs more argument space than the caller has
  // reserved; zero or positive when the caller's area already suffices.
  int SPDiff = (int)CallerMinReservedArea - (int)ParamSize;
  // Remember only if the new adjustment is bigger.
  if (SPDiff < FI->getTailCallSPDelta())
    FI->setTailCallSPDelta(SPDiff);

  return SPDiff;
}
4789
4790static bool isFunctionGlobalAddress(const GlobalValue *CalleeGV);
4791
/// Returns true when it can be proven that Caller and CalleeGV use the same
/// TOC base, so no TOC save/restore (nop after the call) is required.
/// Conservatively returns false whenever the answer cannot be determined.
static bool callsShareTOCBase(const Function *Caller,
                              const GlobalValue *CalleeGV,
                              const TargetMachine &TM) {
  // It does not make sense to call callsShareTOCBase() with a caller that
  // is PC Relative since PC Relative callers do not have a TOC.
#ifndef NDEBUG
  const PPCSubtarget *STICaller = &TM.getSubtarget<PPCSubtarget>(*Caller);
  assert(!STICaller->isUsingPCRelativeCalls() &&
         "PC Relative callers do not have a TOC and cannot share a TOC Base");
#endif

  // Callee is either a GlobalAddress or an ExternalSymbol. ExternalSymbols
  // don't have enough information to determine if the caller and callee share
  // the same TOC base, so we have to pessimistically assume they don't for
  // correctness.
  if (!CalleeGV)
    return false;

  // If the callee is preemptable, then the static linker will use a plt-stub
  // which saves the toc to the stack, and needs a nop after the call
  // instruction to convert to a toc-restore.
  if (!TM.shouldAssumeDSOLocal(CalleeGV))
    return false;

  // Functions with PC Relative enabled may clobber the TOC in the same DSO.
  // We may need a TOC restore in the situation where the caller requires a
  // valid TOC but the callee is PC Relative and does not.
  const Function *F = dyn_cast<Function>(CalleeGV);
  const GlobalAlias *Alias = dyn_cast<GlobalAlias>(CalleeGV);

  // If we have an Alias we can try to get the function from there.
  if (Alias) {
    const GlobalObject *GlobalObj = Alias->getAliaseeObject();
    F = dyn_cast<Function>(GlobalObj);
  }

  // If we still have no valid function pointer we do not have enough
  // information to determine if the callee uses PC Relative calls so we must
  // assume that it does.
  if (!F)
    return false;

  // If the callee uses PC Relative we cannot guarantee that the callee won't
  // clobber the TOC of the caller and so we must assume that the two
  // functions do not share a TOC base.
  const PPCSubtarget *STICallee = &TM.getSubtarget<PPCSubtarget>(*F);
  if (STICallee->isUsingPCRelativeCalls())
    return false;

  // If the GV is not a strong definition then we need to assume it can be
  // replaced by another function at link time. The function that replaces
  // it may not share the same TOC as the caller since the callee may be
  // replaced by a PC Relative version of the same function.
  if (!CalleeGV->isStrongDefinitionForLinker())
    return false;

  // The medium and large code models are expected to provide a sufficiently
  // large TOC to provide all data addressing needs of a module with a
  // single TOC.
  if (CodeModel::Medium == TM.getCodeModel() ||
    return true;

  // Any explicitly-specified sections and section prefixes must also match.
  // Also, if we're using -ffunction-sections, then each function is always in
  // a different section (the same is true for COMDAT functions).
  if (TM.getFunctionSections() || CalleeGV->hasComdat() ||
      Caller->hasComdat() || CalleeGV->getSection() != Caller->getSection())
    return false;
  if (const auto *F = dyn_cast<Function>(CalleeGV)) {
    if (F->getSectionPrefix() != Caller->getSectionPrefix())
      return false;
  }

  return true;
}
4868
/// Returns true if any outgoing argument in Outs would have to be passed
/// through the in-memory parameter save area under the 64-bit ELF ABI,
/// determined by a dry run of the register/stack allocation that
/// CalculateStackSlotUsed performs per parameter.
static bool
                           const SmallVectorImpl<ISD::OutputArg> &Outs) {
  assert(Subtarget.is64BitELFABI());

  const unsigned PtrByteSize = 8;
  const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();

  static const MCPhysReg GPR[] = {
    PPC::X3, PPC::X4, PPC::X5, PPC::X6,
    PPC::X7, PPC::X8, PPC::X9, PPC::X10,
  };
  static const MCPhysReg VR[] = {
    PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
    PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
  };

  const unsigned NumGPRs = std::size(GPR);
  const unsigned NumFPRs = 13;
  const unsigned NumVRs = std::size(VR);
  const unsigned ParamAreaSize = NumGPRs * PtrByteSize;

  unsigned NumBytes = LinkageSize;
  unsigned AvailableFPRs = NumFPRs;
  unsigned AvailableVRs = NumVRs;

  for (const ISD::OutputArg& Param : Outs) {
    // The 'nest' parameter is passed in R11 and never needs a stack slot.
    if (Param.Flags.isNest()) continue;

    if (CalculateStackSlotUsed(Param.VT, Param.ArgVT, Param.Flags, PtrByteSize,
                               LinkageSize, ParamAreaSize, NumBytes,
                               AvailableFPRs, AvailableVRs))
      return true;
  }
  return false;
}
4905
4906static bool hasSameArgumentList(const Function *CallerFn, const CallBase &CB) {
4907 if (CB.arg_size() != CallerFn->arg_size())
4908 return false;
4909
4910 auto CalleeArgIter = CB.arg_begin();
4911 auto CalleeArgEnd = CB.arg_end();
4912 Function::const_arg_iterator CallerArgIter = CallerFn->arg_begin();
4913
4914 for (; CalleeArgIter != CalleeArgEnd; ++CalleeArgIter, ++CallerArgIter) {
4915 const Value* CalleeArg = *CalleeArgIter;
4916 const Value* CallerArg = &(*CallerArgIter);
4917 if (CalleeArg == CallerArg)
4918 continue;
4919
4920 // e.g. @caller([4 x i64] %a, [4 x i64] %b) {
4921 // tail call @callee([4 x i64] undef, [4 x i64] %b)
4922 // }
4923 // 1st argument of callee is undef and has the same type as caller.
4924 if (CalleeArg->getType() == CallerArg->getType() &&
4925 isa<UndefValue>(CalleeArg))
4926 continue;
4927
4928 return false;
4929 }
4930
4931 return true;
4932}
4933
// Returns true if TCO is possible between the callers and callees
// calling conventions.
static bool
                                    CallingConv::ID CalleeCC) {
  // Tail calls are possible with fastcc and ccc.
  auto isTailCallableCC = [] (CallingConv::ID CC){
      return CC == CallingConv::C || CC == CallingConv::Fast;
  };
  // Both ends of the call must use a tail-call-capable convention.
  if (!isTailCallableCC(CallerCC) || !isTailCallableCC(CalleeCC))
    return false;

  // We can safely tail call both fastcc and ccc callees from a c calling
  // convention caller. If the caller is fastcc, we may have less stack space
  // than a non-fastcc caller with the same signature so disable tail-calls in
  // that case.
  return CallerCC == CallingConv::C || CallerCC == CalleeCC;
}
4952
/// Check 64-bit SVR4 (ELF) specific eligibility for tail call optimization,
/// covering both guaranteed TCO (-tailcallopt) and sibling call optimization
/// (SCO). Returns false whenever an ABI constraint (calling convention,
/// byval arguments, TOC sharing, or stack-passed parameters) makes the
/// transformation unsafe.
bool PPCTargetLowering::IsEligibleForTailCallOptimization_64SVR4(
    const GlobalValue *CalleeGV, CallingConv::ID CalleeCC,
    CallingConv::ID CallerCC, const CallBase *CB, bool isVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const Function *CallerFunc,
    bool isCalleeExternalSymbol) const {
  bool TailCallOpt = getTargetMachine().Options.GuaranteedTailCallOpt;

  // With SCO disabled and no guaranteed TCO requested, never tail call.
  if (DisableSCO && !TailCallOpt) return false;

  // Variadic argument functions are not supported.
  if (isVarArg) return false;

  // Check that the calling conventions are compatible for tco.
  if (!areCallingConvEligibleForTCO_64SVR4(CallerCC, CalleeCC))
    return false;

  // Caller contains any byval parameter is not supported.
  if (any_of(Ins, [](const ISD::InputArg &IA) { return IA.Flags.isByVal(); }))
    return false;

  // Callee contains any byval parameter is not supported, too.
  // Note: This is a quick work around, because in some cases, e.g.
  // caller's stack size > callee's stack size, we are still able to apply
  // sibling call optimization. For example, gcc is able to do SCO for caller1
  // in the following example, but not for caller2.
  //   struct test {
  //     long int a;
  //     char ary[56];
  //   } gTest;
  //   __attribute__((noinline)) int callee(struct test v, struct test *b) {
  //     b->a = v.a;
  //     return 0;
  //   }
  //   void caller1(struct test a, struct test c, struct test *b) {
  //     callee(gTest, b); }
  //   void caller2(struct test *b) { callee(gTest, b); }
  if (any_of(Outs, [](const ISD::OutputArg& OA) { return OA.Flags.isByVal(); }))
    return false;

  // If callee and caller use different calling conventions, we cannot pass
  // parameters on stack since offsets for the parameter area may be different.
  if (CallerCC != CalleeCC && needStackSlotPassParameters(Subtarget, Outs))
    return false;

  // All variants of 64-bit ELF ABIs without PC-Relative addressing require that
  // the caller and callee share the same TOC for TCO/SCO. If the caller and
  // callee potentially have different TOC bases then we cannot tail call since
  // we need to restore the TOC pointer after the call.
  // ref: https://bugzilla.mozilla.org/show_bug.cgi?id=973977
  // We cannot guarantee this for indirect calls or calls to external functions.
  // When PC-Relative addressing is used, the concept of the TOC is no longer
  // applicable so this check is not required.
  // Check first for indirect calls.
  if (!Subtarget.isUsingPCRelativeCalls() &&
      !isFunctionGlobalAddress(CalleeGV) && !isCalleeExternalSymbol)
    return false;

  // Check if we share the TOC base.
  if (!Subtarget.isUsingPCRelativeCalls() &&
      !callsShareTOCBase(CallerFunc, CalleeGV, getTargetMachine()))
    return false;

  // TCO allows altering callee ABI, so we don't have to check further.
  if (CalleeCC == CallingConv::Fast && TailCallOpt)
    return true;

  if (DisableSCO) return false;

  // If callee use the same argument list that caller is using, then we can
  // apply SCO on this case. If it is not, then we need to check if callee needs
  // stack for passing arguments.
  // PC Relative tail calls may not have a CallBase.
  // If there is no CallBase we cannot verify if we have the same argument
  // list so assume that we don't have the same argument list.
  if (CB && !hasSameArgumentList(CallerFunc, *CB) &&
      needStackSlotPassParameters(Subtarget, Outs))
    return false;
  else if (!CB && needStackSlotPassParameters(Subtarget, Outs))
    return false;

  return true;
}
5036
5037/// IsEligibleForTailCallOptimization - Check whether the call is eligible
5038/// for tail call optimization. Targets which want to do tail call
5039/// optimization should implement this function.
5040bool PPCTargetLowering::IsEligibleForTailCallOptimization(
5041 const GlobalValue *CalleeGV, CallingConv::ID CalleeCC,
5042 CallingConv::ID CallerCC, bool isVarArg,
5043 const SmallVectorImpl<ISD::InputArg> &Ins) const {
5044 if (!getTargetMachine().Options.GuaranteedTailCallOpt)
5045 return false;
5046
5047 // Variable argument functions are not supported.
5048 if (isVarArg)
5049 return false;
5050
5051 if (CalleeCC == CallingConv::Fast && CallerCC == CalleeCC) {
5052 // Functions containing by val parameters are not supported.
5053 if (any_of(Ins, [](const ISD::InputArg &IA) { return IA.Flags.isByVal(); }))
5054 return false;
5055
5056 // Non-PIC/GOT tail calls are supported.
5057 if (getTargetMachine().getRelocationModel() != Reloc::PIC_)
5058 return true;
5059
5060 // At the moment we can only do local tail calls (in same module, hidden
5061 // or protected) if we are generating PIC.
5062 if (CalleeGV)
5063 return CalleeGV->hasHiddenVisibility() ||
5064 CalleeGV->hasProtectedVisibility();
5065 }
5066
5067 return false;
5068}
5069
/// isCallCompatibleAddress - Return the immediate to use if the specified
/// 32-bit value is representable in the immediate field of a BxA instruction.
  // No constant node means the address cannot be encoded as an immediate.
  if (!C) return nullptr;

  int Addr = C->getZExtValue();
  // A BLA target must be word aligned and fit in the signed 26-bit
  // displacement field.
  if ((Addr & 3) != 0 || // Low 2 bits are implicitly zero.
      SignExtend32<26>(Addr) != Addr)
    return nullptr; // Top 6 bits have to be sext of immediate.

  // Encode the address shifted right by two, as the instruction stores it.
  return DAG
      (int)C->getZExtValue() >> 2, SDLoc(Op),
      .getNode();
}
5087
namespace {

/// Bookkeeping for one outgoing argument of a tail call: the argument value
/// plus the fixed stack slot (as both a frame-index node and a raw frame
/// index) it must eventually be stored to.
struct TailCallArgumentInfo {
  SDValue Arg;        // The argument value to store.
  SDValue FrameIdxOp; // Frame-index node addressing the destination slot.
  int FrameIdx = 0;   // Raw frame index of the destination slot.

  TailCallArgumentInfo() = default;
};

} // end anonymous namespace
5099
/// StoreTailCallArgumentsToStackSlot - Stores arguments to their stack slot.
    SelectionDAG &DAG, SDValue Chain,
    const SmallVectorImpl<TailCallArgumentInfo> &TailCallArgs,
    SmallVectorImpl<SDValue> &MemOpChains, const SDLoc &dl) {
  // Emit one store per recorded tail-call argument, addressed through the
  // fixed frame index computed earlier; the store chains are collected in
  // MemOpChains for the caller to token-factor together.
  for (unsigned i = 0, e = TailCallArgs.size(); i != e; ++i) {
    SDValue Arg = TailCallArgs[i].Arg;
    SDValue FIN = TailCallArgs[i].FrameIdxOp;
    int FI = TailCallArgs[i].FrameIdx;
    // Store relative to framepointer.
    MemOpChains.push_back(DAG.getStore(
        Chain, dl, Arg, FIN,
  }
}
5115
/// EmitTailCallStoreFPAndRetAddr - Move the frame pointer and return address to
/// the appropriate stack slot for the tail call optimized function call.
                                    SDValue OldRetAddr, SDValue OldFP,
                                    int SPDiff, const SDLoc &dl) {
  // SPDiff == 0 means the tail call reuses the frame as-is; nothing to move.
  if (SPDiff) {
    // Calculate the new stack slot for the return address.
    const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
    const PPCFrameLowering *FL = Subtarget.getFrameLowering();
    // The return-address slot size follows the target pointer size.
    int SlotSize = Subtarget.isPPC64() ? 8 : 4;
    int NewRetAddrLoc = SPDiff + FL->getReturnSaveOffset();
    int NewRetAddr = MF.getFrameInfo().CreateFixedObject(SlotSize,
                                                         NewRetAddrLoc, true);
    SDValue NewRetAddrFrIdx =
        DAG.getFrameIndex(NewRetAddr, Subtarget.getScalarIntVT());
    // Store the old return address into the relocated slot.
    Chain = DAG.getStore(Chain, dl, OldRetAddr, NewRetAddrFrIdx,
                         MachinePointerInfo::getFixedStack(MF, NewRetAddr));
  }
  return Chain;
}
5137
/// CalculateTailCallArgDest - Remember Argument for later processing. Calculate
/// the position of the argument.
    SelectionDAG &DAG, MachineFunction &MF, bool IsPPC64, SDValue Arg,
    int SPDiff, unsigned ArgOffset,
    SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments) {
  // Final location of the argument in the adjusted (tail-call) frame.
  int Offset = ArgOffset + SPDiff;
  // Size of the argument in bytes, rounded up.
  uint32_t OpSize = (Arg.getValueSizeInBits() + 7) / 8;
  int FI = MF.getFrameInfo().CreateFixedObject(OpSize, Offset, true);
  EVT VT = IsPPC64 ? MVT::i64 : MVT::i32;
  SDValue FIN = DAG.getFrameIndex(FI, VT);
  // Record the argument with its destination slot; the actual store is
  // emitted later by StoreTailCallArgumentsToStackSlot.
  TailCallArgumentInfo Info;
  Info.Arg = Arg;
  Info.FrameIdxOp = FIN;
  Info.FrameIdx = FI;
  TailCallArguments.push_back(Info);
}
5155
5156/// EmitTCFPAndRetAddrLoad - Emit load from frame pointer and return address
5157/// stack slot. Returns the chain as result and the loaded frame pointers in
5158/// LROpOut/FPOpout. Used when tail calling.
5159SDValue PPCTargetLowering::EmitTailCallLoadFPAndRetAddr(
5160 SelectionDAG &DAG, int SPDiff, SDValue Chain, SDValue &LROpOut,
5161 SDValue &FPOpOut, const SDLoc &dl) const {
5162 if (SPDiff) {
5163 // Load the LR and FP stack slot for later adjusting.
5164 LROpOut = getReturnAddrFrameIndex(DAG);
5165 LROpOut = DAG.getLoad(Subtarget.getScalarIntVT(), dl, Chain, LROpOut,
5166 MachinePointerInfo());
5167 Chain = SDValue(LROpOut.getNode(), 1);
5168 }
5169 return Chain;
5170}
5171
/// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified
/// by "Src" to address "Dst" of size "Size". Alignment information is
/// specified by the specific parameter attribute. The copy will be passed as
/// a byval function parameter.
/// Sometimes what we are copying is the end of a larger object, the part that
/// does not fit in registers.
                                         SDValue Chain, ISD::ArgFlagsTy Flags,
                                         SelectionDAG &DAG, const SDLoc &dl) {
  // The byval size is encoded in the argument flags set up by the front end.
  SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), dl, MVT::i32);
  return DAG.getMemcpy(
      Chain, dl, Dst, Src, SizeNode, Flags.getNonZeroByValAlign(), false, false,
      /*CI=*/nullptr, std::nullopt, MachinePointerInfo(), MachinePointerInfo());
}
5186
/// LowerMemOpCallTo - Store the argument to the stack or remember it in case of
/// tail calls.
    SelectionDAG &DAG, MachineFunction &MF, SDValue Chain, SDValue Arg,
    SDValue PtrOff, int SPDiff, unsigned ArgOffset, bool isPPC64,
    bool isTailCall, bool isVector, SmallVectorImpl<SDValue> &MemOpChains,
    SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments, const SDLoc &dl) {
  if (!isTailCall) {
    if (isVector) {
      // Vector arguments are addressed relative to the stack pointer
      // (r1/x1) plus the argument offset, rather than through PtrOff.
      SDValue StackPtr;
      if (isPPC64)
        StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
      else
        StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
      PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr,
                           DAG.getConstant(ArgOffset, dl, PtrVT));
    }
    MemOpChains.push_back(
        DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));
    // Calculate and remember argument location.
  } else
    CalculateTailCallArgDest(DAG, MF, isPPC64, Arg, SPDiff, ArgOffset,
                             TailCallArguments);
}
5212
/// Emit the final stores of tail-call arguments to their stack slots, save
/// the return address into its relocated slot, and close the call sequence
/// just before the tail-call node is emitted.
static void
    const SDLoc &dl, int SPDiff, unsigned NumBytes, SDValue LROp,
    SDValue FPOp,
    SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments) {
  // Emit a sequence of copyto/copyfrom virtual registers for arguments that
  // might overwrite each other in case of tail call optimization.
  SmallVector<SDValue, 8> MemOpChains2;
  // Do not flag preceding copytoreg stuff together with the following stuff.
  InGlue = SDValue();
  StoreTailCallArgumentsToStackSlot(DAG, Chain, TailCallArguments,
                                    MemOpChains2, dl);
  if (!MemOpChains2.empty())
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains2);

  // Store the return address to the appropriate stack slot.
  Chain = EmitTailCallStoreFPAndRetAddr(DAG, Chain, LROp, FPOp, SPDiff, dl);

  // Emit callseq_end just before tailcall node.
  Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, InGlue, dl);
  InGlue = Chain.getValue(1);
}
5235
5236// Is this global address that of a function that can be called by name? (as
5237// opposed to something that must hold a descriptor for an indirect call).
5238static bool isFunctionGlobalAddress(const GlobalValue *GV) {
5239 if (GV) {
5240 if (GV->isThreadLocal())
5241 return false;
5242
5243 return GV->getValueType()->isFunctionTy();
5244 }
5245
5246 return false;
5247}
5248
/// Lower the values returned by a call: copy each result out of its assigned
/// physical register, reassembling SPE f64 pairs and undoing any
/// extension/promotion the calling convention applied.
SDValue PPCTargetLowering::LowerCallResult(
    SDValue Chain, SDValue InGlue, CallingConv::ID CallConv, bool isVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
  CCState CCRetInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
                    *DAG.getContext());

  // The cold calling convention on SVR4 uses its own return convention.
  CCRetInfo.AnalyzeCallResult(
      Ins, (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold)
               : RetCC_PPC);

  // Copy all of the result registers out of their specified physreg.
  for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
    CCValAssign &VA = RVLocs[i];
    assert(VA.isRegLoc() && "Can only return in registers!");

    SDValue Val;

    // SPE doubles come back as two i32 halves in consecutive locations.
    if (Subtarget.hasSPE() && VA.getLocVT() == MVT::f64) {
      SDValue Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
                                      InGlue);
      Chain = Lo.getValue(1);
      InGlue = Lo.getValue(2);
      VA = RVLocs[++i]; // skip ahead to next loc
      SDValue Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
                                      InGlue);
      Chain = Hi.getValue(1);
      InGlue = Hi.getValue(2);
      if (!Subtarget.isLittleEndian())
        std::swap (Lo, Hi);
      Val = DAG.getNode(PPCISD::BUILD_SPE64, dl, MVT::f64, Lo, Hi);
    } else {
      Val = DAG.getCopyFromReg(Chain, dl,
                               VA.getLocReg(), VA.getLocVT(), InGlue);
      Chain = Val.getValue(1);
      InGlue = Val.getValue(2);
    }

    // Undo any promotion the calling convention performed on the value.
    switch (VA.getLocInfo()) {
    default: llvm_unreachable("Unknown loc info!");
    case CCValAssign::Full: break;
    case CCValAssign::AExt:
      Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
      break;
    case CCValAssign::ZExt:
      Val = DAG.getNode(ISD::AssertZext, dl, VA.getLocVT(), Val,
                        DAG.getValueType(VA.getValVT()));
      Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
      break;
    case CCValAssign::SExt:
      Val = DAG.getNode(ISD::AssertSext, dl, VA.getLocVT(), Val,
                        DAG.getValueType(VA.getValVT()));
      Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
      break;
    }

    InVals.push_back(Val);
  }

  return Chain;
}
5312
/// Returns true if this call must be lowered as an indirect call (through a
/// function pointer moved into CTR) rather than as a direct branch-and-link.
static bool isIndirectCall(const SDValue &Callee, SelectionDAG &DAG,
                           const PPCSubtarget &Subtarget, bool isPatchPoint) {
  auto *G = dyn_cast<GlobalAddressSDNode>(Callee);
  const GlobalValue *GV = G ? G->getGlobal() : nullptr;

  // PatchPoint calls are not indirect.
  if (isPatchPoint)
    return false;

    return false;

  // Darwin, and 32-bit ELF can use a BLA. The descriptor based ABIs can not
  // becuase the immediate function pointer points to a descriptor instead of
  // a function entry point. The ELFv2 ABI cannot use a BLA because the function
  // pointer immediate points to the global entry point, while the BLA would
  // need to jump to the local entry point (see rL211174).
  if (!Subtarget.usesFunctionDescriptors() && !Subtarget.isELFv2ABI() &&
      isBLACompatibleAddress(Callee, DAG))
    return false;

  return true;
}
5336
5337// AIX and 64-bit ELF ABIs w/o PCRel require a TOC save/restore around calls.
5338static inline bool isTOCSaveRestoreRequired(const PPCSubtarget &Subtarget) {
5339 return Subtarget.isAIXABI() ||
5340 (Subtarget.is64BitELFABI() && !Subtarget.isUsingPCRelativeCalls());
5341}
5342
                              const Function &Caller, const SDValue &Callee,
                              const PPCSubtarget &Subtarget,
                              const TargetMachine &TM,
                              bool IsStrictFPCall = false) {
  // Tail calls are always emitted as TC_RETURN, regardless of ABI.
  if (CFlags.IsTailCall)
    return PPCISD::TC_RETURN;

  unsigned RetOpc = 0;
  // This is a call through a function pointer.
  if (CFlags.IsIndirect) {
    // AIX and the 64-bit ELF ABIs need to maintain the TOC pointer accross
    // indirect calls. The save of the caller's TOC pointer to the stack will be
    // inserted into the DAG as part of call lowering. The restore of the TOC
    // pointer is modeled by using a pseudo instruction for the call opcode that
    // represents the 2 instruction sequence of an indirect branch and link,
    // immediately followed by a load of the TOC pointer from the stack save
    // slot into gpr2. For 64-bit ELFv2 ABI with PCRel, do not restore the TOC
    // as it is not saved or used.
    RetOpc = isTOCSaveRestoreRequired(Subtarget) ? PPCISD::BCTRL_LOAD_TOC
                                                 : PPCISD::BCTRL;
  } else if (Subtarget.isUsingPCRelativeCalls()) {
    assert(Subtarget.is64BitELFABI() && "PC Relative is only on ELF ABI.");
    RetOpc = PPCISD::CALL_NOTOC;
  } else if (Subtarget.isAIXABI() || Subtarget.is64BitELFABI()) {
    // The ABIs that maintain a TOC pointer accross calls need to have a nop
    // immediately following the call instruction if the caller and callee may
    // have different TOC bases. At link time if the linker determines the calls
    // may not share a TOC base, the call is redirected to a trampoline inserted
    // by the linker. The trampoline will (among other things) save the callers
    // TOC pointer at an ABI designated offset in the linkage area and the
    // linker will rewrite the nop to be a load of the TOC pointer from the
    // linkage area into gpr2.
    auto *G = dyn_cast<GlobalAddressSDNode>(Callee);
    const GlobalValue *GV = G ? G->getGlobal() : nullptr;
    RetOpc =
        callsShareTOCBase(&Caller, GV, TM) ? PPCISD::CALL : PPCISD::CALL_NOP;
  } else
    RetOpc = PPCISD::CALL;
  // Strict FP calls use rounding-mode-preserving (_RM) opcode variants.
  if (IsStrictFPCall) {
    switch (RetOpc) {
    default:
      llvm_unreachable("Unknown call opcode");
    case PPCISD::BCTRL_LOAD_TOC:
      RetOpc = PPCISD::BCTRL_LOAD_TOC_RM;
      break;
    case PPCISD::BCTRL:
      RetOpc = PPCISD::BCTRL_RM;
      break;
    case PPCISD::CALL_NOTOC:
      RetOpc = PPCISD::CALL_NOTOC_RM;
      break;
    case PPCISD::CALL:
      RetOpc = PPCISD::CALL_RM;
      break;
    case PPCISD::CALL_NOP:
      RetOpc = PPCISD::CALL_NOP_RM;
      break;
    }
  }
  return RetOpc;
}
5405
/// Rewrite the callee operand into the node form the selected call opcode
/// expects: an absolute-address constant when a BLA can be used, an AIX
/// entry-point MCSymbol, or a target global-address/external-symbol node
/// (optionally flagged for PLT access).
static SDValue transformCallee(const SDValue &Callee, SelectionDAG &DAG,
                               const SDLoc &dl, const PPCSubtarget &Subtarget) {
  if (!Subtarget.usesFunctionDescriptors() && !Subtarget.isELFv2ABI())
    if (SDNode *Dest = isBLACompatibleAddress(Callee, DAG))
      return SDValue(Dest, 0);

  // Returns true if the callee is local, and false otherwise.
  auto isLocalCallee = [&]() {
    const GlobalValue *GV = G ? G->getGlobal() : nullptr;

    return DAG.getTarget().shouldAssumeDSOLocal(GV) &&
  };

  // The PLT is only used in 32-bit ELF PIC mode. Attempting to use the PLT in
  // a static relocation model causes some versions of GNU LD (2.17.50, at
  // least) to force BSS-PLT, instead of secure-PLT, even if all objects are
  // built with secure-PLT.
  bool UsePlt =
      Subtarget.is32BitELFABI() && !isLocalCallee() &&

  const auto getAIXFuncEntryPointSymbolSDNode = [&](const GlobalValue *GV) {
    const TargetMachine &TM = Subtarget.getTargetMachine();
    auto *S =
        static_cast<MCSymbolXCOFF *>(TLOF->getFunctionEntryPointSymbol(GV, TM));

    return DAG.getMCSymbol(S, PtrVT);
  };

  auto *G = dyn_cast<GlobalAddressSDNode>(Callee);
  const GlobalValue *GV = G ? G->getGlobal() : nullptr;
  if (isFunctionGlobalAddress(GV)) {
    const GlobalValue *GV = cast<GlobalAddressSDNode>(Callee)->getGlobal();

    // AIX calls go through the function's entry-point symbol.
    if (Subtarget.isAIXABI()) {
      return getAIXFuncEntryPointSymbolSDNode(GV);
    }
    return DAG.getTargetGlobalAddress(GV, dl, Callee.getValueType(), 0,
                                      UsePlt ? PPCII::MO_PLT : 0);
  }

    const char *SymName = S->getSymbol();
    if (Subtarget.isAIXABI()) {
      // If there exists a user-declared function whose name is the same as the
      // ExternalSymbol's, then we pick up the user-declared version.
      if (const Function *F =
              dyn_cast_or_null<Function>(Mod->getNamedValue(SymName)))
        return getAIXFuncEntryPointSymbolSDNode(F);

      // On AIX, direct function calls reference the symbol for the function's
      // entry point, which is named by prepending a "." before the function's
      // C-linkage name. A Qualname is returned here because an external
      // function entry point is a csect with XTY_ER property.
      const auto getExternalFunctionEntryPointSymbol = [&](StringRef SymName) {
        auto &Context = DAG.getMachineFunction().getContext();
        MCSectionXCOFF *Sec = Context.getXCOFFSection(
            (Twine(".") + Twine(SymName)).str(), SectionKind::getMetadata(),
        return Sec->getQualNameSymbol();
      };

      SymName = getExternalFunctionEntryPointSymbol(SymName)->getName().data();
    }
    return DAG.getTargetExternalSymbol(SymName, Callee.getValueType(),
                                       UsePlt ? PPCII::MO_PLT : 0);
  }

  // No transformation needed.
  assert(Callee.getNode() && "What no callee?");
  return Callee;
}
5483
  assert(CallSeqStart.getOpcode() == ISD::CALLSEQ_START &&
         "Expected a CALLSEQ_STARTSDNode.");

  // The last value produced is the chain, except when the node also produces
  // glue. If the node has glue, then the last value is the glue, and the
  // chain is the second-to-last value.
  SDValue LastValue = CallSeqStart.getValue(CallSeqStart->getNumValues() - 1);
  if (LastValue.getValueType() != MVT::Glue)
    return LastValue;

  return CallSeqStart.getValue(CallSeqStart->getNumValues() - 2);
}
5497
5498// Creates the node that moves a functions address into the count register
5499// to prepare for an indirect call instruction.
5500static void prepareIndirectCall(SelectionDAG &DAG, SDValue &Callee,
5501 SDValue &Glue, SDValue &Chain,
5502 const SDLoc &dl) {
5503 SDValue MTCTROps[] = {Chain, Callee, Glue};
5504 EVT ReturnTypes[] = {MVT::Other, MVT::Glue};
5505 Chain = DAG.getNode(PPCISD::MTCTR, dl, ReturnTypes,
5506 ArrayRef(MTCTROps, Glue.getNode() ? 3 : 2));
5507 // The glue is the second value produced.
5508 Glue = Chain.getValue(1);
5509}
5510
                                          SDValue &Glue, SDValue &Chain,
                                          SDValue CallSeqStart,
                                          const CallBase *CB, const SDLoc &dl,
                                          bool hasNest,
                                          const PPCSubtarget &Subtarget) {
  // Function pointers in the 64-bit SVR4 ABI do not point to the function
  // entry point, but to the function descriptor (the function entry point
  // address is part of the function descriptor though).
  // The function descriptor is a three doubleword structure with the
  // following fields: function entry point, TOC base address and
  // environment pointer.
  // Thus for a call through a function pointer, the following actions need
  // to be performed:
  //   1. Save the TOC of the caller in the TOC save area of its stack
  //      frame (this is done in LowerCall_Darwin() or LowerCall_64SVR4()).
  //   2. Load the address of the function entry point from the function
  //      descriptor.
  //   3. Load the TOC of the callee from the function descriptor into r2.
  //   4. Load the environment pointer from the function descriptor into
  //      r11.
  //   5. Branch to the function entry point address.
  //   6. On return of the callee, the TOC of the caller needs to be
  //      restored (this is done in FinishCall()).
  //
  // The loads are scheduled at the beginning of the call sequence, and the
  // register copies are flagged together to ensure that no other
  // operations can be scheduled in between. E.g. without flagging the
  // copies together, a TOC access in the caller could be scheduled between
  // the assignment of the callee TOC and the branch to the callee, which leads
  // to incorrect code.

  // Start by loading the function address from the descriptor.
  SDValue LDChain = getOutputChainFromCallSeq(CallSeqStart);
  auto MMOFlags = Subtarget.hasInvariantFunctionDescriptors()

  MachinePointerInfo MPI(CB ? CB->getCalledOperand() : nullptr);

  // Registers used in building the DAG.
  const MCRegister EnvPtrReg = Subtarget.getEnvironmentPointerRegister();
  const MCRegister TOCReg = Subtarget.getTOCPointerRegister();

  // Offsets of descriptor members.
  const unsigned TOCAnchorOffset = Subtarget.descriptorTOCAnchorOffset();
  const unsigned EnvPtrOffset = Subtarget.descriptorEnvironmentPointerOffset();

  const MVT RegVT = Subtarget.getScalarIntVT();
  const Align Alignment = Subtarget.isPPC64() ? Align(8) : Align(4);

  // One load for the functions entry point address.
  SDValue LoadFuncPtr = DAG.getLoad(RegVT, dl, LDChain, Callee, MPI,
                                    Alignment, MMOFlags);

  // One for loading the TOC anchor for the module that contains the called
  // function.
  SDValue TOCOff = DAG.getIntPtrConstant(TOCAnchorOffset, dl);
  SDValue AddTOC = DAG.getNode(ISD::ADD, dl, RegVT, Callee, TOCOff);
  SDValue TOCPtr =
      DAG.getLoad(RegVT, dl, LDChain, AddTOC,
                  MPI.getWithOffset(TOCAnchorOffset), Alignment, MMOFlags);

  // One for loading the environment pointer.
  SDValue PtrOff = DAG.getIntPtrConstant(EnvPtrOffset, dl);
  SDValue AddPtr = DAG.getNode(ISD::ADD, dl, RegVT, Callee, PtrOff);
  SDValue LoadEnvPtr =
      DAG.getLoad(RegVT, dl, LDChain, AddPtr,
                  MPI.getWithOffset(EnvPtrOffset), Alignment, MMOFlags);


  // Then copy the newly loaded TOC anchor to the TOC pointer.
  SDValue TOCVal = DAG.getCopyToReg(Chain, dl, TOCReg, TOCPtr, Glue);
  Chain = TOCVal.getValue(0);
  Glue = TOCVal.getValue(1);

  // If the function call has an explicit 'nest' parameter, it takes the
  // place of the environment pointer.
  assert((!hasNest || !Subtarget.isAIXABI()) &&
         "Nest parameter is not supported on AIX.");
  if (!hasNest) {
    SDValue EnvVal = DAG.getCopyToReg(Chain, dl, EnvPtrReg, LoadEnvPtr, Glue);
    Chain = EnvVal.getValue(0);
    Glue = EnvVal.getValue(1);
  }

  // The rest of the indirect call sequence is the same as the non-descriptor
  // DAG.
  prepareIndirectCall(DAG, LoadFuncPtr, Glue, Chain, dl);
}
5602
/// Assemble the operand list for a PPC call node: chain, callee (or
/// TOC-restore / environment-pointer / CTR operands for indirect calls),
/// tail-call stack delta, argument registers, TOC register, CR bit 6 for
/// 32-bit vararg calls, the call-preserved register mask, and optional glue.
static void
                  PPCTargetLowering::CallFlags CFlags, const SDLoc &dl,
                  SelectionDAG &DAG,
                  SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass,
                  SDValue Glue, SDValue Chain, SDValue &Callee, int SPDiff,
                  const PPCSubtarget &Subtarget) {
  const bool IsPPC64 = Subtarget.isPPC64();
  // MVT for a general purpose register.
  const MVT RegVT = Subtarget.getScalarIntVT();

  // First operand is always the chain.
  Ops.push_back(Chain);

  // If it's a direct call pass the callee as the second operand.
  if (!CFlags.IsIndirect)
    Ops.push_back(Callee);
  else {
    assert(!CFlags.IsPatchPoint && "Patch point calls are not indirect.");

    // For the TOC based ABIs, we have saved the TOC pointer to the linkage area
    // on the stack (this would have been done in `LowerCall_64SVR4` or
    // `LowerCall_AIX`). The call instruction is a pseudo instruction that
    // represents both the indirect branch and a load that restores the TOC
    // pointer from the linkage area. The operand for the TOC restore is an add
    // of the TOC save offset to the stack pointer. This must be the second
    // operand: after the chain input but before any other variadic arguments.
    // For 64-bit ELFv2 ABI with PCRel, do not restore the TOC as it is not
    // saved or used.
    if (isTOCSaveRestoreRequired(Subtarget)) {
      const MCRegister StackPtrReg = Subtarget.getStackPointerRegister();

      SDValue StackPtr = DAG.getRegister(StackPtrReg, RegVT);
      unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
      SDValue TOCOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
      SDValue AddTOC = DAG.getNode(ISD::ADD, dl, RegVT, StackPtr, TOCOff);
      Ops.push_back(AddTOC);
    }

    // Add the register used for the environment pointer.
    if (Subtarget.usesFunctionDescriptors() && !CFlags.HasNest)
      Ops.push_back(DAG.getRegister(Subtarget.getEnvironmentPointerRegister(),
                                    RegVT));


    // Add CTR register as callee so a bctr can be emitted later.
    if (CFlags.IsTailCall)
      Ops.push_back(DAG.getRegister(IsPPC64 ? PPC::CTR8 : PPC::CTR, RegVT));
  }

  // If this is a tail call add stack pointer delta.
  if (CFlags.IsTailCall)
    Ops.push_back(DAG.getConstant(SPDiff, dl, MVT::i32));

  // Add argument registers to the end of the list so that they are known live
  // into the call.
  for (const auto &[Reg, N] : RegsToPass)
    Ops.push_back(DAG.getRegister(Reg, N.getValueType()));

  // We cannot add R2/X2 as an operand here for PATCHPOINT, because there is
  // no way to mark dependencies as implicit here.
  // We will add the R2/X2 dependency in EmitInstrWithCustomInserter.
  if ((Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) &&
      !CFlags.IsPatchPoint && !Subtarget.isUsingPCRelativeCalls())
    Ops.push_back(DAG.getRegister(Subtarget.getTOCPointerRegister(), RegVT));

  // Add implicit use of CR bit 6 for 32-bit SVR4 vararg calls
  if (CFlags.IsVarArg && Subtarget.is32BitELFABI())
    Ops.push_back(DAG.getRegister(PPC::CR1EQ, MVT::i32));

  // Add a register mask operand representing the call-preserved registers.
  const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
  const uint32_t *Mask =
      TRI->getCallPreservedMask(DAG.getMachineFunction(), CFlags.CallConv);
  assert(Mask && "Missing call preserved mask for calling convention");
  Ops.push_back(DAG.getRegisterMask(Mask));

  // If the glue is valid, it is the last operand.
  if (Glue.getNode())
    Ops.push_back(Glue);
}
5684
/// Finalize call lowering: select the call opcode, transform the callee
/// operand (or prepare the indirect-call sequence), build the operand list,
/// emit either a TC_RETURN tail call or a normal call node, close the call
/// sequence, and lower the returned values into InVals.
SDValue PPCTargetLowering::FinishCall(
    CallFlags CFlags, const SDLoc &dl, SelectionDAG &DAG,
    SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass, SDValue Glue,
    SDValue Chain, SDValue CallSeqStart, SDValue &Callee, int SPDiff,
    unsigned NumBytes, const SmallVectorImpl<ISD::InputArg> &Ins,
    SmallVectorImpl<SDValue> &InVals, const CallBase *CB) const {

  if ((Subtarget.is64BitELFABI() && !Subtarget.isUsingPCRelativeCalls()) ||
      Subtarget.isAIXABI())
    setUsesTOCBasePtr(DAG);

  unsigned CallOpc =
      getCallOpcode(CFlags, DAG.getMachineFunction().getFunction(), Callee,
                    Subtarget, DAG.getTarget(), CB ? CB->isStrictFP() : false);

  // Direct calls rewrite the callee node; indirect calls build the MTCTR
  // (and, for descriptor ABIs, descriptor-load) sequence instead.
  if (!CFlags.IsIndirect)
    Callee = transformCallee(Callee, DAG, dl, Subtarget);
  else if (Subtarget.usesFunctionDescriptors())
    prepareDescriptorIndirectCall(DAG, Callee, Glue, Chain, CallSeqStart, CB,
                                  dl, CFlags.HasNest, Subtarget);
  else
    prepareIndirectCall(DAG, Callee, Glue, Chain, dl);

  // Build the operand list for the call instruction.
  buildCallOperands(Ops, CFlags, dl, DAG, RegsToPass, Glue, Chain, Callee,
                    SPDiff, Subtarget);

  // Emit tail call.
  if (CFlags.IsTailCall) {
    // Indirect tail call when using PC Relative calls do not have the same
    // constraints.
    assert(((Callee.getOpcode() == ISD::Register &&
             cast<RegisterSDNode>(Callee)->getReg() == PPC::CTR) ||
            Callee.getOpcode() == ISD::TargetExternalSymbol ||
            Callee.getOpcode() == ISD::TargetGlobalAddress ||
            isa<ConstantSDNode>(Callee) ||
            (CFlags.IsIndirect && Subtarget.isUsingPCRelativeCalls())) &&
           "Expecting a global address, external symbol, absolute value, "
           "register or an indirect tail call when PC Relative calls are "
           "used.");
    // PC Relative calls also use TC_RETURN as the way to mark tail calls.
    assert(CallOpc == PPCISD::TC_RETURN &&
           "Unexpected call opcode for a tail call.");
    SDValue Ret = DAG.getNode(CallOpc, dl, MVT::Other, Ops);
    DAG.addNoMergeSiteInfo(Ret.getNode(), CFlags.NoMerge);
    return Ret;
  }

  std::array<EVT, 2> ReturnTypes = {{MVT::Other, MVT::Glue}};
  Chain = DAG.getNode(CallOpc, dl, ReturnTypes, Ops);
  DAG.addNoMergeSiteInfo(Chain.getNode(), CFlags.NoMerge);
  Glue = Chain.getValue(1);

  // When performing tail call optimization the callee pops its arguments off
  // the stack. Account for this here so these bytes can be pushed back on in
  // PPCFrameLowering::eliminateCallFramePseudoInstr.
  int BytesCalleePops = (CFlags.CallConv == CallingConv::Fast &&
                            ? NumBytes
                            : 0;

  Chain = DAG.getCALLSEQ_END(Chain, NumBytes, BytesCalleePops, Glue, dl);
  Glue = Chain.getValue(1);

  return LowerCallResult(Chain, Glue, CFlags.CallConv, CFlags.IsVarArg, Ins, dl,
                         DAG, InVals);
}
5754
  CallingConv::ID CalleeCC = CB->getCallingConv();
  const Function *CallerFunc = CB->getCaller();
  CallingConv::ID CallerCC = CallerFunc->getCallingConv();
  const Function *CalleeFunc = CB->getCalledFunction();
  // Calls without a statically known callee cannot be checked here.
  if (!CalleeFunc)
    return false;
  const GlobalValue *CalleeGV = dyn_cast<GlobalValue>(CalleeFunc);


  // Compute the callee's return-value lowering so the eligibility check can
  // inspect the outgoing return arguments.
  GetReturnInfo(CalleeCC, CalleeFunc->getReturnType(),
                CalleeFunc->getAttributes(), Outs, *this,
                CalleeFunc->getDataLayout());

  return isEligibleForTCO(CalleeGV, CalleeCC, CallerCC, CB,
                          CalleeFunc->isVarArg(), Outs, Ins, CallerFunc,
                          false /*isCalleeExternalSymbol*/);
}
5775
// Decide whether this call site may be tail-call optimized, dispatching to
// the ABI-specific eligibility check.
bool PPCTargetLowering::isEligibleForTCO(
    const GlobalValue *CalleeGV, CallingConv::ID CalleeCC,
    CallingConv::ID CallerCC, const CallBase *CB, bool isVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const Function *CallerFunc,
    bool isCalleeExternalSymbol) const {
  // Under -mlongcall every call is made through a pointer, which defeats TCO;
  // only a call site explicitly marked musttail overrides that.
  if (Subtarget.useLongCalls() && !(CB && CB->isMustTailCall()))
    return false;

  // The 64-bit SVR4 (ELF) ABI has its own, more detailed eligibility check;
  // every other configuration uses the generic one.
  if (Subtarget.isSVR4ABI() && Subtarget.isPPC64())
    return IsEligibleForTailCallOptimization_64SVR4(
        CalleeGV, CalleeCC, CallerCC, CB, isVarArg, Outs, Ins, CallerFunc,
        isCalleeExternalSymbol);
  else
    return IsEligibleForTailCallOptimization(CalleeGV, CalleeCC, CallerCC,
                                             isVarArg, Ins);
}
5793
// Common entry point for call lowering: settles tail-call eligibility,
// builds the CallFlags bundle, and dispatches to the ABI-specific
// LowerCall_* implementation (AIX, 64-bit SVR4, or 32-bit SVR4).
SDValue
PPCTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
                             SmallVectorImpl<SDValue> &InVals) const {
  // Unpack the call lowering info into locals for readability.
  SelectionDAG &DAG = CLI.DAG;
  SDLoc &dl = CLI.DL;
  SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
  SDValue Chain = CLI.Chain;
  SDValue Callee = CLI.Callee;
  bool &isTailCall = CLI.IsTailCall;
  CallingConv::ID CallConv = CLI.CallConv;
  bool isVarArg = CLI.IsVarArg;
  bool isPatchPoint = CLI.IsPatchPoint;
  const CallBase *CB = CLI.CB;

  if (isTailCall) {
    // CLI.IsTailCall is only a request; clear it unless this call site is
    // actually eligible for tail-call optimization.
    CallingConv::ID CallerCC = MF.getFunction().getCallingConv();
    auto *G = dyn_cast<GlobalAddressSDNode>(Callee);
    const GlobalValue *GV = G ? G->getGlobal() : nullptr;
    bool IsCalleeExternalSymbol = isa<ExternalSymbolSDNode>(Callee);

    isTailCall =
        isEligibleForTCO(GV, CallConv, CallerCC, CB, isVarArg, Outs, Ins,
                         &(MF.getFunction()), IsCalleeExternalSymbol);
    if (isTailCall) {
      ++NumTailCalls;
      // Without GuaranteedTailCallOpt this is a sibling call (no stack
      // adjustment between caller and callee).
      if (!getTargetMachine().Options.GuaranteedTailCallOpt)
        ++NumSiblingCalls;

      // PC Relative calls no longer guarantee that the callee is a Global
      // Address Node. The callee could be an indirect tail call in which
      // case the SDValue for the callee could be a load (to load the address
      // of a function pointer) or it may be a register copy (to move the
      // address of the callee from a function parameter into a virtual
      // register). It may also be an ExternalSymbolSDNode (ex memcopy).
      assert((Subtarget.isUsingPCRelativeCalls() ||
              isa<GlobalAddressSDNode>(Callee)) &&
             "Callee should be an llvm::Function object.");

      LLVM_DEBUG(dbgs() << "TCO caller: " << DAG.getMachineFunction().getName()
                        << "\nTCO callee: ");
      LLVM_DEBUG(Callee.dump());
    }
  }

  // musttail call sites require tail-call lowering; failing to honor them is
  // a hard error rather than a silent fallback.
  if (!isTailCall && CB && CB->isMustTailCall())
    report_fatal_error("failed to perform tail call elimination on a call "
                       "site marked musttail");

  // When long calls (i.e. indirect calls) are always used, calls are always
  // made via function pointer. If we have a function name, first translate it
  // into a pointer.
  if (Subtarget.useLongCalls() && isa<GlobalAddressSDNode>(Callee) &&
      !isTailCall)
    Callee = LowerGlobalAddress(Callee, DAG);

  CallFlags CFlags(
      CallConv, isTailCall, isVarArg, isPatchPoint,
      isIndirectCall(Callee, DAG, Subtarget, isPatchPoint),
      // hasNest
      Subtarget.is64BitELFABI() &&
          any_of(Outs, [](ISD::OutputArg Arg) { return Arg.Flags.isNest(); }),
      CLI.NoMerge);

  // Dispatch to the ABI-specific lowering routine.
  if (Subtarget.isAIXABI())
    return LowerCall_AIX(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
                         InVals, CB);

  assert(Subtarget.isSVR4ABI());
  if (Subtarget.isPPC64())
    return LowerCall_64SVR4(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
                            InVals, CB);
  return LowerCall_32SVR4(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
                          InVals, CB);
}
5871
// Lower an outgoing call for the 32-bit SVR4 (ELF) ABI: assigns argument
// locations, materializes register copies and stack stores, handles byval
// aggregates and vararg CR6 signalling, then hands off to FinishCall.
SDValue PPCTargetLowering::LowerCall_32SVR4(
    SDValue Chain, SDValue Callee, CallFlags CFlags,
    const SmallVectorImpl<SDValue> &OutVals,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
    const CallBase *CB) const {
  // See PPCTargetLowering::LowerFormalArguments_32SVR4() for a description
  // of the 32-bit SVR4 ABI stack frame layout.

  const CallingConv::ID CallConv = CFlags.CallConv;
  const bool IsVarArg = CFlags.IsVarArg;
  const bool IsTailCall = CFlags.IsTailCall;

  assert((CallConv == CallingConv::C ||
          CallConv == CallingConv::Cold ||
          CallConv == CallingConv::Fast) && "Unknown calling convention!");

  // Pointer width (and hence stack-slot alignment) on PPC32 is 4 bytes.
  const Align PtrAlign(4);

  MachineFunction &MF = DAG.getMachineFunction();

  // Mark this function as potentially containing a function that contains a
  // tail call. As a consequence the frame pointer will be used for dynamicalloc
  // and restoring the callers stack pointer in this functions epilog. This is
  // done because by tail calling the called function might overwrite the value
  // in this function's (MF) stack pointer stack slot 0(SP).
  if (getTargetMachine().Options.GuaranteedTailCallOpt &&
      CallConv == CallingConv::Fast)
    MF.getInfo<PPCFunctionInfo>()->setHasFastCall();

  // Count how many bytes are to be pushed on the stack, including the linkage
  // area, parameter list area and the part of the local variable space which
  // contains copies of aggregates which are passed by value.

  // Assign locations to all of the outgoing arguments.
  CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());

  // Reserve space for the linkage area on the stack.
  CCInfo.AllocateStack(Subtarget.getFrameLowering()->getLinkageSize(),
                       PtrAlign);

  if (IsVarArg) {
    // Handle fixed and variable vector arguments differently.
    // Fixed vector arguments go into registers as long as registers are
    // available. Variable vector arguments always go into memory.
    unsigned NumArgs = Outs.size();

    for (unsigned i = 0; i != NumArgs; ++i) {
      MVT ArgVT = Outs[i].VT;
      ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
      bool Result;

      if (!ArgFlags.isVarArg()) {
        Result = CC_PPC32_SVR4(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags,
                               Outs[i].OrigTy, CCInfo);
      } else {
                                 ArgFlags, Outs[i].OrigTy, CCInfo);
      }

      // The CC_PPC32_* assignment functions return true on failure to assign
      // a location, which is a bug in the calling-convention tables.
      if (Result) {
#ifndef NDEBUG
        errs() << "Call operand #" << i << " has unhandled type "
               << ArgVT << "\n";
#endif
        llvm_unreachable(nullptr);
      }
    }
  } else {
    // All arguments are treated the same.
    CCInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4);
  }

  // Assign locations to all of the outgoing aggregate by value arguments.
  SmallVector<CCValAssign, 16> ByValArgLocs;
  CCState CCByValInfo(CallConv, IsVarArg, MF, ByValArgLocs, *DAG.getContext());

  // Reserve stack space for the allocations in CCInfo.
  CCByValInfo.AllocateStack(CCInfo.getStackSize(), PtrAlign);

  CCByValInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4_ByVal);

  // Size of the linkage area, parameter list area and the part of the local
  // space variable where copies of aggregates which are passed by value are
  // stored.
  unsigned NumBytes = CCByValInfo.getStackSize();

  // Calculate by how many bytes the stack has to be adjusted in case of tail
  // call optimization.
  int SPDiff = CalculateTailCallSPDiff(DAG, IsTailCall, NumBytes);

  // Adjust the stack pointer for the new arguments...
  // These operations are automatically eliminated by the prolog/epilog pass
  Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
  SDValue CallSeqStart = Chain;

  // Load the return address and frame pointer so it can be moved somewhere else
  // later.
  SDValue LROp, FPOp;
  Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl);

  // Set up a copy of the stack pointer for use loading and storing any
  // arguments that may not fit in the registers available for argument
  // passing.
  SDValue StackPtr = DAG.getRegister(PPC::R1, MVT::i32);

  SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
  SmallVector<SDValue, 8> MemOpChains;

  bool seenFloatArg = false;
  // Walk the register/memloc assignments, inserting copies/loads.
  // i - Tracks the index into the list of registers allocated for the call
  // RealArgIdx - Tracks the index into the list of actual function arguments
  // j - Tracks the index into the list of byval arguments
  for (unsigned i = 0, RealArgIdx = 0, j = 0, e = ArgLocs.size();
       i != e;
       ++i, ++RealArgIdx) {
    CCValAssign &VA = ArgLocs[i];
    SDValue Arg = OutVals[RealArgIdx];
    ISD::ArgFlagsTy Flags = Outs[RealArgIdx].Flags;

    if (Flags.isByVal()) {
      // Argument is an aggregate which is passed by value, thus we need to
      // create a copy of it in the local variable space of the current stack
      // frame (which is the stack frame of the caller) and pass the address of
      // this copy to the callee.
      assert((j < ByValArgLocs.size()) && "Index out of bounds!");
      CCValAssign &ByValVA = ByValArgLocs[j++];
      assert((VA.getValNo() == ByValVA.getValNo()) && "ValNo mismatch!");

      // Memory reserved in the local variable space of the callers stack frame.
      unsigned LocMemOffset = ByValVA.getLocMemOffset();

      SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
      PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(MF.getDataLayout()),
                           StackPtr, PtrOff);

      // Create a copy of the argument in the local area of the current
      // stack frame.
      SDValue MemcpyCall =
        CreateCopyOfByValArgument(Arg, PtrOff,
                                  CallSeqStart.getNode()->getOperand(0),
                                  Flags, DAG, dl);

      // This must go outside the CALLSEQ_START..END.
      SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall, NumBytes, 0,
                                                     SDLoc(MemcpyCall));
      DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
                             NewCallSeqStart.getNode());
      Chain = CallSeqStart = NewCallSeqStart;

      // Pass the address of the aggregate copy on the stack either in a
      // physical register or in the parameter list area of the current stack
      // frame to the callee.
      Arg = PtrOff;
    }

    // When useCRBits() is true, there can be i1 arguments.
    // It is because getRegisterType(MVT::i1) => MVT::i1,
    // and for other integer types getRegisterType() => MVT::i32.
    // Extend i1 and ensure callee will get i32.
    if (Arg.getValueType() == MVT::i1)
      Arg = DAG.getNode(Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND,
                        dl, MVT::i32, Arg);

    if (VA.isRegLoc()) {
      seenFloatArg |= VA.getLocVT().isFloatingPoint();
      // Put argument in a physical register.
      if (Subtarget.hasSPE() && Arg.getValueType() == MVT::f64) {
        // With SPE an f64 is split into two i32 halves, each passed in its
        // own GPR; consume two consecutive register assignments (note ++i).
        bool IsLE = Subtarget.isLittleEndian();
        SDValue SVal = DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
                                   DAG.getIntPtrConstant(IsLE ? 0 : 1, dl));
        RegsToPass.push_back(std::make_pair(VA.getLocReg(), SVal.getValue(0)));
        SVal = DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
                           DAG.getIntPtrConstant(IsLE ? 1 : 0, dl));
        RegsToPass.push_back(std::make_pair(ArgLocs[++i].getLocReg(),
                                            SVal.getValue(0)));
      } else
        RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
    } else {
      // Put argument in the parameter list area of the current stack frame.
      assert(VA.isMemLoc());
      unsigned LocMemOffset = VA.getLocMemOffset();

      if (!IsTailCall) {
        SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
        PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(MF.getDataLayout()),
                             StackPtr, PtrOff);

        MemOpChains.push_back(
            DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));
      } else {
        // Calculate and remember argument location.
        CalculateTailCallArgDest(DAG, MF, false, Arg, SPDiff, LocMemOffset,
                                 TailCallArguments);
      }
    }
  }

  if (!MemOpChains.empty())
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);

  // Build a sequence of copy-to-reg nodes chained together with token chain
  // and flag operands which copy the outgoing args into the appropriate regs.
  SDValue InGlue;
  for (const auto &[Reg, N] : RegsToPass) {
    Chain = DAG.getCopyToReg(Chain, dl, Reg, N, InGlue);
    InGlue = Chain.getValue(1);
  }

  // Set CR bit 6 to true if this is a vararg call with floating args passed in
  // registers.
  if (IsVarArg) {
    SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
    SDValue Ops[] = { Chain, InGlue };

    // On the first copy-to-reg there is no incoming glue, so only pass the
    // Chain operand in that case.
    Chain = DAG.getNode(seenFloatArg ? PPCISD::CR6SET : PPCISD::CR6UNSET, dl,
                        VTs, ArrayRef(Ops, InGlue.getNode() ? 2 : 1));

    InGlue = Chain.getValue(1);
  }

  if (IsTailCall)
    PrepareTailCall(DAG, InGlue, Chain, dl, SPDiff, NumBytes, LROp, FPOp,
                    TailCallArguments);

  return FinishCall(CFlags, dl, DAG, RegsToPass, InGlue, Chain, CallSeqStart,
                    Callee, SPDiff, NumBytes, Ins, InVals, CB);
}
6104
6105// Copy an argument into memory, being careful to do this outside the
6106// call sequence for the call to which the argument belongs.
6107SDValue PPCTargetLowering::createMemcpyOutsideCallSeq(
6108 SDValue Arg, SDValue PtrOff, SDValue CallSeqStart, ISD::ArgFlagsTy Flags,
6109 SelectionDAG &DAG, const SDLoc &dl) const {
6110 SDValue MemcpyCall = CreateCopyOfByValArgument(Arg, PtrOff,
6111 CallSeqStart.getNode()->getOperand(0),
6112 Flags, DAG, dl);
6113 // The MEMCPY must go outside the CALLSEQ_START..END.
6114 int64_t FrameSize = CallSeqStart.getConstantOperandVal(1);
6115 SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall, FrameSize, 0,
6116 SDLoc(MemcpyCall));
6117 DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
6118 NewCallSeqStart.getNode());
6119 return NewCallSeqStart;
6120}
6121
6122SDValue PPCTargetLowering::LowerCall_64SVR4(
6123 SDValue Chain, SDValue Callee, CallFlags CFlags,
6125 const SmallVectorImpl<SDValue> &OutVals,
6126 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
6128 const CallBase *CB) const {
6129 bool isELFv2ABI = Subtarget.isELFv2ABI();
6130 bool isLittleEndian = Subtarget.isLittleEndian();
6131 unsigned NumOps = Outs.size();
6132 bool IsSibCall = false;
6133 bool IsFastCall = CFlags.CallConv == CallingConv::Fast;
6134
6135 EVT PtrVT = getPointerTy(DAG.getDataLayout());
6136 unsigned PtrByteSize = 8;
6137
6138 MachineFunction &MF = DAG.getMachineFunction();
6139
6140 if (CFlags.IsTailCall && !getTargetMachine().Options.GuaranteedTailCallOpt)
6141 IsSibCall = true;
6142
6143 // Mark this function as potentially containing a function that contains a
6144 // tail call. As a consequence the frame pointer will be used for dynamicalloc
6145 // and restoring the callers stack pointer in this functions epilog. This is
6146 // done because by tail calling the called function might overwrite the value
6147 // in this function's (MF) stack pointer stack slot 0(SP).
6148 if (getTargetMachine().Options.GuaranteedTailCallOpt && IsFastCall)
6149 MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
6150
6151 assert(!(IsFastCall && CFlags.IsVarArg) &&
6152 "fastcc not supported on varargs functions");
6153
6154 // Count how many bytes are to be pushed on the stack, including the linkage
6155 // area, and parameter passing area. On ELFv1, the linkage area is 48 bytes
6156 // reserved space for [SP][CR][LR][2 x unused][TOC]; on ELFv2, the linkage
6157 // area is 32 bytes reserved space for [SP][CR][LR][TOC].
6158 unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
6159 unsigned NumBytes = LinkageSize;
6160 unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
6161
6162 static const MCPhysReg GPR[] = {
6163 PPC::X3, PPC::X4, PPC::X5, PPC::X6,
6164 PPC::X7, PPC::X8, PPC::X9, PPC::X10,
6165 };
6166 static const MCPhysReg VR[] = {
6167 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
6168 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
6169 };
6170
6171 const unsigned NumGPRs = std::size(GPR);
6172 const unsigned NumFPRs = useSoftFloat() ? 0 : 13;
6173 const unsigned NumVRs = std::size(VR);
6174
6175 // On ELFv2, we can avoid allocating the parameter area if all the arguments
6176 // can be passed to the callee in registers.
6177 // For the fast calling convention, there is another check below.
6178 // Note: We should keep consistent with LowerFormalArguments_64SVR4()
6179 bool HasParameterArea = !isELFv2ABI || CFlags.IsVarArg || IsFastCall;
6180 if (!HasParameterArea) {
6181 unsigned ParamAreaSize = NumGPRs * PtrByteSize;
6182 unsigned AvailableFPRs = NumFPRs;
6183 unsigned AvailableVRs = NumVRs;
6184 unsigned NumBytesTmp = NumBytes;
6185 for (unsigned i = 0; i != NumOps; ++i) {
6186 if (Outs[i].Flags.isNest()) continue;
6187 if (CalculateStackSlotUsed(Outs[i].VT, Outs[i].ArgVT, Outs[i].Flags,
6188 PtrByteSize, LinkageSize, ParamAreaSize,
6189 NumBytesTmp, AvailableFPRs, AvailableVRs))
6190 HasParameterArea = true;
6191 }
6192 }
6193
6194 // When using the fast calling convention, we don't provide backing for
6195 // arguments that will be in registers.
6196 unsigned NumGPRsUsed = 0, NumFPRsUsed = 0, NumVRsUsed = 0;
6197
6198 // Avoid allocating parameter area for fastcc functions if all the arguments
6199 // can be passed in the registers.
6200 if (IsFastCall)
6201 HasParameterArea = false;
6202
6203 // Add up all the space actually used.
6204 for (unsigned i = 0; i != NumOps; ++i) {
6205 ISD::ArgFlagsTy Flags = Outs[i].Flags;
6206 EVT ArgVT = Outs[i].VT;
6207 EVT OrigVT = Outs[i].ArgVT;
6208
6209 if (Flags.isNest())
6210 continue;
6211
6212 if (IsFastCall) {
6213 if (Flags.isByVal()) {
6214 NumGPRsUsed += (Flags.getByValSize()+7)/8;
6215 if (NumGPRsUsed > NumGPRs)
6216 HasParameterArea = true;
6217 } else {
6218 switch (ArgVT.getSimpleVT().SimpleTy) {
6219 default: llvm_unreachable("Unexpected ValueType for argument!");
6220 case MVT::i1:
6221 case MVT::i32:
6222 case MVT::i64:
6223 if (++NumGPRsUsed <= NumGPRs)
6224 continue;
6225 break;
6226 case MVT::v4i32:
6227 case MVT::v8i16:
6228 case MVT::v16i8:
6229 case MVT::v2f64:
6230 case MVT::v2i64:
6231 case MVT::v1i128:
6232 case MVT::f128:
6233 if (++NumVRsUsed <= NumVRs)
6234 continue;
6235 break;
6236 case MVT::v4f32:
6237 if (++NumVRsUsed <= NumVRs)
6238 continue;
6239 break;
6240 case MVT::f32:
6241 case MVT::f64:
6242 if (++NumFPRsUsed <= NumFPRs)
6243 continue;
6244 break;
6245 }
6246 HasParameterArea = true;
6247 }
6248 }
6249
6250 /* Respect alignment of argument on the stack. */
6251 auto Alignement =
6252 CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
6253 NumBytes = alignTo(NumBytes, Alignement);
6254
6255 NumBytes += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
6256 if (Flags.isInConsecutiveRegsLast())
6257 NumBytes = ((NumBytes + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
6258 }
6259
6260 unsigned NumBytesActuallyUsed = NumBytes;
6261
6262 // In the old ELFv1 ABI,
6263 // the prolog code of the callee may store up to 8 GPR argument registers to
6264 // the stack, allowing va_start to index over them in memory if its varargs.
6265 // Because we cannot tell if this is needed on the caller side, we have to
6266 // conservatively assume that it is needed. As such, make sure we have at
6267 // least enough stack space for the caller to store the 8 GPRs.
6268 // In the ELFv2 ABI, we allocate the parameter area iff a callee
6269 // really requires memory operands, e.g. a vararg function.
6270 if (HasParameterArea)
6271 NumBytes = std::max(NumBytes, LinkageSize + 8 * PtrByteSize);
6272 else
6273 NumBytes = LinkageSize;
6274
6275 // Tail call needs the stack to be aligned.
6276 if (getTargetMachine().Options.GuaranteedTailCallOpt && IsFastCall)
6277 NumBytes = EnsureStackAlignment(Subtarget.getFrameLowering(), NumBytes);
6278
6279 int SPDiff = 0;
6280
6281 // Calculate by how many bytes the stack has to be adjusted in case of tail
6282 // call optimization.
6283 if (!IsSibCall)
6284 SPDiff = CalculateTailCallSPDiff(DAG, CFlags.IsTailCall, NumBytes);
6285
6286 // To protect arguments on the stack from being clobbered in a tail call,
6287 // force all the loads to happen before doing any other lowering.
6288 if (CFlags.IsTailCall)
6289 Chain = DAG.getStackArgumentTokenFactor(Chain);
6290
6291 // Adjust the stack pointer for the new arguments...
6292 // These operations are automatically eliminated by the prolog/epilog pass
6293 if (!IsSibCall)
6294 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
6295 SDValue CallSeqStart = Chain;
6296
6297 // Load the return address and frame pointer so it can be move somewhere else
6298 // later.
6299 SDValue LROp, FPOp;
6300 Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl);
6301
6302 // Set up a copy of the stack pointer for use loading and storing any
6303 // arguments that may not fit in the registers available for argument
6304 // passing.
6305 SDValue StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
6306
6307 // Figure out which arguments are going to go in registers, and which in
6308 // memory. Also, if this is a vararg function, floating point operations
6309 // must be stored to our stack, and loaded into integer regs as well, if
6310 // any integer regs are available for argument passing.
6311 unsigned ArgOffset = LinkageSize;
6312
6314 SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
6315
6316 SmallVector<SDValue, 8> MemOpChains;
6317 for (unsigned i = 0; i != NumOps; ++i) {
6318 SDValue Arg = OutVals[i];
6319 ISD::ArgFlagsTy Flags = Outs[i].Flags;
6320 EVT ArgVT = Outs[i].VT;
6321 EVT OrigVT = Outs[i].ArgVT;
6322
6323 // PtrOff will be used to store the current argument to the stack if a
6324 // register cannot be found for it.
6325 SDValue PtrOff;
6326
6327 // We re-align the argument offset for each argument, except when using the
6328 // fast calling convention, when we need to make sure we do that only when
6329 // we'll actually use a stack slot.
6330 auto ComputePtrOff = [&]() {
6331 /* Respect alignment of argument on the stack. */
6332 auto Alignment =
6333 CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
6334 ArgOffset = alignTo(ArgOffset, Alignment);
6335
6336 PtrOff = DAG.getConstant(ArgOffset, dl, StackPtr.getValueType());
6337
6338 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
6339 };
6340
6341 if (!IsFastCall) {
6342 ComputePtrOff();
6343
6344 /* Compute GPR index associated with argument offset. */
6345 GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
6346 GPR_idx = std::min(GPR_idx, NumGPRs);
6347 }
6348
6349 // Promote integers to 64-bit values.
6350 if (Arg.getValueType() == MVT::i32 || Arg.getValueType() == MVT::i1) {
6351 // FIXME: Should this use ANY_EXTEND if neither sext nor zext?
6352 unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
6353 Arg = DAG.getNode(ExtOp, dl, MVT::i64, Arg);
6354 }
6355
6356 // FIXME memcpy is used way more than necessary. Correctness first.
6357 // Note: "by value" is code for passing a structure by value, not
6358 // basic types.
6359 if (Flags.isByVal()) {
6360 // Note: Size includes alignment padding, so
6361 // struct x { short a; char b; }
6362 // will have Size = 4. With #pragma pack(1), it will have Size = 3.
6363 // These are the proper values we need for right-justifying the
6364 // aggregate in a parameter register.
6365 unsigned Size = Flags.getByValSize();
6366
6367 // An empty aggregate parameter takes up no storage and no
6368 // registers.
6369 if (Size == 0)
6370 continue;
6371
6372 if (IsFastCall)
6373 ComputePtrOff();
6374
6375 // All aggregates smaller than 8 bytes must be passed right-justified.
6376 if (Size==1 || Size==2 || Size==4) {
6377 EVT VT = (Size==1) ? MVT::i8 : ((Size==2) ? MVT::i16 : MVT::i32);
6378 if (GPR_idx != NumGPRs) {
6379 SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg,
6380 MachinePointerInfo(), VT);
6381 MemOpChains.push_back(Load.getValue(1));
6382 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6383
6384 ArgOffset += PtrByteSize;
6385 continue;
6386 }
6387 }
6388
6389 if (GPR_idx == NumGPRs && Size < 8) {
6390 SDValue AddPtr = PtrOff;
6391 if (!isLittleEndian) {
6392 SDValue Const = DAG.getConstant(PtrByteSize - Size, dl,
6393 PtrOff.getValueType());
6394 AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
6395 }
6396 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
6397 CallSeqStart,
6398 Flags, DAG, dl);
6399 ArgOffset += PtrByteSize;
6400 continue;
6401 }
6402 // Copy the object to parameter save area if it can not be entirely passed
6403 // by registers.
6404 // FIXME: we only need to copy the parts which need to be passed in
6405 // parameter save area. For the parts passed by registers, we don't need
6406 // to copy them to the stack although we need to allocate space for them
6407 // in parameter save area.
6408 if ((NumGPRs - GPR_idx) * PtrByteSize < Size)
6409 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, PtrOff,
6410 CallSeqStart,
6411 Flags, DAG, dl);
6412
6413 // When a register is available, pass a small aggregate right-justified.
6414 if (Size < 8 && GPR_idx != NumGPRs) {
6415 // The easiest way to get this right-justified in a register
6416 // is to copy the structure into the rightmost portion of a
6417 // local variable slot, then load the whole slot into the
6418 // register.
6419 // FIXME: The memcpy seems to produce pretty awful code for
6420 // small aggregates, particularly for packed ones.
6421 // FIXME: It would be preferable to use the slot in the
6422 // parameter save area instead of a new local variable.
6423 SDValue AddPtr = PtrOff;
6424 if (!isLittleEndian) {
6425 SDValue Const = DAG.getConstant(8 - Size, dl, PtrOff.getValueType());
6426 AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
6427 }
6428 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
6429 CallSeqStart,
6430 Flags, DAG, dl);
6431
6432 // Load the slot into the register.
6433 SDValue Load =
6434 DAG.getLoad(PtrVT, dl, Chain, PtrOff, MachinePointerInfo());
6435 MemOpChains.push_back(Load.getValue(1));
6436 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6437
6438 // Done with this argument.
6439 ArgOffset += PtrByteSize;
6440 continue;
6441 }
6442
6443 // For aggregates larger than PtrByteSize, copy the pieces of the
6444 // object that fit into registers from the parameter save area.
6445 for (unsigned j=0; j<Size; j+=PtrByteSize) {
6446 SDValue Const = DAG.getConstant(j, dl, PtrOff.getValueType());
6447 SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
6448 if (GPR_idx != NumGPRs) {
6449 unsigned LoadSizeInBits = std::min(PtrByteSize, (Size - j)) * 8;
6450 EVT ObjType = EVT::getIntegerVT(*DAG.getContext(), LoadSizeInBits);
6451 SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, AddArg,
6452 MachinePointerInfo(), ObjType);
6453
6454 MemOpChains.push_back(Load.getValue(1));
6455 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6456 ArgOffset += PtrByteSize;
6457 } else {
6458 ArgOffset += ((Size - j + PtrByteSize-1)/PtrByteSize)*PtrByteSize;
6459 break;
6460 }
6461 }
6462 continue;
6463 }
6464
6465 switch (Arg.getSimpleValueType().SimpleTy) {
6466 default: llvm_unreachable("Unexpected ValueType for argument!");
6467 case MVT::i1:
6468 case MVT::i32:
6469 case MVT::i64:
6470 if (Flags.isNest()) {
6471 // The 'nest' parameter, if any, is passed in R11.
6472 RegsToPass.push_back(std::make_pair(PPC::X11, Arg));
6473 break;
6474 }
6475
6476 // These can be scalar arguments or elements of an integer array type
6477 // passed directly. Clang may use those instead of "byval" aggregate
6478 // types to avoid forcing arguments to memory unnecessarily.
6479 if (GPR_idx != NumGPRs) {
6480 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg));
6481 } else {
6482 if (IsFastCall)
6483 ComputePtrOff();
6484
6485 assert(HasParameterArea &&
6486 "Parameter area must exist to pass an argument in memory.");
6487 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6488 true, CFlags.IsTailCall, false, MemOpChains,
6489 TailCallArguments, dl);
6490 if (IsFastCall)
6491 ArgOffset += PtrByteSize;
6492 }
6493 if (!IsFastCall)
6494 ArgOffset += PtrByteSize;
6495 break;
6496 case MVT::f32:
6497 case MVT::f64: {
6498 // These can be scalar arguments or elements of a float array type
6499 // passed directly. The latter are used to implement ELFv2 homogenous
6500 // float aggregates.
6501
6502 // Named arguments go into FPRs first, and once they overflow, the
6503 // remaining arguments go into GPRs and then the parameter save area.
6504 // Unnamed arguments for vararg functions always go to GPRs and
6505 // then the parameter save area. For now, put all arguments to vararg
6506 // routines always in both locations (FPR *and* GPR or stack slot).
6507 bool NeedGPROrStack = CFlags.IsVarArg || FPR_idx == NumFPRs;
6508 bool NeededLoad = false;
6509
6510 // First load the argument into the next available FPR.
6511 if (FPR_idx != NumFPRs)
6512 RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg));
6513
6514 // Next, load the argument into GPR or stack slot if needed.
6515 if (!NeedGPROrStack)
6516 ;
6517 else if (GPR_idx != NumGPRs && !IsFastCall) {
6518 // FIXME: We may want to re-enable this for CallingConv::Fast on the P8
6519 // once we support fp <-> gpr moves.
6520
6521 // In the non-vararg case, this can only ever happen in the
6522 // presence of f32 array types, since otherwise we never run
6523 // out of FPRs before running out of GPRs.
6524 SDValue ArgVal;
6525
6526 // Double values are always passed in a single GPR.
6527 if (Arg.getValueType() != MVT::f32) {
6528 ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i64, Arg);
6529
6530 // Non-array float values are extended and passed in a GPR.
6531 } else if (!Flags.isInConsecutiveRegs()) {
6532 ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
6533 ArgVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, ArgVal);
6534
6535 // If we have an array of floats, we collect every odd element
6536 // together with its predecessor into one GPR.
6537 } else if (ArgOffset % PtrByteSize != 0) {
6538 SDValue Lo, Hi;
6539 Lo = DAG.getNode(ISD::BITCAST, dl, MVT::i32, OutVals[i - 1]);
6540 Hi = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
6541 if (!isLittleEndian)
6542 std::swap(Lo, Hi);
6543 ArgVal = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
6544
6545 // The final element, if even, goes into the first half of a GPR.
6546 } else if (Flags.isInConsecutiveRegsLast()) {
6547 ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
6548 ArgVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, ArgVal);
6549 if (!isLittleEndian)
6550 ArgVal = DAG.getNode(ISD::SHL, dl, MVT::i64, ArgVal,
6551 DAG.getConstant(32, dl, MVT::i32));
6552
6553 // Non-final even elements are skipped; they will be handled
6554 // together the with subsequent argument on the next go-around.
6555 } else
6556 ArgVal = SDValue();
6557
6558 if (ArgVal.getNode())
6559 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], ArgVal));
6560 } else {
6561 if (IsFastCall)
6562 ComputePtrOff();
6563
6564 // Single-precision floating-point values are mapped to the
6565 // second (rightmost) word of the stack doubleword.
6566 if (Arg.getValueType() == MVT::f32 &&
6567 !isLittleEndian && !Flags.isInConsecutiveRegs()) {
6568 SDValue ConstFour = DAG.getConstant(4, dl, PtrOff.getValueType());
6569 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour);
6570 }
6571
6572 assert(HasParameterArea &&
6573 "Parameter area must exist to pass an argument in memory.");
6574 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6575 true, CFlags.IsTailCall, false, MemOpChains,
6576 TailCallArguments, dl);
6577
6578 NeededLoad = true;
6579 }
6580 // When passing an array of floats, the array occupies consecutive
6581 // space in the argument area; only round up to the next doubleword
6582 // at the end of the array. Otherwise, each float takes 8 bytes.
6583 if (!IsFastCall || NeededLoad) {
6584 ArgOffset += (Arg.getValueType() == MVT::f32 &&
6585 Flags.isInConsecutiveRegs()) ? 4 : 8;
6586 if (Flags.isInConsecutiveRegsLast())
6587 ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
6588 }
6589 break;
6590 }
6591 case MVT::v4f32:
6592 case MVT::v4i32:
6593 case MVT::v8i16:
6594 case MVT::v16i8:
6595 case MVT::v2f64:
6596 case MVT::v2i64:
6597 case MVT::v1i128:
6598 case MVT::f128:
6599 // These can be scalar arguments or elements of a vector array type
6600 // passed directly. The latter are used to implement ELFv2 homogenous
6601 // vector aggregates.
6602
6603 // For a varargs call, named arguments go into VRs or on the stack as
6604 // usual; unnamed arguments always go to the stack or the corresponding
6605 // GPRs when within range. For now, we always put the value in both
6606 // locations (or even all three).
6607 if (CFlags.IsVarArg) {
6608 assert(HasParameterArea &&
6609 "Parameter area must exist if we have a varargs call.");
6610 // We could elide this store in the case where the object fits
6611 // entirely in R registers. Maybe later.
6612 SDValue Store =
6613 DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo());
6614 MemOpChains.push_back(Store);
6615 if (VR_idx != NumVRs) {
6616 SDValue Load =
6617 DAG.getLoad(MVT::v4f32, dl, Store, PtrOff, MachinePointerInfo());
6618 MemOpChains.push_back(Load.getValue(1));
6619 RegsToPass.push_back(std::make_pair(VR[VR_idx++], Load));
6620 }
6621 ArgOffset += 16;
6622 for (unsigned i=0; i<16; i+=PtrByteSize) {
6623 if (GPR_idx == NumGPRs)
6624 break;
6625 SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
6626 DAG.getConstant(i, dl, PtrVT));
6627 SDValue Load =
6628 DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo());
6629 MemOpChains.push_back(Load.getValue(1));
6630 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6631 }
6632 break;
6633 }
6634
6635 // Non-varargs Altivec params go into VRs or on the stack.
6636 if (VR_idx != NumVRs) {
6637 RegsToPass.push_back(std::make_pair(VR[VR_idx++], Arg));
6638 } else {
6639 if (IsFastCall)
6640 ComputePtrOff();
6641
6642 assert(HasParameterArea &&
6643 "Parameter area must exist to pass an argument in memory.");
6644 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6645 true, CFlags.IsTailCall, true, MemOpChains,
6646 TailCallArguments, dl);
6647 if (IsFastCall)
6648 ArgOffset += 16;
6649 }
6650
6651 if (!IsFastCall)
6652 ArgOffset += 16;
6653 break;
6654 }
6655 }
6656
6657 assert((!HasParameterArea || NumBytesActuallyUsed == ArgOffset) &&
6658 "mismatch in size of parameter area");
6659 (void)NumBytesActuallyUsed;
6660
6661 if (!MemOpChains.empty())
6662 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
6663
6664 // Check if this is an indirect call (MTCTR/BCTRL).
6665 // See prepareDescriptorIndirectCall and buildCallOperands for more
6666 // information about calls through function pointers in the 64-bit SVR4 ABI.
6667 if (CFlags.IsIndirect) {
6668 // For 64-bit ELFv2 ABI with PCRel, do not save the TOC of the
6669 // caller in the TOC save area.
6670 if (isTOCSaveRestoreRequired(Subtarget)) {
6671 assert(!CFlags.IsTailCall && "Indirect tails calls not supported");
6672 // Load r2 into a virtual register and store it to the TOC save area.
6673 setUsesTOCBasePtr(DAG);
6674 SDValue Val = DAG.getCopyFromReg(Chain, dl, PPC::X2, MVT::i64);
6675 // TOC save area offset.
6676 unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
6677 SDValue PtrOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
6678 SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
6679 Chain = DAG.getStore(Val.getValue(1), dl, Val, AddPtr,
6681 DAG.getMachineFunction(), TOCSaveOffset));
6682 }
6683 // In the ELFv2 ABI, R12 must contain the address of an indirect callee.
6684 // This does not mean the MTCTR instruction must use R12; it's easier
6685 // to model this as an extra parameter, so do that.
6686 if (isELFv2ABI && !CFlags.IsPatchPoint)
6687 RegsToPass.push_back(std::make_pair((unsigned)PPC::X12, Callee));
6688 }
6689
6690 // Build a sequence of copy-to-reg nodes chained together with token chain
6691 // and flag operands which copy the outgoing args into the appropriate regs.
6692 SDValue InGlue;
6693 for (const auto &[Reg, N] : RegsToPass) {
6694 Chain = DAG.getCopyToReg(Chain, dl, Reg, N, InGlue);
6695 InGlue = Chain.getValue(1);
6696 }
6697
6698 if (CFlags.IsTailCall && !IsSibCall)
6699 PrepareTailCall(DAG, InGlue, Chain, dl, SPDiff, NumBytes, LROp, FPOp,
6700 TailCallArguments);
6701
6702 return FinishCall(CFlags, dl, DAG, RegsToPass, InGlue, Chain, CallSeqStart,
6703 Callee, SPDiff, NumBytes, Ins, InVals, CB);
6704}
6705
6706// Returns true when the shadow of a general purpose argument register
6707// in the parameter save area is aligned to at least 'RequiredAlign'.
6708static bool isGPRShadowAligned(MCPhysReg Reg, Align RequiredAlign) {
6709 assert(RequiredAlign.value() <= 16 &&
6710 "Required alignment greater than stack alignment.");
6711 switch (Reg) {
6712 default:
6713 report_fatal_error("called on invalid register.");
6714 case PPC::R5:
6715 case PPC::R9:
6716 case PPC::X3:
6717 case PPC::X5:
6718 case PPC::X7:
6719 case PPC::X9:
6720 // These registers are 16 byte aligned which is the most strict aligment
6721 // we can support.
6722 return true;
6723 case PPC::R3:
6724 case PPC::R7:
6725 case PPC::X4:
6726 case PPC::X6:
6727 case PPC::X8:
6728 case PPC::X10:
6729 // The shadow of these registers in the PSA is 8 byte aligned.
6730 return RequiredAlign <= 8;
6731 case PPC::R4:
6732 case PPC::R6:
6733 case PPC::R8:
6734 case PPC::R10:
6735 return RequiredAlign <= 4;
6736 }
6737}
6738
6739static bool CC_AIX(unsigned ValNo, MVT ValVT, MVT LocVT,
6740 CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
6741 Type *OrigTy, CCState &State) {
6742 const PPCSubtarget &Subtarget = static_cast<const PPCSubtarget &>(
6743 State.getMachineFunction().getSubtarget());
6744 const bool IsPPC64 = Subtarget.isPPC64();
6745 const unsigned PtrSize = IsPPC64 ? 8 : 4;
6746 const Align PtrAlign(PtrSize);
6747 const Align StackAlign(16);
6748 const MVT RegVT = Subtarget.getScalarIntVT();
6749
6750 if (ValVT == MVT::f128)
6751 report_fatal_error("f128 is unimplemented on AIX.");
6752
6753 static const MCPhysReg GPR_32[] = {// 32-bit registers.
6754 PPC::R3, PPC::R4, PPC::R5, PPC::R6,
6755 PPC::R7, PPC::R8, PPC::R9, PPC::R10};
6756 static const MCPhysReg GPR_64[] = {// 64-bit registers.
6757 PPC::X3, PPC::X4, PPC::X5, PPC::X6,
6758 PPC::X7, PPC::X8, PPC::X9, PPC::X10};
6759
6760 static const MCPhysReg VR[] = {// Vector registers.
6761 PPC::V2, PPC::V3, PPC::V4, PPC::V5,
6762 PPC::V6, PPC::V7, PPC::V8, PPC::V9,
6763 PPC::V10, PPC::V11, PPC::V12, PPC::V13};
6764
6765 const ArrayRef<MCPhysReg> GPRs = IsPPC64 ? GPR_64 : GPR_32;
6766
6767 if (ArgFlags.isNest()) {
6768 MCRegister EnvReg = State.AllocateReg(IsPPC64 ? PPC::X11 : PPC::R11);
6769 if (!EnvReg)
6770 report_fatal_error("More then one nest argument.");
6771 State.addLoc(CCValAssign::getReg(ValNo, ValVT, EnvReg, RegVT, LocInfo));
6772 return false;
6773 }
6774
6775 if (ArgFlags.isByVal()) {
6776 const Align ByValAlign(ArgFlags.getNonZeroByValAlign());
6777 if (ByValAlign > StackAlign)
6778 report_fatal_error("Pass-by-value arguments with alignment greater than "
6779 "16 are not supported.");
6780
6781 const unsigned ByValSize = ArgFlags.getByValSize();
6782 const Align ObjAlign = ByValAlign > PtrAlign ? ByValAlign : PtrAlign;
6783
6784 // An empty aggregate parameter takes up no storage and no registers,
6785 // but needs a MemLoc for a stack slot for the formal arguments side.
6786 if (ByValSize == 0) {
6788 State.getStackSize(), RegVT, LocInfo));
6789 return false;
6790 }
6791
6792 // Shadow allocate any registers that are not properly aligned.
6793 unsigned NextReg = State.getFirstUnallocated(GPRs);
6794 while (NextReg != GPRs.size() &&
6795 !isGPRShadowAligned(GPRs[NextReg], ObjAlign)) {
6796 // Shadow allocate next registers since its aligment is not strict enough.
6797 MCRegister Reg = State.AllocateReg(GPRs);
6798 // Allocate the stack space shadowed by said register.
6799 State.AllocateStack(PtrSize, PtrAlign);
6800 assert(Reg && "Alocating register unexpectedly failed.");
6801 (void)Reg;
6802 NextReg = State.getFirstUnallocated(GPRs);
6803 }
6804
6805 const unsigned StackSize = alignTo(ByValSize, ObjAlign);
6806 unsigned Offset = State.AllocateStack(StackSize, ObjAlign);
6807 for (const unsigned E = Offset + StackSize; Offset < E; Offset += PtrSize) {
6808 if (MCRegister Reg = State.AllocateReg(GPRs))
6809 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, RegVT, LocInfo));
6810 else {
6813 LocInfo));
6814 break;
6815 }
6816 }
6817 return false;
6818 }
6819
6820 // Arguments always reserve parameter save area.
6821 switch (ValVT.SimpleTy) {
6822 default:
6823 report_fatal_error("Unhandled value type for argument.");
6824 case MVT::i64:
6825 // i64 arguments should have been split to i32 for PPC32.
6826 assert(IsPPC64 && "PPC32 should have split i64 values.");
6827 [[fallthrough]];
6828 case MVT::i1:
6829 case MVT::i32: {
6830 const unsigned Offset = State.AllocateStack(PtrSize, PtrAlign);
6831 // AIX integer arguments are always passed in register width.
6832 if (ValVT.getFixedSizeInBits() < RegVT.getFixedSizeInBits())
6833 LocInfo = ArgFlags.isSExt() ? CCValAssign::LocInfo::SExt
6835 if (MCRegister Reg = State.AllocateReg(GPRs))
6836 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, RegVT, LocInfo));
6837 else
6838 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, RegVT, LocInfo));
6839
6840 return false;
6841 }
6842 case MVT::f32:
6843 case MVT::f64: {
6844 // Parameter save area (PSA) is reserved even if the float passes in fpr.
6845 const unsigned StoreSize = LocVT.getStoreSize();
6846 // Floats are always 4-byte aligned in the PSA on AIX.
6847 // This includes f64 in 64-bit mode for ABI compatibility.
6848 const unsigned Offset =
6849 State.AllocateStack(IsPPC64 ? 8 : StoreSize, Align(4));
6850 MCRegister FReg = State.AllocateReg(FPR);
6851 if (FReg)
6852 State.addLoc(CCValAssign::getReg(ValNo, ValVT, FReg, LocVT, LocInfo));
6853
6854 // Reserve and initialize GPRs or initialize the PSA as required.
6855 for (unsigned I = 0; I < StoreSize; I += PtrSize) {
6856 if (MCRegister Reg = State.AllocateReg(GPRs)) {
6857 assert(FReg && "An FPR should be available when a GPR is reserved.");
6858 if (State.isVarArg()) {
6859 // Successfully reserved GPRs are only initialized for vararg calls.
6860 // Custom handling is required for:
6861 // f64 in PPC32 needs to be split into 2 GPRs.
6862 // f32 in PPC64 needs to occupy only lower 32 bits of 64-bit GPR.
6863 State.addLoc(
6864 CCValAssign::getCustomReg(ValNo, ValVT, Reg, RegVT, LocInfo));
6865 }
6866 } else {
6867 // If there are insufficient GPRs, the PSA needs to be initialized.
6868 // Initialization occurs even if an FPR was initialized for
6869 // compatibility with the AIX XL compiler. The full memory for the
6870 // argument will be initialized even if a prior word is saved in GPR.
6871 // A custom memLoc is used when the argument also passes in FPR so
6872 // that the callee handling can skip over it easily.
6873 State.addLoc(
6874 FReg ? CCValAssign::getCustomMem(ValNo, ValVT, Offset, LocVT,
6875 LocInfo)
6876 : CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
6877 break;
6878 }
6879 }
6880
6881 return false;
6882 }
6883 case MVT::v4f32:
6884 case MVT::v4i32:
6885 case MVT::v8i16:
6886 case MVT::v16i8:
6887 case MVT::v2i64:
6888 case MVT::v2f64:
6889 case MVT::v1i128: {
6890 const unsigned VecSize = 16;
6891 const Align VecAlign(VecSize);
6892
6893 if (!State.isVarArg()) {
6894 // If there are vector registers remaining we don't consume any stack
6895 // space.
6896 if (MCRegister VReg = State.AllocateReg(VR)) {
6897 State.addLoc(CCValAssign::getReg(ValNo, ValVT, VReg, LocVT, LocInfo));
6898 return false;
6899 }
6900 // Vectors passed on the stack do not shadow GPRs or FPRs even though they
6901 // might be allocated in the portion of the PSA that is shadowed by the
6902 // GPRs.
6903 const unsigned Offset = State.AllocateStack(VecSize, VecAlign);
6904 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
6905 return false;
6906 }
6907
6908 unsigned NextRegIndex = State.getFirstUnallocated(GPRs);
6909 // Burn any underaligned registers and their shadowed stack space until
6910 // we reach the required alignment.
6911 while (NextRegIndex != GPRs.size() &&
6912 !isGPRShadowAligned(GPRs[NextRegIndex], VecAlign)) {
6913 // Shadow allocate register and its stack shadow.
6914 MCRegister Reg = State.AllocateReg(GPRs);
6915 State.AllocateStack(PtrSize, PtrAlign);
6916 assert(Reg && "Allocating register unexpectedly failed.");
6917 (void)Reg;
6918 NextRegIndex = State.getFirstUnallocated(GPRs);
6919 }
6920
6921 // Vectors that are passed as fixed arguments are handled differently.
6922 // They are passed in VRs if any are available (unlike arguments passed
6923 // through ellipses) and shadow GPRs (unlike arguments to non-vaarg
6924 // functions)
6925 if (!ArgFlags.isVarArg()) {
6926 if (MCRegister VReg = State.AllocateReg(VR)) {
6927 State.addLoc(CCValAssign::getReg(ValNo, ValVT, VReg, LocVT, LocInfo));
6928 // Shadow allocate GPRs and stack space even though we pass in a VR.
6929 for (unsigned I = 0; I != VecSize; I += PtrSize)
6930 State.AllocateReg(GPRs);
6931 State.AllocateStack(VecSize, VecAlign);
6932 return false;
6933 }
6934 // No vector registers remain so pass on the stack.
6935 const unsigned Offset = State.AllocateStack(VecSize, VecAlign);
6936 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
6937 return false;
6938 }
6939
6940 // If all GPRS are consumed then we pass the argument fully on the stack.
6941 if (NextRegIndex == GPRs.size()) {
6942 const unsigned Offset = State.AllocateStack(VecSize, VecAlign);
6943 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
6944 return false;
6945 }
6946
6947 // Corner case for 32-bit codegen. We have 2 registers to pass the first
6948 // half of the argument, and then need to pass the remaining half on the
6949 // stack.
6950 if (GPRs[NextRegIndex] == PPC::R9) {
6951 const unsigned Offset = State.AllocateStack(VecSize, VecAlign);
6952 State.addLoc(
6953 CCValAssign::getCustomMem(ValNo, ValVT, Offset, LocVT, LocInfo));
6954
6955 const MCRegister FirstReg = State.AllocateReg(PPC::R9);
6956 const MCRegister SecondReg = State.AllocateReg(PPC::R10);
6957 assert(FirstReg && SecondReg &&
6958 "Allocating R9 or R10 unexpectedly failed.");
6959 State.addLoc(
6960 CCValAssign::getCustomReg(ValNo, ValVT, FirstReg, RegVT, LocInfo));
6961 State.addLoc(
6962 CCValAssign::getCustomReg(ValNo, ValVT, SecondReg, RegVT, LocInfo));
6963 return false;
6964 }
6965
6966 // We have enough GPRs to fully pass the vector argument, and we have
6967 // already consumed any underaligned registers. Start with the custom
6968 // MemLoc and then the custom RegLocs.
6969 const unsigned Offset = State.AllocateStack(VecSize, VecAlign);
6970 State.addLoc(
6971 CCValAssign::getCustomMem(ValNo, ValVT, Offset, LocVT, LocInfo));
6972 for (unsigned I = 0; I != VecSize; I += PtrSize) {
6973 const MCRegister Reg = State.AllocateReg(GPRs);
6974 assert(Reg && "Failed to allocated register for vararg vector argument");
6975 State.addLoc(
6976 CCValAssign::getCustomReg(ValNo, ValVT, Reg, RegVT, LocInfo));
6977 }
6978 return false;
6979 }
6980 }
6981 return true;
6982}
6983
6984// So far, this function is only used by LowerFormalArguments_AIX()
6986 bool IsPPC64,
6987 bool HasP8Vector,
6988 bool HasVSX) {
6989 assert((IsPPC64 || SVT != MVT::i64) &&
6990 "i64 should have been split for 32-bit codegen.");
6991
6992 switch (SVT) {
6993 default:
6994 report_fatal_error("Unexpected value type for formal argument");
6995 case MVT::i1:
6996 case MVT::i32:
6997 case MVT::i64:
6998 return IsPPC64 ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
6999 case MVT::f32:
7000 return HasP8Vector ? &PPC::VSSRCRegClass : &PPC::F4RCRegClass;
7001 case MVT::f64:
7002 return HasVSX ? &PPC::VSFRCRegClass : &PPC::F8RCRegClass;
7003 case MVT::v4f32:
7004 case MVT::v4i32:
7005 case MVT::v8i16:
7006 case MVT::v16i8:
7007 case MVT::v2i64:
7008 case MVT::v2f64:
7009 case MVT::v1i128:
7010 return &PPC::VRRCRegClass;
7011 }
7012}
7013
7015 SelectionDAG &DAG, SDValue ArgValue,
7016 MVT LocVT, const SDLoc &dl) {
7017 assert(ValVT.isScalarInteger() && LocVT.isScalarInteger());
7018 assert(ValVT.getFixedSizeInBits() < LocVT.getFixedSizeInBits());
7019
7020 if (Flags.isSExt())
7021 ArgValue = DAG.getNode(ISD::AssertSext, dl, LocVT, ArgValue,
7022 DAG.getValueType(ValVT));
7023 else if (Flags.isZExt())
7024 ArgValue = DAG.getNode(ISD::AssertZext, dl, LocVT, ArgValue,
7025 DAG.getValueType(ValVT));
7026
7027 return DAG.getNode(ISD::TRUNCATE, dl, ValVT, ArgValue);
7028}
7029
7030static unsigned mapArgRegToOffsetAIX(unsigned Reg, const PPCFrameLowering *FL) {
7031 const unsigned LASize = FL->getLinkageSize();
7032
7033 if (PPC::GPRCRegClass.contains(Reg)) {
7034 assert(Reg >= PPC::R3 && Reg <= PPC::R10 &&
7035 "Reg must be a valid argument register!");
7036 return LASize + 4 * (Reg - PPC::R3);
7037 }
7038
7039 if (PPC::G8RCRegClass.contains(Reg)) {
7040 assert(Reg >= PPC::X3 && Reg <= PPC::X10 &&
7041 "Reg must be a valid argument register!");
7042 return LASize + 8 * (Reg - PPC::X3);
7043 }
7044
7045 llvm_unreachable("Only general purpose registers expected.");
7046}
7047
7048// AIX ABI Stack Frame Layout:
7049//
7050// Low Memory +--------------------------------------------+
7051// SP +---> | Back chain | ---+
7052// | +--------------------------------------------+ |
7053// | | Saved Condition Register | |
7054// | +--------------------------------------------+ |
7055// | | Saved Linkage Register | |
7056// | +--------------------------------------------+ | Linkage Area
7057// | | Reserved for compilers | |
7058// | +--------------------------------------------+ |
7059// | | Reserved for binders | |
7060// | +--------------------------------------------+ |
7061// | | Saved TOC pointer | ---+
7062// | +--------------------------------------------+
7063// | | Parameter save area |
7064// | +--------------------------------------------+
7065// | | Alloca space |
7066// | +--------------------------------------------+
7067// | | Local variable space |
7068// | +--------------------------------------------+
7069// | | Float/int conversion temporary |
7070// | +--------------------------------------------+
7071// | | Save area for AltiVec registers |
7072// | +--------------------------------------------+
7073// | | AltiVec alignment padding |
7074// | +--------------------------------------------+
7075// | | Save area for VRSAVE register |
7076// | +--------------------------------------------+
7077// | | Save area for General Purpose registers |
7078// | +--------------------------------------------+
7079// | | Save area for Floating Point registers |
7080// | +--------------------------------------------+
7081// +---- | Back chain |
7082// High Memory +--------------------------------------------+
7083//
7084// Specifications:
7085// AIX 7.2 Assembler Language Reference
7086// Subroutine linkage convention
7087
7088SDValue PPCTargetLowering::LowerFormalArguments_AIX(
7089 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
7090 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
7091 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
7092
7093 assert((CallConv == CallingConv::C || CallConv == CallingConv::Cold ||
7094 CallConv == CallingConv::Fast) &&
7095 "Unexpected calling convention!");
7096
7097 if (getTargetMachine().Options.GuaranteedTailCallOpt)
7098 report_fatal_error("Tail call support is unimplemented on AIX.");
7099
7100 if (useSoftFloat())
7101 report_fatal_error("Soft float support is unimplemented on AIX.");
7102
7103 const PPCSubtarget &Subtarget = DAG.getSubtarget<PPCSubtarget>();
7104
7105 const bool IsPPC64 = Subtarget.isPPC64();
7106 const unsigned PtrByteSize = IsPPC64 ? 8 : 4;
7107
7108 // Assign locations to all of the incoming arguments.
7110 MachineFunction &MF = DAG.getMachineFunction();
7111 MachineFrameInfo &MFI = MF.getFrameInfo();
7112 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
7113 CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());
7114
7115 const EVT PtrVT = getPointerTy(MF.getDataLayout());
7116 // Reserve space for the linkage area on the stack.
7117 const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
7118 CCInfo.AllocateStack(LinkageSize, Align(PtrByteSize));
7119 uint64_t SaveStackPos = CCInfo.getStackSize();
7120 bool SaveParams = MF.getFunction().hasFnAttribute("save-reg-params");
7121 CCInfo.AnalyzeFormalArguments(Ins, CC_AIX);
7122
7124
7125 for (size_t I = 0, End = ArgLocs.size(); I != End; /* No increment here */) {
7126 CCValAssign &VA = ArgLocs[I++];
7127 MVT LocVT = VA.getLocVT();
7128 MVT ValVT = VA.getValVT();
7129 ISD::ArgFlagsTy Flags = Ins[VA.getValNo()].Flags;
7130
7131 EVT ArgVT = Ins[VA.getValNo()].ArgVT;
7132 bool ArgSignExt = Ins[VA.getValNo()].Flags.isSExt();
7133 // For compatibility with the AIX XL compiler, the float args in the
7134 // parameter save area are initialized even if the argument is available
7135 // in register. The caller is required to initialize both the register
7136 // and memory, however, the callee can choose to expect it in either.
7137 // The memloc is dismissed here because the argument is retrieved from
7138 // the register.
7139 if (VA.isMemLoc() && VA.needsCustom() && ValVT.isFloatingPoint())
7140 continue;
7141
7142 if (SaveParams && VA.isRegLoc() && !Flags.isByVal() && !VA.needsCustom()) {
7143 const TargetRegisterClass *RegClass = getRegClassForSVT(
7144 LocVT.SimpleTy, IsPPC64, Subtarget.hasP8Vector(), Subtarget.hasVSX());
7145 // On PPC64, debugger assumes extended 8-byte values are stored from GPR.
7146 MVT SaveVT = RegClass == &PPC::G8RCRegClass ? MVT::i64 : LocVT;
7147 const Register VReg = MF.addLiveIn(VA.getLocReg(), RegClass);
7148 SDValue Parm = DAG.getCopyFromReg(Chain, dl, VReg, SaveVT);
7149 int FI = MFI.CreateFixedObject(SaveVT.getStoreSize(), SaveStackPos, true);
7150 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
7151 SDValue StoreReg = DAG.getStore(Chain, dl, Parm, FIN,
7152 MachinePointerInfo(), Align(PtrByteSize));
7153 SaveStackPos = alignTo(SaveStackPos + SaveVT.getStoreSize(), PtrByteSize);
7154 MemOps.push_back(StoreReg);
7155 }
7156
7157 if (SaveParams && (VA.isMemLoc() || Flags.isByVal()) && !VA.needsCustom()) {
7158 unsigned StoreSize =
7159 Flags.isByVal() ? Flags.getByValSize() : LocVT.getStoreSize();
7160 SaveStackPos = alignTo(SaveStackPos + StoreSize, PtrByteSize);
7161 }
7162
7163 auto HandleMemLoc = [&]() {
7164 const unsigned LocSize = LocVT.getStoreSize();
7165 const unsigned ValSize = ValVT.getStoreSize();
7166 assert((ValSize <= LocSize) &&
7167 "Object size is larger than size of MemLoc");
7168 int CurArgOffset = VA.getLocMemOffset();
7169 // Objects are right-justified because AIX is big-endian.
7170 if (LocSize > ValSize)
7171 CurArgOffset += LocSize - ValSize;
7172 // Potential tail calls could cause overwriting of argument stack slots.
7173 const bool IsImmutable =
7175 (CallConv == CallingConv::Fast));
7176 int FI = MFI.CreateFixedObject(ValSize, CurArgOffset, IsImmutable);
7177 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
7178 SDValue ArgValue =
7179 DAG.getLoad(ValVT, dl, Chain, FIN, MachinePointerInfo());
7180
7181 // While the ABI specifies the argument type is (sign or zero) extended
7182 // out to register width, not all code is compliant. We truncate and
7183 // re-extend to be more forgiving of these callers when the argument type
7184 // is smaller than register width.
7185 if (!ArgVT.isVector() && !ValVT.isVector() && ArgVT.isInteger() &&
7186 ValVT.isInteger() &&
7187 ArgVT.getScalarSizeInBits() < ValVT.getScalarSizeInBits()) {
7188 // It is possible to have either real integer values
7189 // or integers that were not originally integers.
7190 // In the latter case, these could have came from structs,
7191 // and these integers would not have an extend on the parameter.
7192 // Since these types of integers do not have an extend specified
7193 // in the first place, the type of extend that we do should not matter.
7194 EVT TruncatedArgVT = ArgVT.isSimple() && ArgVT.getSimpleVT() == MVT::i1
7195 ? MVT::i8
7196 : ArgVT;
7197 SDValue ArgValueTrunc =
7198 DAG.getNode(ISD::TRUNCATE, dl, TruncatedArgVT, ArgValue);
7199 SDValue ArgValueExt =
7200 ArgSignExt ? DAG.getSExtOrTrunc(ArgValueTrunc, dl, ValVT)
7201 : DAG.getZExtOrTrunc(ArgValueTrunc, dl, ValVT);
7202 InVals.push_back(ArgValueExt);
7203 } else {
7204 InVals.push_back(ArgValue);
7205 }
7206 };
7207
7208 // Vector arguments to VaArg functions are passed both on the stack, and
7209 // in any available GPRs. Load the value from the stack and add the GPRs
7210 // as live ins.
7211 if (VA.isMemLoc() && VA.needsCustom()) {
7212 assert(ValVT.isVector() && "Unexpected Custom MemLoc type.");
7213 assert(isVarArg && "Only use custom memloc for vararg.");
7214 // ValNo of the custom MemLoc, so we can compare it to the ValNo of the
7215 // matching custom RegLocs.
7216 const unsigned OriginalValNo = VA.getValNo();
7217 (void)OriginalValNo;
7218
7219 auto HandleCustomVecRegLoc = [&]() {
7220 assert(I != End && ArgLocs[I].isRegLoc() && ArgLocs[I].needsCustom() &&
7221 "Missing custom RegLoc.");
7222 VA = ArgLocs[I++];
7223 assert(VA.getValVT().isVector() &&
7224 "Unexpected Val type for custom RegLoc.");
7225 assert(VA.getValNo() == OriginalValNo &&
7226 "ValNo mismatch between custom MemLoc and RegLoc.");
7228 MF.addLiveIn(VA.getLocReg(),
7229 getRegClassForSVT(SVT, IsPPC64, Subtarget.hasP8Vector(),
7230 Subtarget.hasVSX()));
7231 };
7232
7233 HandleMemLoc();
7234 // In 64-bit there will be exactly 2 custom RegLocs that follow, and in
7235 // in 32-bit there will be 2 custom RegLocs if we are passing in R9 and
7236 // R10.
7237 HandleCustomVecRegLoc();
7238 HandleCustomVecRegLoc();
7239
7240 // If we are targeting 32-bit, there might be 2 extra custom RegLocs if
7241 // we passed the vector in R5, R6, R7 and R8.
7242 if (I != End && ArgLocs[I].isRegLoc() && ArgLocs[I].needsCustom()) {
7243 assert(!IsPPC64 &&
7244 "Only 2 custom RegLocs expected for 64-bit codegen.");
7245 HandleCustomVecRegLoc();
7246 HandleCustomVecRegLoc();
7247 }
7248
7249 continue;
7250 }
7251
7252 if (VA.isRegLoc()) {
7253 if (VA.getValVT().isScalarInteger())
7255 else if (VA.getValVT().isFloatingPoint() && !VA.getValVT().isVector()) {
7256 switch (VA.getValVT().SimpleTy) {
7257 default:
7258 report_fatal_error("Unhandled value type for argument.");
7259 case MVT::f32:
7261 break;
7262 case MVT::f64:
7264 break;
7265 }
7266 } else if (VA.getValVT().isVector()) {
7267 switch (VA.getValVT().SimpleTy) {
7268 default:
7269 report_fatal_error("Unhandled value type for argument.");
7270 case MVT::v16i8:
7272 break;
7273 case MVT::v8i16:
7275 break;
7276 case MVT::v4i32:
7277 case MVT::v2i64:
7278 case MVT::v1i128:
7280 break;
7281 case MVT::v4f32:
7282 case MVT::v2f64:
7284 break;
7285 }
7286 }
7287 }
7288
7289 if (Flags.isByVal() && VA.isMemLoc()) {
7290 const unsigned Size =
7291 alignTo(Flags.getByValSize() ? Flags.getByValSize() : PtrByteSize,
7292 PtrByteSize);
7293 const int FI = MF.getFrameInfo().CreateFixedObject(
7294 Size, VA.getLocMemOffset(), /* IsImmutable */ false,
7295 /* IsAliased */ true);
7296 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
7297 InVals.push_back(FIN);
7298
7299 continue;
7300 }
7301
7302 if (Flags.isByVal()) {
7303 assert(VA.isRegLoc() && "MemLocs should already be handled.");
7304
7305 const MCPhysReg ArgReg = VA.getLocReg();
7306 const PPCFrameLowering *FL = Subtarget.getFrameLowering();
7307
7308 const unsigned StackSize = alignTo(Flags.getByValSize(), PtrByteSize);
7309 const int FI = MF.getFrameInfo().CreateFixedObject(
7310 StackSize, mapArgRegToOffsetAIX(ArgReg, FL), /* IsImmutable */ false,
7311 /* IsAliased */ true);
7312 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
7313 InVals.push_back(FIN);
7314
7315 // Add live ins for all the RegLocs for the same ByVal.
7316 const TargetRegisterClass *RegClass =
7317 IsPPC64 ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
7318
7319 auto HandleRegLoc = [&, RegClass, LocVT](const MCPhysReg PhysReg,
7320 unsigned Offset) {
7321 const Register VReg = MF.addLiveIn(PhysReg, RegClass);
7322 // Since the callers side has left justified the aggregate in the
7323 // register, we can simply store the entire register into the stack
7324 // slot.
7325 SDValue CopyFrom = DAG.getCopyFromReg(Chain, dl, VReg, LocVT);
7326 // The store to the fixedstack object is needed becuase accessing a
7327 // field of the ByVal will use a gep and load. Ideally we will optimize
7328 // to extracting the value from the register directly, and elide the
7329 // stores when the arguments address is not taken, but that will need to
7330 // be future work.
7331 SDValue Store = DAG.getStore(
7332 CopyFrom.getValue(1), dl, CopyFrom,
7335
7336 MemOps.push_back(Store);
7337 };
7338
7339 unsigned Offset = 0;
7340 HandleRegLoc(VA.getLocReg(), Offset);
7341 Offset += PtrByteSize;
7342 for (; Offset != StackSize && ArgLocs[I].isRegLoc();
7343 Offset += PtrByteSize) {
7344 assert(ArgLocs[I].getValNo() == VA.getValNo() &&
7345 "RegLocs should be for ByVal argument.");
7346
7347 const CCValAssign RL = ArgLocs[I++];
7348 HandleRegLoc(RL.getLocReg(), Offset);
7350 }
7351
7352 if (Offset != StackSize) {
7353 assert(ArgLocs[I].getValNo() == VA.getValNo() &&
7354 "Expected MemLoc for remaining bytes.");
7355 assert(ArgLocs[I].isMemLoc() && "Expected MemLoc for remaining bytes.");
7356 // Consume the MemLoc.The InVal has already been emitted, so nothing
7357 // more needs to be done.
7358 ++I;
7359 }
7360
7361 continue;
7362 }
7363
7364 if (VA.isRegLoc() && !VA.needsCustom()) {
7365 MVT::SimpleValueType SVT = ValVT.SimpleTy;
7366 Register VReg =
7367 MF.addLiveIn(VA.getLocReg(),
7368 getRegClassForSVT(SVT, IsPPC64, Subtarget.hasP8Vector(),
7369 Subtarget.hasVSX()));
7370 SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, VReg, LocVT);
7371 if (ValVT.isScalarInteger() &&
7372 (ValVT.getFixedSizeInBits() < LocVT.getFixedSizeInBits())) {
7373 ArgValue =
7374 truncateScalarIntegerArg(Flags, ValVT, DAG, ArgValue, LocVT, dl);
7375 }
7376 InVals.push_back(ArgValue);
7377 continue;
7378 }
7379 if (VA.isMemLoc()) {
7380 HandleMemLoc();
7381 continue;
7382 }
7383 }
7384
7385 // On AIX a minimum of 8 words is saved to the parameter save area.
7386 const unsigned MinParameterSaveArea = 8 * PtrByteSize;
7387 // Area that is at least reserved in the caller of this function.
7388 unsigned CallerReservedArea = std::max<unsigned>(
7389 CCInfo.getStackSize(), LinkageSize + MinParameterSaveArea);
7390
7391 // Set the size that is at least reserved in caller of this function. Tail
7392 // call optimized function's reserved stack space needs to be aligned so
7393 // that taking the difference between two stack areas will result in an
7394 // aligned stack.
7395 CallerReservedArea =
7396 EnsureStackAlignment(Subtarget.getFrameLowering(), CallerReservedArea);
7397 FuncInfo->setMinReservedArea(CallerReservedArea);
7398
7399 if (isVarArg) {
7400 int VAListIndex = 0;
7401 // If any of the optional arguments are passed in register then the fixed
7402 // stack object we spill into is not immutable. Create a fixed stack object
7403 // that overlaps the remainder of the parameter save area.
7404 if (CCInfo.getStackSize() < (LinkageSize + MinParameterSaveArea)) {
7405 unsigned FixedStackSize =
7406 LinkageSize + MinParameterSaveArea - CCInfo.getStackSize();
7407 VAListIndex =
7408 MFI.CreateFixedObject(FixedStackSize, CCInfo.getStackSize(),
7409 /* IsImmutable */ false, /* IsAliased */ true);
7410 } else {
7411 // All the arguments passed through ellipses are on the stack. Create a
7412 // dummy fixed stack object the same size as a pointer since we don't
7413 // know the actual size.
7414 VAListIndex =
7415 MFI.CreateFixedObject(PtrByteSize, CCInfo.getStackSize(),
7416 /* IsImmutable */ true, /* IsAliased */ true);
7417 }
7418
7419 FuncInfo->setVarArgsFrameIndex(VAListIndex);
7420 SDValue FIN = DAG.getFrameIndex(VAListIndex, PtrVT);
7421
7422 static const MCPhysReg GPR_32[] = {PPC::R3, PPC::R4, PPC::R5, PPC::R6,
7423 PPC::R7, PPC::R8, PPC::R9, PPC::R10};
7424
7425 static const MCPhysReg GPR_64[] = {PPC::X3, PPC::X4, PPC::X5, PPC::X6,
7426 PPC::X7, PPC::X8, PPC::X9, PPC::X10};
7427 const unsigned NumGPArgRegs = std::size(IsPPC64 ? GPR_64 : GPR_32);
7428
7429 // The fixed integer arguments of a variadic function are stored to the
7430 // VarArgsFrameIndex on the stack so that they may be loaded by
7431 // dereferencing the result of va_next.
7432 for (unsigned
7433 GPRIndex = (CCInfo.getStackSize() - LinkageSize) / PtrByteSize,
7434 Offset = 0;
7435 GPRIndex < NumGPArgRegs; ++GPRIndex, Offset += PtrByteSize) {
7436
7437 const Register VReg =
7438 IsPPC64 ? MF.addLiveIn(GPR_64[GPRIndex], &PPC::G8RCRegClass)
7439 : MF.addLiveIn(GPR_32[GPRIndex], &PPC::GPRCRegClass);
7440
7441 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
7442 MachinePointerInfo MPI =
7443 MachinePointerInfo::getFixedStack(MF, VAListIndex, Offset);
7444 SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, MPI);
7445 MemOps.push_back(Store);
7446 // Increment the address for the next argument to store.
7447 SDValue PtrOff = DAG.getConstant(PtrByteSize, dl, PtrVT);
7448 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
7449 }
7450 }
7451
7452 if (!MemOps.empty())
7453 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
7454
7455 return Chain;
7456}
7457
/// LowerCall_AIX - Lower an outgoing call according to the AIX ABI: the
/// operands are analyzed with CC_AIX, by-value and variadic arguments are
/// materialized into GPRs and/or the parameter save area, the TOC pointer is
/// saved across indirect calls, and the call is emitted via FinishCall().
SDValue PPCTargetLowering::LowerCall_AIX(
    SDValue Chain, SDValue Callee, CallFlags CFlags,
    const SmallVectorImpl<SDValue> &OutVals,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
    const CallBase *CB) const {
  // See PPCTargetLowering::LowerFormalArguments_AIX() for a description of the
  // AIX ABI stack frame layout.

  assert((CFlags.CallConv == CallingConv::C ||
          CFlags.CallConv == CallingConv::Cold ||
          CFlags.CallConv == CallingConv::Fast) &&
         "Unexpected calling convention!");

  if (CFlags.IsPatchPoint)
    report_fatal_error("This call type is unimplemented on AIX.");

  const PPCSubtarget &Subtarget = DAG.getSubtarget<PPCSubtarget>();

  MachineFunction &MF = DAG.getMachineFunction();
  CCState CCInfo(CFlags.CallConv, CFlags.IsVarArg, MF, ArgLocs,
                 *DAG.getContext());

  // Reserve space for the linkage save area (LSA) on the stack.
  // In both PPC32 and PPC64 there are 6 reserved slots in the LSA:
  //   [SP][CR][LR][2 x reserved][TOC].
  // The LSA is 24 bytes (6x4) in PPC32 and 48 bytes (6x8) in PPC64.
  const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
  const bool IsPPC64 = Subtarget.isPPC64();
  const EVT PtrVT = getPointerTy(DAG.getDataLayout());
  const unsigned PtrByteSize = IsPPC64 ? 8 : 4;
  CCInfo.AllocateStack(LinkageSize, Align(PtrByteSize));
  CCInfo.AnalyzeCallOperands(Outs, CC_AIX);

  // The prolog code of the callee may store up to 8 GPR argument registers to
  // the stack, allowing va_start to index over them in memory if the callee
  // is variadic.
  // Because we cannot tell if this is needed on the caller side, we have to
  // conservatively assume that it is needed. As such, make sure we have at
  // least enough stack space for the caller to store the 8 GPRs.
  const unsigned MinParameterSaveAreaSize = 8 * PtrByteSize;
  const unsigned NumBytes = std::max<unsigned>(
      LinkageSize + MinParameterSaveAreaSize, CCInfo.getStackSize());

  // Adjust the stack pointer for the new arguments...
  // These operations are automatically eliminated by the prolog/epilog pass.
  Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
  SDValue CallSeqStart = Chain;

  SmallVector<SDValue, 8> MemOpChains;

  // Set up a copy of the stack pointer for loading and storing any
  // arguments that may not fit in the registers available for argument
  // passing.
  const SDValue StackPtr = IsPPC64 ? DAG.getRegister(PPC::X1, MVT::i64)
                                   : DAG.getRegister(PPC::R1, MVT::i32);

  for (unsigned I = 0, E = ArgLocs.size(); I != E;) {
    const unsigned ValNo = ArgLocs[I].getValNo();
    SDValue Arg = OutVals[ValNo];
    ISD::ArgFlagsTy Flags = Outs[ValNo].Flags;

    if (Flags.isByVal()) {
      const unsigned ByValSize = Flags.getByValSize();

      // Nothing to do for zero-sized ByVals on the caller side.
      if (!ByValSize) {
        ++I;
        continue;
      }

      // Zero-extending load of a VT-sized chunk of the by-val aggregate at
      // byte offset LoadOffset from the start of the argument.
      auto GetLoad = [&](EVT VT, unsigned LoadOffset) {
        return DAG.getExtLoad(ISD::ZEXTLOAD, dl, PtrVT, Chain,
                              (LoadOffset != 0)
                                  ? DAG.getObjectPtrOffset(
                                        dl, Arg, TypeSize::getFixed(LoadOffset))
                                  : Arg,
                              MachinePointerInfo(), VT);
      };

      unsigned LoadOffset = 0;

      // Initialize registers, which are fully occupied by the by-val argument.
      while (LoadOffset + PtrByteSize <= ByValSize && ArgLocs[I].isRegLoc()) {
        SDValue Load = GetLoad(PtrVT, LoadOffset);
        MemOpChains.push_back(Load.getValue(1));
        LoadOffset += PtrByteSize;
        const CCValAssign &ByValVA = ArgLocs[I++];
        assert(ByValVA.getValNo() == ValNo &&
               "Unexpected location for pass-by-value argument.");
        RegsToPass.push_back(std::make_pair(ByValVA.getLocReg(), Load));
      }

      if (LoadOffset == ByValSize)
        continue;

      // There must be one more loc to handle the remainder.
      assert(ArgLocs[I].getValNo() == ValNo &&
             "Expected additional location for by-value argument.");

      if (ArgLocs[I].isMemLoc()) {
        assert(LoadOffset < ByValSize && "Unexpected memloc for by-val arg.");
        const CCValAssign &ByValVA = ArgLocs[I++];
        ISD::ArgFlagsTy MemcpyFlags = Flags;
        // Only memcpy the bytes that don't pass in register.
        MemcpyFlags.setByValSize(ByValSize - LoadOffset);
        Chain = CallSeqStart = createMemcpyOutsideCallSeq(
            (LoadOffset != 0) ? DAG.getObjectPtrOffset(
                                    dl, Arg, TypeSize::getFixed(LoadOffset))
                              : Arg,
                dl, StackPtr, TypeSize::getFixed(ByValVA.getLocMemOffset())),
            CallSeqStart, MemcpyFlags, DAG, dl);
        continue;
      }

      // Initialize the final register residue.
      // Any residue that occupies the final by-val arg register must be
      // left-justified on AIX. Loads must be a power-of-2 size and cannot be
      // larger than the ByValSize. For example: a 7 byte by-val arg requires 4,
      // 2 and 1 byte loads.
      const unsigned ResidueBytes = ByValSize % PtrByteSize;
      assert(ResidueBytes != 0 && LoadOffset + PtrByteSize > ByValSize &&
             "Unexpected register residue for by-value argument.");
      SDValue ResidueVal;
      for (unsigned Bytes = 0; Bytes != ResidueBytes;) {
        // Largest power-of-2 chunk that still fits in the remaining residue.
        const unsigned N = llvm::bit_floor(ResidueBytes - Bytes);
        const MVT VT =
            N == 1 ? MVT::i8
                   : ((N == 2) ? MVT::i16 : (N == 4 ? MVT::i32 : MVT::i64));
        SDValue Load = GetLoad(VT, LoadOffset);
        MemOpChains.push_back(Load.getValue(1));
        LoadOffset += N;
        Bytes += N;

        // By-val arguments are passed left-justfied in register.
        // Every load here needs to be shifted, otherwise a full register load
        // should have been used.
        assert(PtrVT.getSimpleVT().getSizeInBits() > (Bytes * 8) &&
               "Unexpected load emitted during handling of pass-by-value "
               "argument.");
        unsigned NumSHLBits = PtrVT.getSimpleVT().getSizeInBits() - (Bytes * 8);
        EVT ShiftAmountTy =
            getShiftAmountTy(Load->getValueType(0), DAG.getDataLayout());
        SDValue SHLAmt = DAG.getConstant(NumSHLBits, dl, ShiftAmountTy);
        SDValue ShiftedLoad =
            DAG.getNode(ISD::SHL, dl, Load.getValueType(), Load, SHLAmt);
        ResidueVal = ResidueVal ? DAG.getNode(ISD::OR, dl, PtrVT, ResidueVal,
                                              ShiftedLoad)
                                : ShiftedLoad;
      }

      const CCValAssign &ByValVA = ArgLocs[I++];
      RegsToPass.push_back(std::make_pair(ByValVA.getLocReg(), ResidueVal));
      continue;
    }

    CCValAssign &VA = ArgLocs[I++];
    const MVT LocVT = VA.getLocVT();
    const MVT ValVT = VA.getValVT();

    switch (VA.getLocInfo()) {
    default:
      report_fatal_error("Unexpected argument extension type.");
    case CCValAssign::Full:
      break;
    case CCValAssign::ZExt:
      Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
      break;
    case CCValAssign::SExt:
      Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
      break;
    }

    if (VA.isRegLoc() && !VA.needsCustom()) {
      RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
      continue;
    }

    // Vector arguments passed to VarArg functions need custom handling when
    // they are passed (at least partially) in GPRs.
    if (VA.isMemLoc() && VA.needsCustom() && ValVT.isVector()) {
      assert(CFlags.IsVarArg && "Custom MemLocs only used for Vector args.");
      // Store value to its stack slot.
      SDValue PtrOff =
          DAG.getConstant(VA.getLocMemOffset(), dl, StackPtr.getValueType());
      PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
      SDValue Store =
          DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo());
      MemOpChains.push_back(Store);
      const unsigned OriginalValNo = VA.getValNo();
      // Then load the GPRs from the stack
      unsigned LoadOffset = 0;
      // Consume the next custom RegLoc for this vector: reload one
      // pointer-sized piece of the just-stored value and pass it in a GPR.
      auto HandleCustomVecRegLoc = [&]() {
        assert(I != E && "Unexpected end of CCvalAssigns.");
        assert(ArgLocs[I].isRegLoc() && ArgLocs[I].needsCustom() &&
               "Expected custom RegLoc.");
        CCValAssign RegVA = ArgLocs[I++];
        assert(RegVA.getValNo() == OriginalValNo &&
               "Custom MemLoc ValNo and custom RegLoc ValNo must match.");
        SDValue Add = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
                                  DAG.getConstant(LoadOffset, dl, PtrVT));
        SDValue Load = DAG.getLoad(PtrVT, dl, Store, Add, MachinePointerInfo());
        MemOpChains.push_back(Load.getValue(1));
        RegsToPass.push_back(std::make_pair(RegVA.getLocReg(), Load));
        LoadOffset += PtrByteSize;
      };

      // In 64-bit there will be exactly 2 custom RegLocs that follow, and in
      // in 32-bit there will be 2 custom RegLocs if we are passing in R9 and
      // R10.
      HandleCustomVecRegLoc();
      HandleCustomVecRegLoc();

      if (I != E && ArgLocs[I].isRegLoc() && ArgLocs[I].needsCustom() &&
          ArgLocs[I].getValNo() == OriginalValNo) {
        assert(!IsPPC64 &&
               "Only 2 custom RegLocs expected for 64-bit codegen.");
        HandleCustomVecRegLoc();
        HandleCustomVecRegLoc();
      }

      continue;
    }

    if (VA.isMemLoc()) {
      SDValue PtrOff =
          DAG.getConstant(VA.getLocMemOffset(), dl, StackPtr.getValueType());
      PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
      MemOpChains.push_back(
          DAG.getStore(Chain, dl, Arg, PtrOff,
                       Subtarget.getFrameLowering()->getStackAlign()));

      continue;
    }

    if (!ValVT.isFloatingPoint())
          "Unexpected register handling for calling convention.");

    // Custom handling is used for GPR initializations for vararg float
    // arguments.
    assert(VA.isRegLoc() && VA.needsCustom() && CFlags.IsVarArg &&
           LocVT.isInteger() &&
           "Custom register handling only expected for VarArg.");

    SDValue ArgAsInt =
        DAG.getBitcast(MVT::getIntegerVT(ValVT.getSizeInBits()), Arg);

    if (Arg.getValueType().getStoreSize() == LocVT.getStoreSize())
      // f32 in 32-bit GPR
      // f64 in 64-bit GPR
      RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgAsInt));
    else if (Arg.getValueType().getFixedSizeInBits() <
             LocVT.getFixedSizeInBits())
      // f32 in 64-bit GPR.
      RegsToPass.push_back(std::make_pair(
          VA.getLocReg(), DAG.getZExtOrTrunc(ArgAsInt, dl, LocVT)));
    else {
      // f64 in two 32-bit GPRs
      // The 2 GPRs are marked custom and expected to be adjacent in ArgLocs.
      assert(Arg.getValueType() == MVT::f64 && CFlags.IsVarArg && !IsPPC64 &&
             "Unexpected custom register for argument!");
      CCValAssign &GPR1 = VA;
      SDValue MSWAsI64 = DAG.getNode(ISD::SRL, dl, MVT::i64, ArgAsInt,
                                     DAG.getConstant(32, dl, MVT::i8));
      RegsToPass.push_back(std::make_pair(
          GPR1.getLocReg(), DAG.getZExtOrTrunc(MSWAsI64, dl, MVT::i32)));

      if (I != E) {
        // If only 1 GPR was available, there will only be one custom GPR and
        // the argument will also pass in memory.
        CCValAssign &PeekArg = ArgLocs[I];
        // NOTE(review): the right-hand side of this && compares
        // PeekArg.getValNo() with itself and is therefore always true; the
        // intent appears to be a comparison against VA's value number.
        // Confirm before relying on or changing this condition.
        if (PeekArg.isRegLoc() && PeekArg.getValNo() == PeekArg.getValNo()) {
          assert(PeekArg.needsCustom() && "A second custom GPR is expected.");
          CCValAssign &GPR2 = ArgLocs[I++];
          RegsToPass.push_back(std::make_pair(
              GPR2.getLocReg(), DAG.getZExtOrTrunc(ArgAsInt, dl, MVT::i32)));
        }
      }
    }
  }

  if (!MemOpChains.empty())
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);

  // For indirect calls, we need to save the TOC base to the stack for
  // restoration after the call.
  if (CFlags.IsIndirect) {
    assert(!CFlags.IsTailCall && "Indirect tail-calls not supported.");
    const MCRegister TOCBaseReg = Subtarget.getTOCPointerRegister();
    const MCRegister StackPtrReg = Subtarget.getStackPointerRegister();
    const MVT PtrVT = Subtarget.getScalarIntVT();
    const unsigned TOCSaveOffset =
        Subtarget.getFrameLowering()->getTOCSaveOffset();

    setUsesTOCBasePtr(DAG);
    SDValue Val = DAG.getCopyFromReg(Chain, dl, TOCBaseReg, PtrVT);
    SDValue PtrOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
    SDValue StackPtr = DAG.getRegister(StackPtrReg, PtrVT);
    SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
    Chain = DAG.getStore(
        Val.getValue(1), dl, Val, AddPtr,
        MachinePointerInfo::getStack(DAG.getMachineFunction(), TOCSaveOffset));
  }

  // Build a sequence of copy-to-reg nodes chained together with token chain
  // and flag operands which copy the outgoing args into the appropriate regs.
  SDValue InGlue;
  for (auto Reg : RegsToPass) {
    Chain = DAG.getCopyToReg(Chain, dl, Reg.first, Reg.second, InGlue);
    InGlue = Chain.getValue(1);
  }

  const int SPDiff = 0;
  return FinishCall(CFlags, dl, DAG, RegsToPass, InGlue, Chain, CallSeqStart,
                    Callee, SPDiff, NumBytes, Ins, InVals, CB);
}
7780
/// Returns true if the given return values can be placed entirely in
/// registers for this calling convention, by delegating to
/// CCState::CheckReturn. On SVR4, the cold calling convention selects an
/// alternate return CC; all other cases use RetCC_PPC.
bool
PPCTargetLowering::CanLowerReturn(CallingConv::ID CallConv,
                                  MachineFunction &MF, bool isVarArg,
                                  const Type *RetTy) const {
  CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
  return CCInfo.CheckReturn(
      Outs, (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold)
                : RetCC_PPC);
}
7794
/// Lower an IR return: analyze the return values with the appropriate return
/// calling convention (the cold variant on SVR4 cold calls, RetCC_PPC
/// otherwise), copy each value into its assigned register, and emit a
/// PPCISD::RET_GLUE node carrying the chain, registers, and glue.
SDValue
PPCTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
                               bool isVarArg,
                               const SmallVectorImpl<SDValue> &OutVals,
                               const SDLoc &dl, SelectionDAG &DAG) const {
  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
                 *DAG.getContext());
  CCInfo.AnalyzeReturn(Outs,
                       (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold)
                           : RetCC_PPC);

  SDValue Glue;
  SmallVector<SDValue, 4> RetOps(1, Chain);

  // Copy the result values into the output registers.
  for (unsigned i = 0, RealResIdx = 0; i != RVLocs.size(); ++i, ++RealResIdx) {
    CCValAssign &VA = RVLocs[i];
    assert(VA.isRegLoc() && "Can only return in registers!");

    SDValue Arg = OutVals[RealResIdx];

    // Extend the value to its assigned location type if required.
    switch (VA.getLocInfo()) {
    default: llvm_unreachable("Unknown loc info!");
    case CCValAssign::Full: break;
    case CCValAssign::AExt:
      Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
      break;
    case CCValAssign::ZExt:
      Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
      break;
    case CCValAssign::SExt:
      Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
      break;
    }
    if (Subtarget.hasSPE() && VA.getLocVT() == MVT::f64) {
      bool isLittleEndian = Subtarget.isLittleEndian();
      // Legalize ret f64 -> ret 2 x i32.
      // Each SPE f64 consumes two consecutive RVLocs; the halves are
      // extracted with PPCISD::EXTRACT_SPE in endian-dependent order.
      SDValue SVal =
          DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
                      DAG.getIntPtrConstant(isLittleEndian ? 0 : 1, dl));
      Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), SVal, Glue);
      RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
      SVal = DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
                         DAG.getIntPtrConstant(isLittleEndian ? 1 : 0, dl));
      Glue = Chain.getValue(1);
      VA = RVLocs[++i]; // skip ahead to next loc
      Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), SVal, Glue);
    } else
      Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Glue);
    Glue = Chain.getValue(1);
    RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
  }

  RetOps[0] = Chain; // Update chain.

  // Add the glue if we have it.
  if (Glue.getNode())
    RetOps.push_back(Glue);

  return DAG.getNode(PPCISD::RET_GLUE, dl, MVT::Other, RetOps);
}
7859
7860SDValue
7861PPCTargetLowering::LowerGET_DYNAMIC_AREA_OFFSET(SDValue Op,
7862 SelectionDAG &DAG) const {
7863 SDLoc dl(Op);
7864
7865 // Get the correct type for integers.
7866 EVT IntVT = Op.getValueType();
7867
7868 // Get the inputs.
7869 SDValue Chain = Op.getOperand(0);
7870 SDValue FPSIdx = getFramePointerFrameIndex(DAG);
7871 // Build a DYNAREAOFFSET node.
7872 SDValue Ops[2] = {Chain, FPSIdx};
7873 SDVTList VTs = DAG.getVTList(IntVT);
7874 return DAG.getNode(PPCISD::DYNAREAOFFSET, dl, VTs, Ops);
7875}
7876
7877SDValue PPCTargetLowering::LowerSTACKRESTORE(SDValue Op,
7878 SelectionDAG &DAG) const {
7879 // When we pop the dynamic allocation we need to restore the SP link.
7880 SDLoc dl(Op);
7881
7882 // Get the correct type for pointers.
7883 EVT PtrVT = getPointerTy(DAG.getDataLayout());
7884
7885 // Construct the stack pointer operand.
7886 bool isPPC64 = Subtarget.isPPC64();
7887 unsigned SP = isPPC64 ? PPC::X1 : PPC::R1;
7888 SDValue StackPtr = DAG.getRegister(SP, PtrVT);
7889
7890 // Get the operands for the STACKRESTORE.
7891 SDValue Chain = Op.getOperand(0);
7892 SDValue SaveSP = Op.getOperand(1);
7893
7894 // Load the old link SP.
7895 SDValue LoadLinkSP =
7896 DAG.getLoad(PtrVT, dl, Chain, StackPtr, MachinePointerInfo());
7897
7898 // Restore the stack pointer.
7899 Chain = DAG.getCopyToReg(LoadLinkSP.getValue(1), dl, SP, SaveSP);
7900
7901 // Store the old link SP.
7902 return DAG.getStore(Chain, dl, LoadLinkSP, StackPtr, MachinePointerInfo());
7903}
7904
7905SDValue PPCTargetLowering::getReturnAddrFrameIndex(SelectionDAG &DAG) const {
7906 MachineFunction &MF = DAG.getMachineFunction();
7907 bool isPPC64 = Subtarget.isPPC64();
7908 EVT PtrVT = getPointerTy(MF.getDataLayout());
7909
7910 // Get current frame pointer save index. The users of this index will be
7911 // primarily DYNALLOC instructions.
7912 PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
7913 int RASI = FI->getReturnAddrSaveIndex();
7914
7915 // If the frame pointer save index hasn't been defined yet.
7916 if (!RASI) {
7917 // Find out what the fix offset of the frame pointer save area.
7918 int LROffset = Subtarget.getFrameLowering()->getReturnSaveOffset();
7919 // Allocate the frame index for frame pointer save area.
7920 RASI = MF.getFrameInfo().CreateFixedObject(isPPC64? 8 : 4, LROffset, false);
7921 // Save the result.
7922 FI->setReturnAddrSaveIndex(RASI);
7923 }
7924 return DAG.getFrameIndex(RASI, PtrVT);
7925}
7926
7927SDValue
7928PPCTargetLowering::getFramePointerFrameIndex(SelectionDAG & DAG) const {
7929 MachineFunction &MF = DAG.getMachineFunction();
7930 bool isPPC64 = Subtarget.isPPC64();
7931 EVT PtrVT = getPointerTy(MF.getDataLayout());
7932
7933 // Get current frame pointer save index. The users of this index will be
7934 // primarily DYNALLOC instructions.
7935 PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
7936 int FPSI = FI->getFramePointerSaveIndex();
7937
7938 // If the frame pointer save index hasn't been defined yet.
7939 if (!FPSI) {
7940 // Find out what the fix offset of the frame pointer save area.
7941 int FPOffset = Subtarget.getFrameLowering()->getFramePointerSaveOffset();
7942 // Allocate the frame index for frame pointer save area.
7943 FPSI = MF.getFrameInfo().CreateFixedObject(isPPC64? 8 : 4, FPOffset, true);
7944 // Save the result.
7945 FI->setFramePointerSaveIndex(FPSI);
7946 }
7947 return DAG.getFrameIndex(FPSI, PtrVT);
7948}
7949
7950SDValue PPCTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
7951 SelectionDAG &DAG) const {
7952 MachineFunction &MF = DAG.getMachineFunction();
7953 // Get the inputs.
7954 SDValue Chain = Op.getOperand(0);
7955 SDValue Size = Op.getOperand(1);
7956 SDLoc dl(Op);
7957
7958 // Get the correct type for pointers.
7959 EVT PtrVT = getPointerTy(DAG.getDataLayout());
7960 // Negate the size.
7961 SDValue NegSize = DAG.getNode(ISD::SUB, dl, PtrVT,
7962 DAG.getConstant(0, dl, PtrVT), Size);
7963 // Construct a node for the frame pointer save index.
7964 SDValue FPSIdx = getFramePointerFrameIndex(DAG);
7965 SDValue Ops[3] = { Chain, NegSize, FPSIdx };
7966 SDVTList VTs = DAG.getVTList(PtrVT, MVT::Other);
7967 if (hasInlineStackProbe(MF))
7968 return DAG.getNode(PPCISD::PROBED_ALLOCA, dl, VTs, Ops);
7969 return DAG.getNode(PPCISD::DYNALLOC, dl, VTs, Ops);
7970}
7971
7972SDValue PPCTargetLowering::LowerEH_DWARF_CFA(SDValue Op,
7973 SelectionDAG &DAG) const {
7974 MachineFunction &MF = DAG.getMachineFunction();
7975
7976 bool isPPC64 = Subtarget.isPPC64();
7977 EVT PtrVT = getPointerTy(DAG.getDataLayout());
7978
7979 int FI = MF.getFrameInfo().CreateFixedObject(isPPC64 ? 8 : 4, 0, false);
7980 return DAG.getFrameIndex(FI, PtrVT);
7981}
7982
7983SDValue PPCTargetLowering::lowerEH_SJLJ_SETJMP(SDValue Op,
7984 SelectionDAG &DAG) const {
7985 SDLoc DL(Op);
7986 return DAG.getNode(PPCISD::EH_SJLJ_SETJMP, DL,
7987 DAG.getVTList(MVT::i32, MVT::Other),
7988 Op.getOperand(0), Op.getOperand(1));
7989}
7990
7991SDValue PPCTargetLowering::lowerEH_SJLJ_LONGJMP(SDValue Op,
7992 SelectionDAG &DAG) const {
7993 SDLoc DL(Op);
7994 return DAG.getNode(PPCISD::EH_SJLJ_LONGJMP, DL, MVT::Other,
7995 Op.getOperand(0), Op.getOperand(1));
7996}
7997
7998SDValue PPCTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
7999 if (Op.getValueType().isVector())
8000 return LowerVectorLoad(Op, DAG);
8001
8002 assert(Op.getValueType() == MVT::i1 &&
8003 "Custom lowering only for i1 loads");
8004
8005 // First, load 8 bits into 32 bits, then truncate to 1 bit.
8006
8007 SDLoc dl(Op);
8008 LoadSDNode *LD = cast<LoadSDNode>(Op);
8009
8010 SDValue Chain = LD->getChain();
8011 SDValue BasePtr = LD->getBasePtr();
8012 MachineMemOperand *MMO = LD->getMemOperand();
8013
8014 SDValue NewLD =
8015 DAG.getExtLoad(ISD::EXTLOAD, dl, getPointerTy(DAG.getDataLayout()), Chain,
8016 BasePtr, MVT::i8, MMO);
8017 SDValue Result = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, NewLD);
8018
8019 SDValue Ops[] = { Result, SDValue(NewLD.getNode(), 1) };
8020 return DAG.getMergeValues(Ops, dl);
8021}
8022
/// Custom-lower stores: vector stores are dispatched to LowerVectorStore;
/// an i1 store is widened and emitted as a truncating i8 store that reuses
/// the original memory operand.
SDValue PPCTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
  if (Op.getOperand(1).getValueType().isVector())
    return LowerVectorStore(Op, DAG);

  assert(Op.getOperand(1).getValueType() == MVT::i1 &&
         "Custom lowering only for i1 stores");

  // First, zero extend to 32 bits, then use a truncating store to 8 bits.

  SDLoc dl(Op);
  StoreSDNode *ST = cast<StoreSDNode>(Op);

  SDValue Chain = ST->getChain();
  SDValue BasePtr = ST->getBasePtr();
  SDValue Value = ST->getValue();
  MachineMemOperand *MMO = ST->getMemOperand();

                     Value);
  return DAG.getTruncStore(Chain, dl, Value, BasePtr, MVT::i8, MMO);
}
8044
8045// FIXME: Remove this once the ANDI glue bug is fixed:
8046SDValue PPCTargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
8047 assert(Op.getValueType() == MVT::i1 &&
8048 "Custom lowering only for i1 results");
8049
8050 SDLoc DL(Op);
8051 return DAG.getNode(PPCISD::ANDI_rec_1_GT_BIT, DL, MVT::i1, Op.getOperand(0));
8052}
8053
8054SDValue PPCTargetLowering::LowerTRUNCATEVector(SDValue Op,
8055 SelectionDAG &DAG) const {
8056
8057 // Implements a vector truncate that fits in a vector register as a shuffle.
8058 // We want to legalize vector truncates down to where the source fits in
8059 // a vector register (and target is therefore smaller than vector register
8060 // size). At that point legalization will try to custom lower the sub-legal
8061 // result and get here - where we can contain the truncate as a single target
8062 // operation.
8063
8064 // For example a trunc <2 x i16> to <2 x i8> could be visualized as follows:
8065 // <MSB1|LSB1, MSB2|LSB2> to <LSB1, LSB2>
8066 //
8067 // We will implement it for big-endian ordering as this (where x denotes
8068 // undefined):
8069 // < MSB1|LSB1, MSB2|LSB2, uu, uu, uu, uu, uu, uu> to
8070 // < LSB1, LSB2, u, u, u, u, u, u, u, u, u, u, u, u, u, u>
8071 //
8072 // The same operation in little-endian ordering will be:
8073 // <uu, uu, uu, uu, uu, uu, LSB2|MSB2, LSB1|MSB1> to
8074 // <u, u, u, u, u, u, u, u, u, u, u, u, u, u, LSB2, LSB1>
8075
8076 EVT TrgVT = Op.getValueType();
8077 assert(TrgVT.isVector() && "Vector type expected.");
8078 unsigned TrgNumElts = TrgVT.getVectorNumElements();
8079 EVT EltVT = TrgVT.getVectorElementType();
8080 if (!isOperationCustom(Op.getOpcode(), TrgVT) ||
8081 TrgVT.getSizeInBits() > 128 || !isPowerOf2_32(TrgNumElts) ||
8083 return SDValue();
8084
8085 SDValue N1 = Op.getOperand(0);
8086 EVT SrcVT = N1.getValueType();
8087 unsigned SrcSize = SrcVT.getSizeInBits();
8088 if (SrcSize > 256 || !isPowerOf2_32(SrcVT.getVectorNumElements()) ||
8091 return SDValue();
8092 if (SrcSize == 256 && SrcVT.getVectorNumElements() < 2)
8093 return SDValue();
8094
8095 unsigned WideNumElts = 128 / EltVT.getSizeInBits();
8096 EVT WideVT = EVT::getVectorVT(*DAG.getContext(), EltVT, WideNumElts);
8097
8098 SDLoc DL(Op);
8099 SDValue Op1, Op2;
8100 if (SrcSize == 256) {
8101 EVT VecIdxTy = getVectorIdxTy(DAG.getDataLayout());
8102 EVT SplitVT =
8104 unsigned SplitNumElts = SplitVT.getVectorNumElements();
8105 Op1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, N1,
8106 DAG.getConstant(0, DL, VecIdxTy));
8107 Op2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, N1,
8108 DAG.getConstant(SplitNumElts, DL, VecIdxTy));
8109 }
8110 else {
8111 Op1 = SrcSize == 128 ? N1 : widenVec(DAG, N1, DL);
8112 Op2 = DAG.getUNDEF(WideVT);
8113 }
8114
8115 // First list the elements we want to keep.
8116 unsigned SizeMult = SrcSize / TrgVT.getSizeInBits();
8117 SmallVector<int, 16> ShuffV;
8118 if (Subtarget.isLittleEndian())
8119 for (unsigned i = 0; i < TrgNumElts; ++i)
8120 ShuffV.push_back(i * SizeMult);
8121 else
8122 for (unsigned i = 1; i <= TrgNumElts; ++i)
8123 ShuffV.push_back(i * SizeMult - 1);
8124
8125 // Populate the remaining elements with undefs.
8126 for (unsigned i = TrgNumElts; i < WideNumElts; ++i)
8127 // ShuffV.push_back(i + WideNumElts);
8128 ShuffV.push_back(WideNumElts + 1);
8129
8130 Op1 = DAG.getNode(ISD::BITCAST, DL, WideVT, Op1);
8131 Op2 = DAG.getNode(ISD::BITCAST, DL, WideVT, Op2);
8132 return DAG.getVectorShuffle(WideVT, DL, Op1, Op2, ShuffV);
8133}
8134
/// LowerSELECT_CC - Lower floating point select_cc's into fsel instruction when
/// possible.
SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
  EVT ResVT = Op.getValueType();
  EVT CmpVT = Op.getOperand(0).getValueType();
  SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
  SDValue TV = Op.getOperand(2), FV = Op.getOperand(3);
  SDLoc dl(Op);

  // Without power9-vector, we don't have native instruction for f128 comparison.
  // Following transformation to libcall is needed for setcc:
  // select_cc lhs, rhs, tv, fv, cc -> select_cc (setcc cc, x, y), 0, tv, fv, NE
  if (!Subtarget.hasP9Vector() && CmpVT == MVT::f128) {
    SDValue Z = DAG.getSetCC(
        dl, getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), CmpVT),
        LHS, RHS, CC);
    SDValue Zero = DAG.getConstant(0, dl, Z.getValueType());
    return DAG.getSelectCC(dl, Z, Zero, TV, FV, ISD::SETNE);
  }

  // Not FP, or using SPE? Not a fsel.
  if (!CmpVT.isFloatingPoint() || !TV.getValueType().isFloatingPoint() ||
      Subtarget.hasSPE())
    return Op;

  SDNodeFlags Flags = Op.getNode()->getFlags();

  // We have xsmaxc[dq]p/xsminc[dq]p which are OK to emit even in the
  // presence of infinities.
  if (Subtarget.hasP9Vector() && LHS == TV && RHS == FV) {
    switch (CC) {
    default:
      break;
    case ISD::SETOGT:
    case ISD::SETGT:
      return DAG.getNode(PPCISD::XSMAXC, dl, Op.getValueType(), LHS, RHS);
    case ISD::SETOLT:
    case ISD::SETLT:
      return DAG.getNode(PPCISD::XSMINC, dl, Op.getValueType(), LHS, RHS);
    }
  }

  // We might be able to do better than this under some circumstances, but in
  // general, fsel-based lowering of select is a finite-math-only optimization.
  // For more information, see section F.3 of the 2.06 ISA specification.
  // With ISA 3.0
  if (!Flags.hasNoInfs() || !Flags.hasNoNaNs() || ResVT == MVT::f128)
    return Op;

  // If the RHS of the comparison is a 0.0, we don't need to do the
  // subtraction at all.
  // NOTE(review): per the comment above, the first switch below is intended
  // only for the RHS == 0.0 fast path (fsel compares against zero); the
  // guard condition is not visible in this excerpt — confirm it before
  // modifying the control flow here.
  SDValue Sel1;
    switch (CC) {
    default: break; // SETUO etc aren't handled by fsel.
    case ISD::SETNE:
      std::swap(TV, FV);
      [[fallthrough]];
    case ISD::SETEQ:
      if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits
        LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
      Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV);
      if (Sel1.getValueType() == MVT::f32) // Comparison is always 64-bits
        Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1);
      return DAG.getNode(PPCISD::FSEL, dl, ResVT,
                         DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), Sel1, FV);
    case ISD::SETULT:
    case ISD::SETLT:
      std::swap(TV, FV); // fsel is natively setge, swap operands for setlt
      [[fallthrough]];
    case ISD::SETOGE:
    case ISD::SETGE:
      if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits
        LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
      return DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV);
    case ISD::SETUGT:
    case ISD::SETGT:
      std::swap(TV, FV); // fsel is natively setge, swap operands for setlt
      [[fallthrough]];
    case ISD::SETOLE:
    case ISD::SETLE:
      if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits
        LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
      return DAG.getNode(PPCISD::FSEL, dl, ResVT,
                         DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), TV, FV);
    }

  // General case: materialize the comparison as LHS - RHS (or RHS - LHS) and
  // feed the difference to fsel, which selects on >= 0.0.
  SDValue Cmp;
  switch (CC) {
  default: break; // SETUO etc aren't handled by fsel.
  case ISD::SETNE:
    std::swap(TV, FV);
    [[fallthrough]];
  case ISD::SETEQ:
    Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags);
    if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
      Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
    Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
    if (Sel1.getValueType() == MVT::f32) // Comparison is always 64-bits
      Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1);
    return DAG.getNode(PPCISD::FSEL, dl, ResVT,
                       DAG.getNode(ISD::FNEG, dl, MVT::f64, Cmp), Sel1, FV);
  case ISD::SETULT:
  case ISD::SETLT:
    Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags);
    if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
      Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
    return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
  case ISD::SETOGE:
  case ISD::SETGE:
    Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags);
    if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
      Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
    return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
  case ISD::SETUGT:
  case ISD::SETGT:
    Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS, Flags);
    if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
      Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
    return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
  case ISD::SETOLE:
  case ISD::SETLE:
    Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS, Flags);
    if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
      Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
    return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
  }
  return Op;
}
8265
8266static unsigned getPPCStrictOpcode(unsigned Opc) {
8267 switch (Opc) {
8268 default:
8269 llvm_unreachable("No strict version of this opcode!");
8270 case PPCISD::FCTIDZ:
8271 return PPCISD::STRICT_FCTIDZ;
8272 case PPCISD::FCTIWZ:
8273 return PPCISD::STRICT_FCTIWZ;
8274 case PPCISD::FCTIDUZ:
8275 return PPCISD::STRICT_FCTIDUZ;
8276 case PPCISD::FCTIWUZ:
8277 return PPCISD::STRICT_FCTIWUZ;
8278 case PPCISD::FCFID:
8279 return PPCISD::STRICT_FCFID;
8280 case PPCISD::FCFIDU:
8281 return PPCISD::STRICT_FCFIDU;
8282 case PPCISD::FCFIDS:
8283 return PPCISD::STRICT_FCFIDS;
8284 case PPCISD::FCFIDUS:
8285 return PPCISD::STRICT_FCFIDUS;
8286 }
8287}
8288
                              const PPCSubtarget &Subtarget) {
  // Build the PPC-specific FP->int conversion node (fcti[d|w][u]z family),
  // selecting the opcode from the destination type, signedness and subtarget
  // features. Handles both strict and non-strict forms of the operation.
  SDLoc dl(Op);
  bool IsStrict = Op->isStrictFPOpcode();
  bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT ||
                  Op.getOpcode() == ISD::STRICT_FP_TO_SINT;

  // TODO: Any other flags to propagate?
  SDNodeFlags Flags;
  Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());

  // For strict nodes, source is the second operand.
  SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
  SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
  MVT DestTy = Op.getSimpleValueType();
  assert(Src.getValueType().isFloatingPoint() &&
         (DestTy == MVT::i8 || DestTy == MVT::i16 || DestTy == MVT::i32 ||
          DestTy == MVT::i64) &&
         "Invalid FP_TO_INT types");
  // Widen f32 sources to f64 first; the conversion is performed at 64 bits.
  if (Src.getValueType() == MVT::f32) {
    if (IsStrict) {
      Src =
                    DAG.getVTList(MVT::f64, MVT::Other), {Chain, Src}, Flags);
      Chain = Src.getValue(1);
    } else
      Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);
  }
  // Narrow (i8/i16) destinations are computed at the subtarget's scalar
  // integer width when P9 vector support is available.
  if ((DestTy == MVT::i8 || DestTy == MVT::i16) && Subtarget.hasP9Vector())
    DestTy = Subtarget.getScalarIntVT();
  unsigned Opc = ISD::DELETED_NODE;
  switch (DestTy.SimpleTy) {
  default: llvm_unreachable("Unhandled FP_TO_INT type in custom expander!");
  case MVT::i32:
    // Without FPCVT there is no fctiwuz; an unsigned i32 result is obtained
    // via the signed 64-bit truncating conversion instead.
    Opc = IsSigned ? PPCISD::FCTIWZ
                   : (Subtarget.hasFPCVT() ? PPCISD::FCTIWUZ : PPCISD::FCTIDZ);
    break;
  case MVT::i64:
    assert((IsSigned || Subtarget.hasFPCVT()) &&
           "i64 FP_TO_UINT is supported only with FPCVT");
    Opc = IsSigned ? PPCISD::FCTIDZ : PPCISD::FCTIDUZ;
  }
  // f128 conversions stay in f128; everything else produces an f64-typed
  // bit pattern holding the integer result.
  EVT ConvTy = Src.getValueType() == MVT::f128 ? MVT::f128 : MVT::f64;
  SDValue Conv;
  if (IsStrict) {
    Conv = DAG.getNode(Opc, dl, DAG.getVTList(ConvTy, MVT::Other), {Chain, Src},
                       Flags);
  } else {
    Conv = DAG.getNode(Opc, dl, ConvTy, Src);
  }
  return Conv;
}
8342
// Lower an FP->int conversion by converting in a register and spilling the
// result to a stack slot, filling in \p RLI so the caller can emit the
// integer reload (or reuse the slot address).
void PPCTargetLowering::LowerFP_TO_INTForReuse(SDValue Op, ReuseLoadInfo &RLI,
                                               SelectionDAG &DAG,
                                               const SDLoc &dl) const {
  SDValue Tmp = convertFPToInt(Op, DAG, Subtarget);
  bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT ||
                  Op.getOpcode() == ISD::STRICT_FP_TO_SINT;
  bool IsStrict = Op->isStrictFPOpcode();

  // Convert the FP value to an int value through memory.
  // A 4-byte slot (stored with stfiwx) is only usable when the target has
  // STFIWX and the conversion produced a true 32-bit result.
  bool i32Stack = Op.getValueType() == MVT::i32 && Subtarget.hasSTFIWX() &&
                  (IsSigned || Subtarget.hasFPCVT());
  SDValue FIPtr = DAG.CreateStackTemporary(i32Stack ? MVT::i32 : MVT::f64);
  int FI = cast<FrameIndexSDNode>(FIPtr)->getIndex();
  MachinePointerInfo MPI =

  // Emit a store to the stack slot.
  SDValue Chain = IsStrict ? Tmp.getValue(1) : DAG.getEntryNode();
  Align Alignment(DAG.getEVTAlign(Tmp.getValueType()));
  if (i32Stack) {
    MachineFunction &MF = DAG.getMachineFunction();
    Alignment = Align(4);
    MachineMemOperand *MMO =
        MF.getMachineMemOperand(MPI, MachineMemOperand::MOStore, 4, Alignment);
    SDValue Ops[] = { Chain, Tmp, FIPtr };
    // stfiwx stores the low 32 bits of the FP register's integer image.
    Chain = DAG.getMemIntrinsicNode(PPCISD::STFIWX, dl,
              DAG.getVTList(MVT::Other), Ops, MVT::i32, MMO);
  } else
    Chain = DAG.getStore(Chain, dl, Tmp, FIPtr, MPI, Alignment);

  // Result is a load from the stack slot. If loading 4 bytes, make sure to
  // add in a bias on big endian.
  if (Op.getValueType() == MVT::i32 && !i32Stack &&
      !Subtarget.isLittleEndian()) {
    FIPtr = DAG.getNode(ISD::ADD, dl, FIPtr.getValueType(), FIPtr,
                        DAG.getConstant(4, dl, FIPtr.getValueType()));
    MPI = MPI.getWithOffset(4);
  }

  RLI.Chain = Chain;
  RLI.Ptr = FIPtr;
  RLI.MPI = MPI;
  RLI.Alignment = Alignment;
}
8387
8388/// Custom lowers floating point to integer conversions to use
8389/// the direct move instructions available in ISA 2.07 to avoid the
8390/// need for load/store combinations.
8391SDValue PPCTargetLowering::LowerFP_TO_INTDirectMove(SDValue Op,
8392 SelectionDAG &DAG,
8393 const SDLoc &dl) const {
8394 SDValue Conv = convertFPToInt(Op, DAG, Subtarget);
8395 SDValue Mov = DAG.getNode(PPCISD::MFVSR, dl, Op.getValueType(), Conv);
8396 if (Op->isStrictFPOpcode())
8397 return DAG.getMergeValues({Mov, Conv.getValue(1)}, dl);
8398 else
8399 return Mov;
8400}
8401
/// Top-level custom lowering for (STRICT_)FP_TO_SINT/FP_TO_UINT. Dispatches
/// between the legal f128 form, a manual ppcf128->i32 expansion, the
/// direct-move path, and the store/reload fallback.
SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
                                          const SDLoc &dl) const {
  bool IsStrict = Op->isStrictFPOpcode();
  bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT ||
                  Op.getOpcode() == ISD::STRICT_FP_TO_SINT;
  SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
  EVT SrcVT = Src.getValueType();
  EVT DstVT = Op.getValueType();

  // FP to INT conversions are legal for f128.
  if (SrcVT == MVT::f128)
    return Subtarget.hasP9Vector() ? Op : SDValue();

  // Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on
  // PPC (the libcall is not available).
  if (SrcVT == MVT::ppcf128) {
    if (DstVT == MVT::i32) {
      // TODO: Conservatively pass only nofpexcept flag here. Need to check and
      // set other fast-math flags to FP operations in both strict and
      // non-strict cases. (FP_TO_SINT, FSUB)
      SDNodeFlags Flags;
      Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());

      if (IsSigned) {
        SDValue Lo, Hi;
        std::tie(Lo, Hi) = DAG.SplitScalar(Src, dl, MVT::f64, MVT::f64);

        // Add the two halves of the long double in round-to-zero mode, and use
        // a smaller FP_TO_SINT.
        if (IsStrict) {
          SDValue Res = DAG.getNode(PPCISD::STRICT_FADDRTZ, dl,
                                    DAG.getVTList(MVT::f64, MVT::Other),
                                    {Op.getOperand(0), Lo, Hi}, Flags);
          return DAG.getNode(ISD::STRICT_FP_TO_SINT, dl,
                             DAG.getVTList(MVT::i32, MVT::Other),
                             {Res.getValue(1), Res}, Flags);
        } else {
          SDValue Res = DAG.getNode(PPCISD::FADDRTZ, dl, MVT::f64, Lo, Hi);
          return DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Res);
        }
      } else {
        // Unsigned case: bias around 2^31 so the signed conversion covers
        // the full unsigned i32 range. TwoE31 is 2^31 as a ppcf128 constant.
        const uint64_t TwoE31[] = {0x41e0000000000000LL, 0};
        APFloat APF = APFloat(APFloat::PPCDoubleDouble(), APInt(128, TwoE31));
        SDValue Cst = DAG.getConstantFP(APF, dl, SrcVT);
        SDValue SignMask = DAG.getConstant(0x80000000, dl, DstVT);
        if (IsStrict) {
          // Sel = Src < 0x80000000
          // FltOfs = select Sel, 0.0, 0x80000000
          // IntOfs = select Sel, 0, 0x80000000
          // Result = fp_to_sint(Src - FltOfs) ^ IntOfs
          SDValue Chain = Op.getOperand(0);
          EVT SetCCVT =
              getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
          EVT DstSetCCVT =
              getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), DstVT);
          SDValue Sel = DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT,
                                     Chain, true);
          Chain = Sel.getValue(1);

          SDValue FltOfs = DAG.getSelect(
              dl, SrcVT, Sel, DAG.getConstantFP(0.0, dl, SrcVT), Cst);
          Sel = DAG.getBoolExtOrTrunc(Sel, dl, DstSetCCVT, DstVT);

          SDValue Val = DAG.getNode(ISD::STRICT_FSUB, dl,
                                    DAG.getVTList(SrcVT, MVT::Other),
                                    {Chain, Src, FltOfs}, Flags);
          Chain = Val.getValue(1);
          SDValue SInt = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl,
                                     DAG.getVTList(DstVT, MVT::Other),
                                     {Chain, Val}, Flags);
          Chain = SInt.getValue(1);
          SDValue IntOfs = DAG.getSelect(
              dl, DstVT, Sel, DAG.getConstant(0, dl, DstVT), SignMask);
          SDValue Result = DAG.getNode(ISD::XOR, dl, DstVT, SInt, IntOfs);
          return DAG.getMergeValues({Result, Chain}, dl);
        } else {
          // X>=2^31 ? (int)(X-2^31)+0x80000000 : (int)X
          // FIXME: generated code sucks.
          SDValue True = DAG.getNode(ISD::FSUB, dl, MVT::ppcf128, Src, Cst);
          True = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, True);
          True = DAG.getNode(ISD::ADD, dl, MVT::i32, True, SignMask);
          SDValue False = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Src);
          return DAG.getSelectCC(dl, Src, Cst, True, False, ISD::SETGE);
        }
      }
    }

    // Other ppcf128 destinations fall back to the default expansion.
    return SDValue();
  }

  if (Subtarget.hasDirectMove() && Subtarget.isPPC64())
    return LowerFP_TO_INTDirectMove(Op, DAG, dl);

  // Fallback: convert through a stack slot and reload the integer.
  ReuseLoadInfo RLI;
  LowerFP_TO_INTForReuse(Op, RLI, DAG, dl);

  return DAG.getLoad(Op.getValueType(), dl, RLI.Chain, RLI.Ptr, RLI.MPI,
                     RLI.Alignment, RLI.MMOFlags(), RLI.AAInfo, RLI.Ranges);
}
8501
8502// We're trying to insert a regular store, S, and then a load, L. If the
8503// incoming value, O, is a load, we might just be able to have our load use the
8504// address used by O. However, we don't know if anything else will store to
8505// that address before we can load from it. To prevent this situation, we need
8506// to insert our load, L, into the chain as a peer of O. To do this, we give L
8507// the same chain operand as O, we create a token factor from the chain results
8508// of O and L, and we replace all uses of O's chain result with that token
8509// factor (this last part is handled by makeEquivalentMemoryOrdering).
8510bool PPCTargetLowering::canReuseLoadAddress(SDValue Op, EVT MemVT,
8511 ReuseLoadInfo &RLI,
8512 SelectionDAG &DAG,
8513 ISD::LoadExtType ET) const {
8514 // Conservatively skip reusing for constrained FP nodes.
8515 if (Op->isStrictFPOpcode())
8516 return false;
8517
8518 SDLoc dl(Op);
8519 bool ValidFPToUint = Op.getOpcode() == ISD::FP_TO_UINT &&
8520 (Subtarget.hasFPCVT() || Op.getValueType() == MVT::i32);
8521 if (ET == ISD::NON_EXTLOAD &&
8522 (ValidFPToUint || Op.getOpcode() == ISD::FP_TO_SINT) &&
8523 isOperationLegalOrCustom(Op.getOpcode(),
8524 Op.getOperand(0).getValueType())) {
8525
8526 LowerFP_TO_INTForReuse(Op, RLI, DAG, dl);
8527 return true;
8528 }
8529
8530 LoadSDNode *LD = dyn_cast<LoadSDNode>(Op);
8531 if (!LD || LD->getExtensionType() != ET || LD->isVolatile() ||
8532 LD->isNonTemporal())
8533 return false;
8534 if (LD->getMemoryVT() != MemVT)
8535 return false;
8536
8537 // If the result of the load is an illegal type, then we can't build a
8538 // valid chain for reuse since the legalised loads and token factor node that
8539 // ties the legalised loads together uses a different output chain then the
8540 // illegal load.
8541 if (!isTypeLegal(LD->getValueType(0)))
8542 return false;
8543
8544 RLI.Ptr = LD->getBasePtr();
8545 if (LD->isIndexed() && !LD->getOffset().isUndef()) {
8546 assert(LD->getAddressingMode() == ISD::PRE_INC &&
8547 "Non-pre-inc AM on PPC?");
8548 RLI.Ptr = DAG.getNode(ISD::ADD, dl, RLI.Ptr.getValueType(), RLI.Ptr,
8549 LD->getOffset());
8550 }
8551
8552 RLI.Chain = LD->getChain();
8553 RLI.MPI = LD->getPointerInfo();
8554 RLI.IsDereferenceable = LD->isDereferenceable();
8555 RLI.IsInvariant = LD->isInvariant();
8556 RLI.Alignment = LD->getAlign();
8557 RLI.AAInfo = LD->getAAInfo();
8558 RLI.Ranges = LD->getRanges();
8559
8560 RLI.ResChain = SDValue(LD, LD->isIndexed() ? 2 : 1);
8561 return true;
8562}
8563
8564/// Analyze profitability of direct move
8565/// prefer float load to int load plus direct move
8566/// when there is no integer use of int load
8567bool PPCTargetLowering::directMoveIsProfitable(const SDValue &Op) const {
8568 SDNode *Origin = Op.getOperand(Op->isStrictFPOpcode() ? 1 : 0).getNode();
8569 if (Origin->getOpcode() != ISD::LOAD)
8570 return true;
8571
8572 // If there is no LXSIBZX/LXSIHZX, like Power8,
8573 // prefer direct move if the memory size is 1 or 2 bytes.
8574 MachineMemOperand *MMO = cast<LoadSDNode>(Origin)->getMemOperand();
8575 if (!Subtarget.hasP9Vector() &&
8576 (!MMO->getSize().hasValue() || MMO->getSize().getValue() <= 2))
8577 return true;
8578
8579 for (SDUse &Use : Origin->uses()) {
8580
8581 // Only look at the users of the loaded value.
8582 if (Use.getResNo() != 0)
8583 continue;
8584
8585 SDNode *User = Use.getUser();
8586 if (User->getOpcode() != ISD::SINT_TO_FP &&
8587 User->getOpcode() != ISD::UINT_TO_FP &&
8588 User->getOpcode() != ISD::STRICT_SINT_TO_FP &&
8589 User->getOpcode() != ISD::STRICT_UINT_TO_FP)
8590 return true;
8591 }
8592
8593 return false;
8594}
8595
                              const PPCSubtarget &Subtarget,
                              SDValue Chain = SDValue()) {
  // Build the PPC int->FP conversion node (fcfid family) for \p Src, which
  // already holds the integer bits in an FP register. Handles both strict
  // and non-strict nodes; \p Chain overrides the node's own chain if given.
  bool IsSigned = Op.getOpcode() == ISD::SINT_TO_FP ||
                  Op.getOpcode() == ISD::STRICT_SINT_TO_FP;
  SDLoc dl(Op);

  // TODO: Any other flags to propagate?
  SDNodeFlags Flags;
  Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());

  // If we have FCFIDS, then use it when converting to single-precision.
  // Otherwise, convert to double-precision and then round.
  bool IsSingle = Op.getValueType() == MVT::f32 && Subtarget.hasFPCVT();
  unsigned ConvOpc = IsSingle ? (IsSigned ? PPCISD::FCFIDS : PPCISD::FCFIDUS)
                              : (IsSigned ? PPCISD::FCFID : PPCISD::FCFIDU);
  EVT ConvTy = IsSingle ? MVT::f32 : MVT::f64;
  if (Op->isStrictFPOpcode()) {
    if (!Chain)
      Chain = Op.getOperand(0);
    // Strict form: same conversion, plus a chain result.
    return DAG.getNode(getPPCStrictOpcode(ConvOpc), dl,
                       DAG.getVTList(ConvTy, MVT::Other), {Chain, Src}, Flags);
  } else
    return DAG.getNode(ConvOpc, dl, ConvTy, Src);
}
8621
8622/// Custom lowers integer to floating point conversions to use
8623/// the direct move instructions available in ISA 2.07 to avoid the
8624/// need for load/store combinations.
8625SDValue PPCTargetLowering::LowerINT_TO_FPDirectMove(SDValue Op,
8626 SelectionDAG &DAG,
8627 const SDLoc &dl) const {
8628 assert((Op.getValueType() == MVT::f32 ||
8629 Op.getValueType() == MVT::f64) &&
8630 "Invalid floating point type as target of conversion");
8631 assert(Subtarget.hasFPCVT() &&
8632 "Int to FP conversions with direct moves require FPCVT");
8633 SDValue Src = Op.getOperand(Op->isStrictFPOpcode() ? 1 : 0);
8634 bool WordInt = Src.getSimpleValueType().SimpleTy == MVT::i32;
8635 bool Signed = Op.getOpcode() == ISD::SINT_TO_FP ||
8636 Op.getOpcode() == ISD::STRICT_SINT_TO_FP;
8637 unsigned MovOpc = (WordInt && !Signed) ? PPCISD::MTVSRZ : PPCISD::MTVSRA;
8638 SDValue Mov = DAG.getNode(MovOpc, dl, MVT::f64, Src);
8639 return convertIntToFP(Op, Mov, DAG, Subtarget);
8640}
8641
8642static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl) {
8643
8644 EVT VecVT = Vec.getValueType();
8645 assert(VecVT.isVector() && "Expected a vector type.");
8646 assert(VecVT.getSizeInBits() < 128 && "Vector is already full width.");
8647
8648 EVT EltVT = VecVT.getVectorElementType();
8649 unsigned WideNumElts = 128 / EltVT.getSizeInBits();
8650 EVT WideVT = EVT::getVectorVT(*DAG.getContext(), EltVT, WideNumElts);
8651
8652 unsigned NumConcat = WideNumElts / VecVT.getVectorNumElements();
8653 SmallVector<SDValue, 16> Ops(NumConcat);
8654 Ops[0] = Vec;
8655 SDValue UndefVec = DAG.getUNDEF(VecVT);
8656 for (unsigned i = 1; i < NumConcat; ++i)
8657 Ops[i] = UndefVec;
8658
8659 return DAG.getNode(ISD::CONCAT_VECTORS, dl, WideVT, Ops);
8660}
8661
// Lower a vector (STRICT_)[SU]INT_TO_FP with a narrow integer source: widen
// the source to 128 bits, shuffle the payload elements into position, extend
// them to the intermediate integer type, then emit the conversion on the
// now-legal vector type.
SDValue PPCTargetLowering::LowerINT_TO_FPVector(SDValue Op, SelectionDAG &DAG,
                                                const SDLoc &dl) const {
  bool IsStrict = Op->isStrictFPOpcode();
  unsigned Opc = Op.getOpcode();
  SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
         "Unexpected conversion type");
  assert((Op.getValueType() == MVT::v2f64 || Op.getValueType() == MVT::v4f32) &&
         "Supports conversions to v2f64/v4f32 only.");

  // TODO: Any other flags to propagate?
  SDNodeFlags Flags;
  Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());

  bool SignedConv = Opc == ISD::SINT_TO_FP || Opc == ISD::STRICT_SINT_TO_FP;
  bool FourEltRes = Op.getValueType() == MVT::v4f32;

  SDValue Wide = widenVec(DAG, Src, dl);
  EVT WideVT = Wide.getValueType();
  unsigned WideNumElts = WideVT.getVectorNumElements();
  MVT IntermediateVT = FourEltRes ? MVT::v4i32 : MVT::v2i64;

  // Default every lane to the second shuffle operand (undef or zero below).
  SmallVector<int, 16> ShuffV;
  for (unsigned i = 0; i < WideNumElts; ++i)
    ShuffV.push_back(i + WideNumElts);

  // Place the source elements at endian-appropriate positions within each
  // intermediate-type lane.
  int Stride = FourEltRes ? WideNumElts / 4 : WideNumElts / 2;
  int SaveElts = FourEltRes ? 4 : 2;
  if (Subtarget.isLittleEndian())
    for (int i = 0; i < SaveElts; i++)
      ShuffV[i * Stride] = i;
  else
    for (int i = 1; i <= SaveElts; i++)
      ShuffV[i * Stride - 1] = i - 1;

  // Unsigned conversions zero-fill the unused lanes; signed ones leave them
  // undef and fix them up with SIGN_EXTEND_INREG below.
  SDValue ShuffleSrc2 =
      SignedConv ? DAG.getUNDEF(WideVT) : DAG.getConstant(0, dl, WideVT);
  SDValue Arrange = DAG.getVectorShuffle(WideVT, dl, Wide, ShuffleSrc2, ShuffV);

  SDValue Extend;
  if (SignedConv) {
    Arrange = DAG.getBitcast(IntermediateVT, Arrange);
    EVT ExtVT = Src.getValueType();
    if (Subtarget.hasP9Altivec())
      ExtVT = EVT::getVectorVT(*DAG.getContext(), WideVT.getVectorElementType(),
                               IntermediateVT.getVectorNumElements());

    Extend = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, IntermediateVT, Arrange,
                         DAG.getValueType(ExtVT));
  } else
    Extend = DAG.getNode(ISD::BITCAST, dl, IntermediateVT, Arrange);

  if (IsStrict)
    return DAG.getNode(Opc, dl, DAG.getVTList(Op.getValueType(), MVT::Other),
                       {Op.getOperand(0), Extend}, Flags);

  return DAG.getNode(Opc, dl, Op.getValueType(), Extend);
}
8721
/// Top-level custom lowering for scalar (STRICT_)[SU]INT_TO_FP: dispatches
/// to the vector path, the direct-move path, or a store/reload through a
/// stack slot followed by fcfid-family conversion.
SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
                                          SelectionDAG &DAG) const {
  SDLoc dl(Op);
  bool IsSigned = Op.getOpcode() == ISD::SINT_TO_FP ||
                  Op.getOpcode() == ISD::STRICT_SINT_TO_FP;
  bool IsStrict = Op->isStrictFPOpcode();
  SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
  SDValue Chain = IsStrict ? Op.getOperand(0) : DAG.getEntryNode();

  // TODO: Any other flags to propagate?
  SDNodeFlags Flags;
  Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());

  EVT InVT = Src.getValueType();
  EVT OutVT = Op.getValueType();
  if (OutVT.isVector() && OutVT.isFloatingPoint() &&
      isOperationCustom(Op.getOpcode(), InVT))
    return LowerINT_TO_FPVector(Op, DAG, dl);

  // Conversions to f128 are legal.
  if (Op.getValueType() == MVT::f128)
    return Subtarget.hasP9Vector() ? Op : SDValue();

  // Don't handle ppc_fp128 here; let it be lowered to a libcall.
  if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
    return SDValue();

  // i1 converts exactly to 0.0 or 1.0 via a select.
  if (Src.getValueType() == MVT::i1) {
    SDValue Sel = DAG.getNode(ISD::SELECT, dl, Op.getValueType(), Src,
                              DAG.getConstantFP(1.0, dl, Op.getValueType()),
                              DAG.getConstantFP(0.0, dl, Op.getValueType()));
    if (IsStrict)
      return DAG.getMergeValues({Sel, Chain}, dl);
    else
      return Sel;
  }

  // If we have direct moves, we can do all the conversion, skip the store/load
  // however, without FPCVT we can't do most conversions.
  if (Subtarget.hasDirectMove() && directMoveIsProfitable(Op) &&
      Subtarget.isPPC64() && Subtarget.hasFPCVT())
    return LowerINT_TO_FPDirectMove(Op, DAG, dl);

  assert((IsSigned || Subtarget.hasFPCVT()) &&
         "UINT_TO_FP is supported only with FPCVT");

  if (Src.getValueType() == MVT::i64) {
    SDValue SINT = Src;
    // When converting to single-precision, we actually need to convert
    // to double-precision first and then round to single-precision.
    // To avoid double-rounding effects during that operation, we have
    // to prepare the input operand.  Bits that might be truncated when
    // converting to double-precision are replaced by a bit that won't
    // be lost at this stage, but is below the single-precision rounding
    // position.
    //
    // However, if afn is in effect, accept double
    // rounding to avoid the extra overhead.
    // FIXME: Currently INT_TO_FP can't support fast math flags because
    // of nneg flag, thus Op->getFlags().hasApproximateFuncs() is always
    // false.
    if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT() &&
        !Op->getFlags().hasApproximateFuncs()) {

      // Twiddle input to make sure the low 11 bits are zero.  (If this
      // is the case, we are guaranteed the value will fit into the 53 bit
      // mantissa of an IEEE double-precision value without rounding.)
      // If any of those low 11 bits were not zero originally, make sure
      // bit 12 (value 2048) is set instead, so that the final rounding
      // to single-precision gets the correct result.
      SDValue Round = DAG.getNode(ISD::AND, dl, MVT::i64,
                                  SINT, DAG.getConstant(2047, dl, MVT::i64));
      Round = DAG.getNode(ISD::ADD, dl, MVT::i64,
                          Round, DAG.getConstant(2047, dl, MVT::i64));
      Round = DAG.getNode(ISD::OR, dl, MVT::i64, Round, SINT);
      Round = DAG.getNode(ISD::AND, dl, MVT::i64, Round,
                          DAG.getSignedConstant(-2048, dl, MVT::i64));

      // However, we cannot use that value unconditionally: if the magnitude
      // of the input value is small, the bit-twiddling we did above might
      // end up visibly changing the output.  Fortunately, in that case, we
      // don't need to twiddle bits since the original input will convert
      // exactly to double-precision floating-point already.  Therefore,
      // construct a conditional to use the original value if the top 11
      // bits are all sign-bit copies, and use the rounded value computed
      // above otherwise.
      SDValue Cond = DAG.getNode(ISD::SRA, dl, MVT::i64,
                                 SINT, DAG.getConstant(53, dl, MVT::i32));
      Cond = DAG.getNode(ISD::ADD, dl, MVT::i64,
                         Cond, DAG.getConstant(1, dl, MVT::i64));
      Cond = DAG.getSetCC(
          dl,
          getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), MVT::i64),
          Cond, DAG.getConstant(1, dl, MVT::i64), ISD::SETUGT);

      SINT = DAG.getNode(ISD::SELECT, dl, MVT::i64, Cond, Round, SINT);
    }

    ReuseLoadInfo RLI;
    SDValue Bits;

    MachineFunction &MF = DAG.getMachineFunction();
    // Try to feed the conversion directly from memory: either reload the
    // full 64-bit value, or use lfiwax/lfiwzx for extended 32-bit loads.
    if (canReuseLoadAddress(SINT, MVT::i64, RLI, DAG)) {
      Bits = DAG.getLoad(MVT::f64, dl, RLI.Chain, RLI.Ptr, RLI.MPI,
                         RLI.Alignment, RLI.MMOFlags(), RLI.AAInfo, RLI.Ranges);
      if (RLI.ResChain)
        DAG.makeEquivalentMemoryOrdering(RLI.ResChain, Bits.getValue(1));
    } else if (Subtarget.hasLFIWAX() &&
               canReuseLoadAddress(SINT, MVT::i32, RLI, DAG, ISD::SEXTLOAD)) {
      MachineMemOperand *MMO =
                                RLI.Alignment, RLI.AAInfo, RLI.Ranges);
      SDValue Ops[] = { RLI.Chain, RLI.Ptr };
      Bits = DAG.getMemIntrinsicNode(PPCISD::LFIWAX, dl,
                                     DAG.getVTList(MVT::f64, MVT::Other),
                                     Ops, MVT::i32, MMO);
      if (RLI.ResChain)
        DAG.makeEquivalentMemoryOrdering(RLI.ResChain, Bits.getValue(1));
    } else if (Subtarget.hasFPCVT() &&
               canReuseLoadAddress(SINT, MVT::i32, RLI, DAG, ISD::ZEXTLOAD)) {
      MachineMemOperand *MMO =
                                RLI.Alignment, RLI.AAInfo, RLI.Ranges);
      SDValue Ops[] = { RLI.Chain, RLI.Ptr };
      Bits = DAG.getMemIntrinsicNode(PPCISD::LFIWZX, dl,
                                     DAG.getVTList(MVT::f64, MVT::Other),
                                     Ops, MVT::i32, MMO);
      if (RLI.ResChain)
        DAG.makeEquivalentMemoryOrdering(RLI.ResChain, Bits.getValue(1));
    } else if (((Subtarget.hasLFIWAX() &&
                 SINT.getOpcode() == ISD::SIGN_EXTEND) ||
                (Subtarget.hasFPCVT() &&
                 SINT.getOpcode() == ISD::ZERO_EXTEND)) &&
               SINT.getOperand(0).getValueType() == MVT::i32) {
      // The extension can be folded into the FP load: spill the narrow i32
      // and load it with the matching extending FP load.
      MachineFrameInfo &MFI = MF.getFrameInfo();
      EVT PtrVT = getPointerTy(DAG.getDataLayout());

      int FrameIdx = MFI.CreateStackObject(4, Align(4), false);
      SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);

      SDValue Store = DAG.getStore(Chain, dl, SINT.getOperand(0), FIdx,
                                       DAG.getMachineFunction(), FrameIdx));
      Chain = Store;

      assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 &&
             "Expected an i32 store");

      RLI.Ptr = FIdx;
      RLI.Chain = Chain;
      RLI.MPI =
      RLI.Alignment = Align(4);

      MachineMemOperand *MMO =
                                RLI.Alignment, RLI.AAInfo, RLI.Ranges);
      SDValue Ops[] = { RLI.Chain, RLI.Ptr };
                                     PPCISD::LFIWZX : PPCISD::LFIWAX,
                                     dl, DAG.getVTList(MVT::f64, MVT::Other),
                                     Ops, MVT::i32, MMO);
      Chain = Bits.getValue(1);
    } else
      // Value already lives in a register: just reinterpret the bits.
      Bits = DAG.getNode(ISD::BITCAST, dl, MVT::f64, SINT);

    SDValue FP = convertIntToFP(Op, Bits, DAG, Subtarget, Chain);
    if (IsStrict)
      Chain = FP.getValue(1);

    // Without FCFIDS the conversion produced f64; round it to f32 here.
    if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) {
      if (IsStrict)
        FP = DAG.getNode(
            ISD::STRICT_FP_ROUND, dl, DAG.getVTList(MVT::f32, MVT::Other),
            {Chain, FP, DAG.getIntPtrConstant(0, dl, /*isTarget=*/true)},
            Flags);
      else
        FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP,
                         DAG.getIntPtrConstant(0, dl, /*isTarget=*/true));
    }
    return FP;
  }

  assert(Src.getValueType() == MVT::i32 &&
         "Unhandled INT_TO_FP type in custom expander!");
  // Since we only generate this in 64-bit mode, we can take advantage of
  // 64-bit registers.  In particular, sign extend the input value into the
  // 64-bit register with extsw, store the WHOLE 64-bit value into the stack
  // then lfd it and fcfid it.
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  EVT PtrVT = getPointerTy(MF.getDataLayout());

  SDValue Ld;
  if (Subtarget.hasLFIWAX() || Subtarget.hasFPCVT()) {
    ReuseLoadInfo RLI;
    bool ReusingLoad;
    if (!(ReusingLoad = canReuseLoadAddress(Src, MVT::i32, RLI, DAG))) {
      // No reusable load: spill the i32 to a fresh 4-byte slot.
      int FrameIdx = MFI.CreateStackObject(4, Align(4), false);
      SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);

      SDValue Store = DAG.getStore(Chain, dl, Src, FIdx,
                                       DAG.getMachineFunction(), FrameIdx));
      Chain = Store;

      assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 &&
             "Expected an i32 store");

      RLI.Ptr = FIdx;
      RLI.Chain = Chain;
      RLI.MPI =
      RLI.Alignment = Align(4);
    }

    MachineMemOperand *MMO =
                              RLI.Alignment, RLI.AAInfo, RLI.Ranges);
    SDValue Ops[] = { RLI.Chain, RLI.Ptr };
    Ld = DAG.getMemIntrinsicNode(IsSigned ? PPCISD::LFIWAX : PPCISD::LFIWZX, dl,
                                 DAG.getVTList(MVT::f64, MVT::Other), Ops,
                                 MVT::i32, MMO);
    Chain = Ld.getValue(1);
    if (ReusingLoad && RLI.ResChain) {
      DAG.makeEquivalentMemoryOrdering(RLI.ResChain, Ld.getValue(1));
    }
  } else {
    assert(Subtarget.isPPC64() &&
           "i32->FP without LFIWAX supported only on PPC64");

    int FrameIdx = MFI.CreateStackObject(8, Align(8), false);
    SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);

    SDValue Ext64 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i64, Src);

    // STD the extended value into the stack slot.
    SDValue Store = DAG.getStore(
        Chain, dl, Ext64, FIdx,
    Chain = Store;

    // Load the value as a double.
    Ld = DAG.getLoad(
        MVT::f64, dl, Chain, FIdx,
    Chain = Ld.getValue(1);
  }

  // FCFID it and return it.
  SDValue FP = convertIntToFP(Op, Ld, DAG, Subtarget, Chain);
  if (IsStrict)
    Chain = FP.getValue(1);
  if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) {
    if (IsStrict)
      FP = DAG.getNode(
          ISD::STRICT_FP_ROUND, dl, DAG.getVTList(MVT::f32, MVT::Other),
          {Chain, FP, DAG.getIntPtrConstant(0, dl, /*isTarget=*/true)}, Flags);
    else
      FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP,
                       DAG.getIntPtrConstant(0, dl, /*isTarget=*/true));
  }
  return FP;
}
8986
/// Lower SET_ROUNDING by writing the requested mode into the FPSCR RN field.
/// LLVM's mode encoding is translated to the Power encoding with
/// x ^ (~(x >> 1) & 1); constant modes use mffscrni/mtfsb, variable modes go
/// through mffscrn (ISA 3.0) or a read-modify-write of the FPSCR via mtfsf.
SDValue PPCTargetLowering::LowerSET_ROUNDING(SDValue Op,
                                             SelectionDAG &DAG) const {
  SDLoc Dl(Op);
  MachineFunction &MF = DAG.getMachineFunction();
  EVT PtrVT = getPointerTy(MF.getDataLayout());
  SDValue Chain = Op.getOperand(0);

  // If requested mode is constant, just use simpler mtfsb/mffscrni
  if (auto *CVal = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
    uint64_t Mode = CVal->getZExtValue();
    assert(Mode < 4 && "Unsupported rounding mode!");
    unsigned InternalRnd = Mode ^ (~(Mode >> 1) & 1);
    if (Subtarget.isISA3_0())
      return SDValue(
          DAG.getMachineNode(
              PPC::MFFSCRNI, Dl, {MVT::f64, MVT::Other},
              {DAG.getConstant(InternalRnd, Dl, MVT::i32, true), Chain}),
          1);
    // Pre-ISA 3.0: set FPSCR bits 30 (RN high) and 31 (RN low) individually.
    SDNode *SetHi = DAG.getMachineNode(
        (InternalRnd & 2) ? PPC::MTFSB1 : PPC::MTFSB0, Dl, MVT::Other,
        {DAG.getConstant(30, Dl, MVT::i32, true), Chain});
    SDNode *SetLo = DAG.getMachineNode(
        (InternalRnd & 1) ? PPC::MTFSB1 : PPC::MTFSB0, Dl, MVT::Other,
        {DAG.getConstant(31, Dl, MVT::i32, true), SDValue(SetHi, 0)});
    return SDValue(SetLo, 0);
  }

  // Use x ^ (~(x >> 1) & 1) to transform LLVM rounding mode to Power format.
  SDValue One = DAG.getConstant(1, Dl, MVT::i32);
  SDValue SrcFlag = DAG.getNode(ISD::AND, Dl, MVT::i32, Op.getOperand(1),
                                DAG.getConstant(3, Dl, MVT::i32));
  SDValue DstFlag = DAG.getNode(
      ISD::XOR, Dl, MVT::i32, SrcFlag,
      DAG.getNode(ISD::AND, Dl, MVT::i32,
                  DAG.getNOT(Dl,
                             DAG.getNode(ISD::SRL, Dl, MVT::i32, SrcFlag, One),
                             MVT::i32),
                  One));
  // For Power9, there's faster mffscrn, and we don't need to read FPSCR
  SDValue MFFS;
  if (!Subtarget.isISA3_0()) {
    MFFS = DAG.getNode(PPCISD::MFFS, Dl, {MVT::f64, MVT::Other}, Chain);
    Chain = MFFS.getValue(1);
  }
  SDValue NewFPSCR;
  if (Subtarget.isPPC64()) {
    if (Subtarget.isISA3_0()) {
      NewFPSCR = DAG.getAnyExtOrTrunc(DstFlag, Dl, MVT::i64);
    } else {
      // Set the last two bits (rounding mode) of bitcasted FPSCR.
      SDNode *InsertRN = DAG.getMachineNode(
          PPC::RLDIMI, Dl, MVT::i64,
          {DAG.getNode(ISD::BITCAST, Dl, MVT::i64, MFFS),
           DAG.getNode(ISD::ZERO_EXTEND, Dl, MVT::i64, DstFlag),
           DAG.getTargetConstant(0, Dl, MVT::i32),
           DAG.getTargetConstant(62, Dl, MVT::i32)});
      NewFPSCR = SDValue(InsertRN, 0);
    }
    NewFPSCR = DAG.getNode(ISD::BITCAST, Dl, MVT::f64, NewFPSCR);
  } else {
    // In 32-bit mode, store f64, load and update the lower half.
    int SSFI = MF.getFrameInfo().CreateStackObject(8, Align(8), false);
    SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
    // Address of the low word of the f64 image (endian-dependent).
    SDValue Addr = Subtarget.isLittleEndian()
                       ? StackSlot
                       : DAG.getNode(ISD::ADD, Dl, PtrVT, StackSlot,
                                     DAG.getConstant(4, Dl, PtrVT));
    if (Subtarget.isISA3_0()) {
      Chain = DAG.getStore(Chain, Dl, DstFlag, Addr, MachinePointerInfo());
    } else {
      Chain = DAG.getStore(Chain, Dl, MFFS, StackSlot, MachinePointerInfo());
      SDValue Tmp =
          DAG.getLoad(MVT::i32, Dl, Chain, Addr, MachinePointerInfo());
      Chain = Tmp.getValue(1);
      // Splice the two RN bits into bits 30..31 of the loaded word.
      Tmp = SDValue(DAG.getMachineNode(
                        PPC::RLWIMI, Dl, MVT::i32,
                        {Tmp, DstFlag, DAG.getTargetConstant(0, Dl, MVT::i32),
                         DAG.getTargetConstant(30, Dl, MVT::i32),
                         DAG.getTargetConstant(31, Dl, MVT::i32)}),
                    0);
      Chain = DAG.getStore(Chain, Dl, Tmp, Addr, MachinePointerInfo());
    }
    NewFPSCR =
        DAG.getLoad(MVT::f64, Dl, Chain, StackSlot, MachinePointerInfo());
    Chain = NewFPSCR.getValue(1);
  }
  if (Subtarget.isISA3_0())
    return SDValue(DAG.getMachineNode(PPC::MFFSCRN, Dl, {MVT::f64, MVT::Other},
                                      {NewFPSCR, Chain}),
                   1);
  // Fallback: write all eight FPSCR fields back with mtfsf.
  SDValue Zero = DAG.getConstant(0, Dl, MVT::i32, true);
  SDNode *MTFSF = DAG.getMachineNode(
      PPC::MTFSF, Dl, MVT::Other,
      {DAG.getConstant(255, Dl, MVT::i32, true), NewFPSCR, Zero, Zero, Chain});
  return SDValue(MTFSF, 0);
}
9083
9084SDValue PPCTargetLowering::LowerGET_ROUNDING(SDValue Op,
9085 SelectionDAG &DAG) const {
9086 SDLoc dl(Op);
9087 /*
9088 The rounding mode is in bits 30:31 of FPSR, and has the following
9089 settings:
9090 00 Round to nearest
9091 01 Round to 0
9092 10 Round to +inf
9093 11 Round to -inf
9094
9095 GET_ROUNDING, on the other hand, expects the following:
9096 -1 Undefined
9097 0 Round to 0
9098 1 Round to nearest
9099 2 Round to +inf
9100 3 Round to -inf
9101
9102 To perform the conversion, we do:
9103 ((FPSCR & 0x3) ^ ((~FPSCR & 0x3) >> 1))
9104 */
9105
9106 MachineFunction &MF = DAG.getMachineFunction();
9107 EVT VT = Op.getValueType();
9108 EVT PtrVT = getPointerTy(MF.getDataLayout());
9109
9110 // Save FP Control Word to register
9111 SDValue Chain = Op.getOperand(0);
9112 SDValue MFFS = DAG.getNode(PPCISD::MFFS, dl, {MVT::f64, MVT::Other}, Chain);
9113 Chain = MFFS.getValue(1);
9114
9115 SDValue CWD;
9116 if (isTypeLegal(MVT::i64)) {
9117 CWD = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32,
9118 DAG.getNode(ISD::BITCAST, dl, MVT::i64, MFFS));
9119 } else {
9120 // Save FP register to stack slot
9121 int SSFI = MF.getFrameInfo().CreateStackObject(8, Align(8), false);
9122 SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
9123 Chain = DAG.getStore(Chain, dl, MFFS, StackSlot, MachinePointerInfo());
9124
9125 // Load FP Control Word from low 32 bits of stack slot.
9127 "Stack slot adjustment is valid only on big endian subtargets!");
9128 SDValue Four = DAG.getConstant(4, dl, PtrVT);
9129 SDValue Addr = DAG.getNode(ISD::ADD, dl, PtrVT, StackSlot, Four);
9130 CWD = DAG.getLoad(MVT::i32, dl, Chain, Addr, MachinePointerInfo());
9131 Chain = CWD.getValue(1);
9132 }
9133
9134 // Transform as necessary
9135 SDValue CWD1 =
9136 DAG.getNode(ISD::AND, dl, MVT::i32,
9137 CWD, DAG.getConstant(3, dl, MVT::i32));
9138 SDValue CWD2 =
9139 DAG.getNode(ISD::SRL, dl, MVT::i32,
9140 DAG.getNode(ISD::AND, dl, MVT::i32,
9141 DAG.getNode(ISD::XOR, dl, MVT::i32,
9142 CWD, DAG.getConstant(3, dl, MVT::i32)),
9143 DAG.getConstant(3, dl, MVT::i32)),
9144 DAG.getConstant(1, dl, MVT::i32));
9145
9146 SDValue RetVal =
9147 DAG.getNode(ISD::XOR, dl, MVT::i32, CWD1, CWD2);
9148
9149 RetVal =
9151 dl, VT, RetVal);
9152
9153 return DAG.getMergeValues({RetVal, Chain}, dl);
9154}
9155
9156SDValue PPCTargetLowering::LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) const {
9157 EVT VT = Op.getValueType();
9158 uint64_t BitWidth = VT.getSizeInBits();
9159 SDLoc dl(Op);
9160 assert(Op.getNumOperands() == 3 &&
9161 VT == Op.getOperand(1).getValueType() &&
9162 "Unexpected SHL!");
9163
9164 // Expand into a bunch of logical ops. Note that these ops
9165 // depend on the PPC behavior for oversized shift amounts.
9166 SDValue Lo = Op.getOperand(0);
9167 SDValue Hi = Op.getOperand(1);
9168 SDValue Amt = Op.getOperand(2);
9169 EVT AmtVT = Amt.getValueType();
9170
9171 SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
9172 DAG.getConstant(BitWidth, dl, AmtVT), Amt);
9173 SDValue Tmp2 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Amt);
9174 SDValue Tmp3 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Tmp1);
9175 SDValue Tmp4 = DAG.getNode(ISD::OR , dl, VT, Tmp2, Tmp3);
9176 SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
9177 DAG.getSignedConstant(-BitWidth, dl, AmtVT));
9178 SDValue Tmp6 = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Tmp5);
9179 SDValue OutHi = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6);
9180 SDValue OutLo = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Amt);
9181 SDValue OutOps[] = { OutLo, OutHi };
9182 return DAG.getMergeValues(OutOps, dl);
9183}
9184
9185SDValue PPCTargetLowering::LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) const {
9186 EVT VT = Op.getValueType();
9187 SDLoc dl(Op);
9188 uint64_t BitWidth = VT.getSizeInBits();
9189 assert(Op.getNumOperands() == 3 &&
9190 VT == Op.getOperand(1).getValueType() &&
9191 "Unexpected SRL!");
9192
9193 // Expand into a bunch of logical ops. Note that these ops
9194 // depend on the PPC behavior for oversized shift amounts.
9195 SDValue Lo = Op.getOperand(0);
9196 SDValue Hi = Op.getOperand(1);
9197 SDValue Amt = Op.getOperand(2);
9198 EVT AmtVT = Amt.getValueType();
9199
9200 SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
9201 DAG.getConstant(BitWidth, dl, AmtVT), Amt);
9202 SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt);
9203 SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1);
9204 SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
9205 SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
9206 DAG.getSignedConstant(-BitWidth, dl, AmtVT));
9207 SDValue Tmp6 = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Tmp5);
9208 SDValue OutLo = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6);
9209 SDValue OutHi = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Amt);
9210 SDValue OutOps[] = { OutLo, OutHi };
9211 return DAG.getMergeValues(OutOps, dl);
9212}
9213
9214SDValue PPCTargetLowering::LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) const {
9215 SDLoc dl(Op);
9216 EVT VT = Op.getValueType();
9217 uint64_t BitWidth = VT.getSizeInBits();
9218 assert(Op.getNumOperands() == 3 &&
9219 VT == Op.getOperand(1).getValueType() &&
9220 "Unexpected SRA!");
9221
9222 // Expand into a bunch of logical ops, followed by a select_cc.
9223 SDValue Lo = Op.getOperand(0);
9224 SDValue Hi = Op.getOperand(1);
9225 SDValue Amt = Op.getOperand(2);
9226 EVT AmtVT = Amt.getValueType();
9227
9228 SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
9229 DAG.getConstant(BitWidth, dl, AmtVT), Amt);
9230 SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt);
9231 SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1);
9232 SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
9233 SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
9234 DAG.getSignedConstant(-BitWidth, dl, AmtVT));
9235 SDValue Tmp6 = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Tmp5);
9236 SDValue OutHi = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Amt);
9237 SDValue OutLo = DAG.getSelectCC(dl, Tmp5, DAG.getConstant(0, dl, AmtVT),
9238 Tmp4, Tmp6, ISD::SETLE);
9239 SDValue OutOps[] = { OutLo, OutHi };
9240 return DAG.getMergeValues(OutOps, dl);
9241}
9242
9243SDValue PPCTargetLowering::LowerFunnelShift(SDValue Op,
9244 SelectionDAG &DAG) const {
9245 SDLoc dl(Op);
9246 EVT VT = Op.getValueType();
9247 unsigned BitWidth = VT.getSizeInBits();
9248
9249 bool IsFSHL = Op.getOpcode() == ISD::FSHL;
9250 SDValue X = Op.getOperand(0);
9251 SDValue Y = Op.getOperand(1);
9252 SDValue Z = Op.getOperand(2);
9253 EVT AmtVT = Z.getValueType();
9254
9255 // fshl: (X << (Z % BW)) | (Y >> (BW - (Z % BW)))
9256 // fshr: (X << (BW - (Z % BW))) | (Y >> (Z % BW))
9257 // This is simpler than TargetLowering::expandFunnelShift because we can rely
9258 // on PowerPC shift by BW being well defined.
9259 Z = DAG.getNode(ISD::AND, dl, AmtVT, Z,
9260 DAG.getConstant(BitWidth - 1, dl, AmtVT));
9261 SDValue SubZ =
9262 DAG.getNode(ISD::SUB, dl, AmtVT, DAG.getConstant(BitWidth, dl, AmtVT), Z);
9263 X = DAG.getNode(PPCISD::SHL, dl, VT, X, IsFSHL ? Z : SubZ);
9264 Y = DAG.getNode(PPCISD::SRL, dl, VT, Y, IsFSHL ? SubZ : Z);
9265 return DAG.getNode(ISD::OR, dl, VT, X, Y);
9266}
9267
9268//===----------------------------------------------------------------------===//
9269// Vector related lowering.
9270//
9271
9272/// getCanonicalConstSplat - Build a canonical splat immediate of Val with an
9273/// element size of SplatSize. Cast the result to VT.
9274static SDValue getCanonicalConstSplat(uint64_t Val, unsigned SplatSize, EVT VT,
9275 SelectionDAG &DAG, const SDLoc &dl) {
9276 static const MVT VTys[] = { // canonical VT to use for each size.
9277 MVT::v16i8, MVT::v8i16, MVT::Other, MVT::v4i32
9278 };
9279
9280 EVT ReqVT = VT != MVT::Other ? VT : VTys[SplatSize-1];
9281
9282 // For a splat with all ones, turn it to vspltisb 0xFF to canonicalize.
9283 if (Val == ((1LLU << (SplatSize * 8)) - 1)) {
9284 SplatSize = 1;
9285 Val = 0xFF;
9286 }
9287
9288 EVT CanonicalVT = VTys[SplatSize-1];
9289
9290 // Build a canonical splat for this value.
9291 // Explicitly truncate APInt here, as this API is used with a mix of
9292 // signed and unsigned values.
9293 return DAG.getBitcast(
9294 ReqVT,
9295 DAG.getConstant(APInt(64, Val).trunc(SplatSize * 8), dl, CanonicalVT));
9296}
9297
9298/// BuildIntrinsicOp - Return a unary operator intrinsic node with the
9299/// specified intrinsic ID.
9301 const SDLoc &dl, EVT DestVT = MVT::Other) {
9302 if (DestVT == MVT::Other) DestVT = Op.getValueType();
9303 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
9304 DAG.getConstant(IID, dl, MVT::i32), Op);
9305}
9306
9307/// BuildIntrinsicOp - Return a binary operator intrinsic node with the
9308/// specified intrinsic ID.
9310 SelectionDAG &DAG, const SDLoc &dl,
9311 EVT DestVT = MVT::Other) {
9312 if (DestVT == MVT::Other) DestVT = LHS.getValueType();
9313 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
9314 DAG.getConstant(IID, dl, MVT::i32), LHS, RHS);
9315}
9316
9317/// BuildIntrinsicOp - Return a ternary operator intrinsic node with the
9318/// specified intrinsic ID.
9319static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op0, SDValue Op1,
9320 SDValue Op2, SelectionDAG &DAG, const SDLoc &dl,
9321 EVT DestVT = MVT::Other) {
9322 if (DestVT == MVT::Other) DestVT = Op0.getValueType();
9323 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
9324 DAG.getConstant(IID, dl, MVT::i32), Op0, Op1, Op2);
9325}
9326
9327/// BuildVSLDOI - Return a VECTOR_SHUFFLE that is a vsldoi of the specified
9328/// amount. The result has the specified value type.
9329static SDValue BuildVSLDOI(SDValue LHS, SDValue RHS, unsigned Amt, EVT VT,
9330 SelectionDAG &DAG, const SDLoc &dl) {
9331 // Force LHS/RHS to be the right type.
9332 LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, LHS);
9333 RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, RHS);
9334
9335 int Ops[16];
9336 for (unsigned i = 0; i != 16; ++i)
9337 Ops[i] = i + Amt;
9338 SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, LHS, RHS, Ops);
9339 return DAG.getNode(ISD::BITCAST, dl, VT, T);
9340}
9341
9342/// Do we have an efficient pattern in a .td file for this node?
9343///
9344/// \param V - pointer to the BuildVectorSDNode being matched
9345/// \param HasDirectMove - does this subtarget have VSR <-> GPR direct moves?
9346///
9347/// There are some patterns where it is beneficial to keep a BUILD_VECTOR
9348/// node as a BUILD_VECTOR node rather than expanding it. The patterns where
9349/// the opposite is true (expansion is beneficial) are:
9350/// - The node builds a vector out of integers that are not 32 or 64-bits
9351/// - The node builds a vector out of constants
9352/// - The node is a "load-and-splat"
9353/// In all other cases, we will choose to keep the BUILD_VECTOR.
9355 bool HasDirectMove,
9356 bool HasP8Vector) {
9357 EVT VecVT = V->getValueType(0);
9358 bool RightType = VecVT == MVT::v2f64 ||
9359 (HasP8Vector && VecVT == MVT::v4f32) ||
9360 (HasDirectMove && (VecVT == MVT::v2i64 || VecVT == MVT::v4i32));
9361 if (!RightType)
9362 return false;
9363
9364 bool IsSplat = true;
9365 bool IsLoad = false;
9366 SDValue Op0 = V->getOperand(0);
9367
9368 // This function is called in a block that confirms the node is not a constant
9369 // splat. So a constant BUILD_VECTOR here means the vector is built out of
9370 // different constants.
9371 if (V->isConstant())
9372 return false;
9373 for (int i = 0, e = V->getNumOperands(); i < e; ++i) {
9374 if (V->getOperand(i).isUndef())
9375 return false;
9376 // We want to expand nodes that represent load-and-splat even if the
9377 // loaded value is a floating point truncation or conversion to int.
9378 if (V->getOperand(i).getOpcode() == ISD::LOAD ||
9379 (V->getOperand(i).getOpcode() == ISD::FP_ROUND &&
9380 V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD) ||
9381 (V->getOperand(i).getOpcode() == ISD::FP_TO_SINT &&
9382 V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD) ||
9383 (V->getOperand(i).getOpcode() == ISD::FP_TO_UINT &&
9384 V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD))
9385 IsLoad = true;
9386 // If the operands are different or the input is not a load and has more
9387 // uses than just this BV node, then it isn't a splat.
9388 if (V->getOperand(i) != Op0 ||
9389 (!IsLoad && !V->isOnlyUserOf(V->getOperand(i).getNode())))
9390 IsSplat = false;
9391 }
9392 return !(IsSplat && IsLoad);
9393}
9394
9395// Lower BITCAST(f128, (build_pair i64, i64)) to BUILD_FP128.
9396SDValue PPCTargetLowering::LowerBITCAST(SDValue Op, SelectionDAG &DAG) const {
9397
9398 SDLoc dl(Op);
9399 SDValue Op0 = Op->getOperand(0);
9400
9401 if (!Subtarget.isPPC64() || (Op0.getOpcode() != ISD::BUILD_PAIR) ||
9402 (Op.getValueType() != MVT::f128))
9403 return SDValue();
9404
9405 SDValue Lo = Op0.getOperand(0);
9406 SDValue Hi = Op0.getOperand(1);
9407 if ((Lo.getValueType() != MVT::i64) || (Hi.getValueType() != MVT::i64))
9408 return SDValue();
9409
9410 if (!Subtarget.isLittleEndian())
9411 std::swap(Lo, Hi);
9412
9413 return DAG.getNode(PPCISD::BUILD_FP128, dl, MVT::f128, Lo, Hi);
9414}
9415
9416static const SDValue *getNormalLoadInput(const SDValue &Op, bool &IsPermuted) {
9417 const SDValue *InputLoad = &Op;
9418 while (InputLoad->getOpcode() == ISD::BITCAST)
9419 InputLoad = &InputLoad->getOperand(0);
9420 if (InputLoad->getOpcode() == ISD::SCALAR_TO_VECTOR ||
9421 InputLoad->getOpcode() == PPCISD::SCALAR_TO_VECTOR_PERMUTED) {
9422 IsPermuted = InputLoad->getOpcode() == PPCISD::SCALAR_TO_VECTOR_PERMUTED;
9423 InputLoad = &InputLoad->getOperand(0);
9424 }
9425 if (InputLoad->getOpcode() != ISD::LOAD)
9426 return nullptr;
9427 LoadSDNode *LD = cast<LoadSDNode>(*InputLoad);
9428 return ISD::isNormalLoad(LD) ? InputLoad : nullptr;
9429}
9430
9431// Convert the argument APFloat to a single precision APFloat if there is no
9432// loss in information during the conversion to single precision APFloat and the
9433// resulting number is not a denormal number. Return true if successful.
9435 APFloat APFloatToConvert = ArgAPFloat;
9436 bool LosesInfo = true;
9438 &LosesInfo);
9439 bool Success = (!LosesInfo && !APFloatToConvert.isDenormal());
9440 if (Success)
9441 ArgAPFloat = APFloatToConvert;
9442 return Success;
9443}
9444
9445// Bitcast the argument APInt to a double and convert it to a single precision
9446// APFloat, bitcast the APFloat to an APInt and assign it to the original
9447// argument if there is no loss in information during the conversion from
9448// double to single precision APFloat and the resulting number is not a denormal
9449// number. Return true if successful.
9451 double DpValue = ArgAPInt.bitsToDouble();
9452 APFloat APFloatDp(DpValue);
9453 bool Success = convertToNonDenormSingle(APFloatDp);
9454 if (Success)
9455 ArgAPInt = APFloatDp.bitcastToAPInt();
9456 return Success;
9457}
9458
9459// Nondestructive check for convertTonNonDenormSingle.
9461 // Only convert if it loses info, since XXSPLTIDP should
9462 // handle the other case.
9463 APFloat APFloatToConvert = ArgAPFloat;
9464 bool LosesInfo = true;
9466 &LosesInfo);
9467
9468 return (!LosesInfo && !APFloatToConvert.isDenormal());
9469}
9470
9471static bool isValidSplatLoad(const PPCSubtarget &Subtarget, const SDValue &Op,
9472 unsigned &Opcode) {
9473 LoadSDNode *InputNode = dyn_cast<LoadSDNode>(Op.getOperand(0));
9474 if (!InputNode || !Subtarget.hasVSX() || !ISD::isUNINDEXEDLoad(InputNode))
9475 return false;
9476
9477 EVT Ty = Op->getValueType(0);
9478 // For v2f64, v4f32 and v4i32 types, we require the load to be non-extending
9479 // as we cannot handle extending loads for these types.
9480 if ((Ty == MVT::v2f64 || Ty == MVT::v4f32 || Ty == MVT::v4i32) &&
9481 ISD::isNON_EXTLoad(InputNode))
9482 return true;
9483
9484 EVT MemVT = InputNode->getMemoryVT();
9485 // For v8i16 and v16i8 types, extending loads can be handled as long as the
9486 // memory VT is the same vector element VT type.
9487 // The loads feeding into the v8i16 and v16i8 types will be extending because
9488 // scalar i8/i16 are not legal types.
9489 if ((Ty == MVT::v8i16 || Ty == MVT::v16i8) && ISD::isEXTLoad(InputNode) &&
9490 (MemVT == Ty.getVectorElementType()))
9491 return true;
9492
9493 if (Ty == MVT::v2i64) {
9494 // Check the extend type, when the input type is i32, and the output vector
9495 // type is v2i64.
9496 if (MemVT == MVT::i32) {
9497 if (ISD::isZEXTLoad(InputNode))
9498 Opcode = PPCISD::ZEXT_LD_SPLAT;
9499 if (ISD::isSEXTLoad(InputNode))
9500 Opcode = PPCISD::SEXT_LD_SPLAT;
9501 }
9502 return true;
9503 }
9504 return false;
9505}
9506
9508 bool IsLittleEndian) {
9509 assert(BVN.getNumOperands() > 0 && "Unexpected 0-size build vector");
9510
9511 BitMask.clearAllBits();
9512 EVT VT = BVN.getValueType(0);
9513 unsigned VTSize = VT.getSizeInBits();
9514 APInt ConstValue(VTSize, 0);
9515
9516 unsigned EltWidth = VT.getScalarSizeInBits();
9517
9518 unsigned BitPos = 0;
9519 for (auto OpVal : BVN.op_values()) {
9520 auto *CN = dyn_cast<ConstantSDNode>(OpVal);
9521
9522 if (!CN)
9523 return false;
9524 // The elements in a vector register are ordered in reverse byte order
9525 // between little-endian and big-endian modes.
9526 ConstValue.insertBits(CN->getAPIntValue().zextOrTrunc(EltWidth),
9527 IsLittleEndian ? BitPos : VTSize - EltWidth - BitPos);
9528 BitPos += EltWidth;
9529 }
9530
9531 for (unsigned J = 0; J < 16; ++J) {
9532 APInt ExtractValue = ConstValue.extractBits(8, J * 8);
9533 if (ExtractValue != 0x00 && ExtractValue != 0xFF)
9534 return false;
9535 if (ExtractValue == 0xFF)
9536 BitMask.setBit(J);
9537 }
9538 return true;
9539}
9540
9541// If this is a case we can't handle, return null and let the default
9542// expansion code take care of it. If we CAN select this case, and if it
9543// selects to a single instruction, return Op. Otherwise, if we can codegen
9544// this case more efficiently than a constant pool load, lower it to the
9545// sequence of ops that should be used.
9546SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
9547 SelectionDAG &DAG) const {
9548 SDLoc dl(Op);
9549 BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
9550 assert(BVN && "Expected a BuildVectorSDNode in LowerBUILD_VECTOR");
9551
9552 if (Subtarget.hasP10Vector()) {
9553 APInt BitMask(32, 0);
9554 // If the value of the vector is all zeros or all ones,
9555 // we do not convert it to MTVSRBMI.
9556 // The xxleqv instruction sets a vector with all ones.
9557 // The xxlxor instruction sets a vector with all zeros.
9558 if (isValidMtVsrBmi(BitMask, *BVN, Subtarget.isLittleEndian()) &&
9559 BitMask != 0 && BitMask != 0xffff) {
9560 SDValue SDConstant = DAG.getTargetConstant(BitMask, dl, MVT::i32);
9561 MachineSDNode *MSDNode =
9562 DAG.getMachineNode(PPC::MTVSRBMI, dl, MVT::v16i8, SDConstant);
9563 SDValue SDV = SDValue(MSDNode, 0);
9564 EVT DVT = BVN->getValueType(0);
9565 EVT SVT = SDV.getValueType();
9566 if (SVT != DVT) {
9567 SDV = DAG.getNode(ISD::BITCAST, dl, DVT, SDV);
9568 }
9569 return SDV;
9570 }
9571 // Recognize build vector patterns to emit VSX vector instructions
9572 // instead of loading value from memory.
9573 if (SDValue VecPat = combineBVLoadsSpecialValue(Op, DAG))
9574 return VecPat;
9575 }
9576 // Check if this is a splat of a constant value.
9577 APInt APSplatBits, APSplatUndef;
9578 unsigned SplatBitSize;
9579 bool HasAnyUndefs;
9580 bool BVNIsConstantSplat =
9581 BVN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
9582 HasAnyUndefs, 0, !Subtarget.isLittleEndian());
9583
9584 // If it is a splat of a double, check if we can shrink it to a 32 bit
9585 // non-denormal float which when converted back to double gives us the same
9586 // double. This is to exploit the XXSPLTIDP instruction.
9587 // If we lose precision, we use XXSPLTI32DX.
9588 if (BVNIsConstantSplat && (SplatBitSize == 64) &&
9589 Subtarget.hasPrefixInstrs() && Subtarget.hasP10Vector()) {
9590 // Check the type first to short-circuit so we don't modify APSplatBits if
9591 // this block isn't executed.
9592 if ((Op->getValueType(0) == MVT::v2f64) &&
9593 convertToNonDenormSingle(APSplatBits)) {
9594 SDValue SplatNode = DAG.getNode(
9595 PPCISD::XXSPLTI_SP_TO_DP, dl, MVT::v2f64,
9596 DAG.getTargetConstant(APSplatBits.getZExtValue(), dl, MVT::i32));
9597 return DAG.getBitcast(Op.getValueType(), SplatNode);
9598 } else {
9599 // We may lose precision, so we have to use XXSPLTI32DX.
9600
9601 uint32_t Hi = Hi_32(APSplatBits.getZExtValue());
9602 uint32_t Lo = Lo_32(APSplatBits.getZExtValue());
9603 SDValue SplatNode = DAG.getUNDEF(MVT::v2i64);
9604
9605 if (!Hi || !Lo)
9606 // If either load is 0, then we should generate XXLXOR to set to 0.
9607 SplatNode = DAG.getTargetConstant(0, dl, MVT::v2i64);
9608
9609 if (Hi)
9610 SplatNode = DAG.getNode(
9611 PPCISD::XXSPLTI32DX, dl, MVT::v2i64, SplatNode,
9612 DAG.getTargetConstant(0, dl, MVT::i32),
9613 DAG.getTargetConstant(Hi, dl, MVT::i32));
9614
9615 if (Lo)
9616 SplatNode =
9617 DAG.getNode(PPCISD::XXSPLTI32DX, dl, MVT::v2i64, SplatNode,
9618 DAG.getTargetConstant(1, dl, MVT::i32),
9619 DAG.getTargetConstant(Lo, dl, MVT::i32));
9620
9621 return DAG.getBitcast(Op.getValueType(), SplatNode);
9622 }
9623 }
9624
9625 bool IsSplat64 = false;
9626 uint64_t SplatBits = 0;
9627 int32_t SextVal = 0;
9628 if (BVNIsConstantSplat && SplatBitSize <= 64) {
9629 SplatBits = APSplatBits.getZExtValue();
9630 if (SplatBitSize <= 32) {
9631 SextVal = SignExtend32(SplatBits, SplatBitSize);
9632 } else if (SplatBitSize == 64 && Subtarget.hasP8Altivec()) {
9633 int64_t Splat64Val = static_cast<int64_t>(SplatBits);
9634 bool P9Vector = Subtarget.hasP9Vector();
9635 int32_t Hi = P9Vector ? 127 : 15;
9636 int32_t Lo = P9Vector ? -128 : -16;
9637 IsSplat64 = Splat64Val >= Lo && Splat64Val <= Hi;
9638 SextVal = static_cast<int32_t>(SplatBits);
9639 }
9640 }
9641
9642 if (!BVNIsConstantSplat || (SplatBitSize > 32 && !IsSplat64)) {
9643 unsigned NewOpcode = PPCISD::LD_SPLAT;
9644
9645 // Handle load-and-splat patterns as we have instructions that will do this
9646 // in one go.
9647 if (DAG.isSplatValue(Op, true) &&
9648 isValidSplatLoad(Subtarget, Op, NewOpcode)) {
9649 const SDValue *InputLoad = &Op.getOperand(0);
9650 LoadSDNode *LD = cast<LoadSDNode>(*InputLoad);
9651
9652 // If the input load is an extending load, it will be an i32 -> i64
9653 // extending load and isValidSplatLoad() will update NewOpcode.
9654 unsigned MemorySize = LD->getMemoryVT().getScalarSizeInBits();
9655 unsigned ElementSize =
9656 MemorySize * ((NewOpcode == PPCISD::LD_SPLAT) ? 1 : 2);
9657
9658 assert(((ElementSize == 2 * MemorySize)
9659 ? (NewOpcode == PPCISD::ZEXT_LD_SPLAT ||
9660 NewOpcode == PPCISD::SEXT_LD_SPLAT)
9661 : (NewOpcode == PPCISD::LD_SPLAT)) &&
9662 "Unmatched element size and opcode!\n");
9663
9664 // Checking for a single use of this load, we have to check for vector
9665 // width (128 bits) / ElementSize uses (since each operand of the
9666 // BUILD_VECTOR is a separate use of the value.
9667 unsigned NumUsesOfInputLD = 128 / ElementSize;
9668 for (SDValue BVInOp : Op->ops())
9669 if (BVInOp.isUndef())
9670 NumUsesOfInputLD--;
9671
9672 // Exclude somes case where LD_SPLAT is worse than scalar_to_vector:
9673 // Below cases should also happen for "lfiwzx/lfiwax + LE target + index
9674 // 1" and "lxvrhx + BE target + index 7" and "lxvrbx + BE target + index
9675 // 15", but function IsValidSplatLoad() now will only return true when
9676 // the data at index 0 is not nullptr. So we will not get into trouble for
9677 // these cases.
9678 //
9679 // case 1 - lfiwzx/lfiwax
9680 // 1.1: load result is i32 and is sign/zero extend to i64;
9681 // 1.2: build a v2i64 vector type with above loaded value;
9682 // 1.3: the vector has only one value at index 0, others are all undef;
9683 // 1.4: on BE target, so that lfiwzx/lfiwax does not need any permute.
9684 if (NumUsesOfInputLD == 1 &&
9685 (Op->getValueType(0) == MVT::v2i64 && NewOpcode != PPCISD::LD_SPLAT &&
9686 !Subtarget.isLittleEndian() && Subtarget.hasVSX() &&
9687 Subtarget.hasLFIWAX()))
9688 return SDValue();
9689
9690 // case 2 - lxvr[hb]x
9691 // 2.1: load result is at most i16;
9692 // 2.2: build a vector with above loaded value;
9693 // 2.3: the vector has only one value at index 0, others are all undef;
9694 // 2.4: on LE target, so that lxvr[hb]x does not need any permute.
9695 if (NumUsesOfInputLD == 1 && Subtarget.isLittleEndian() &&
9696 Subtarget.isISA3_1() && ElementSize <= 16)
9697 return SDValue();
9698
9699 assert(NumUsesOfInputLD > 0 && "No uses of input LD of a build_vector?");
9700 if (InputLoad->getNode()->hasNUsesOfValue(NumUsesOfInputLD, 0) &&
9701 Subtarget.hasVSX()) {
9702 SDValue Ops[] = {
9703 LD->getChain(), // Chain
9704 LD->getBasePtr(), // Ptr
9705 DAG.getValueType(Op.getValueType()) // VT
9706 };
9707 SDValue LdSplt = DAG.getMemIntrinsicNode(
9708 NewOpcode, dl, DAG.getVTList(Op.getValueType(), MVT::Other), Ops,
9709 LD->getMemoryVT(), LD->getMemOperand());
9710 // Replace all uses of the output chain of the original load with the
9711 // output chain of the new load.
9712 DAG.ReplaceAllUsesOfValueWith(InputLoad->getValue(1),
9713 LdSplt.getValue(1));
9714 return LdSplt;
9715 }
9716 }
9717
9718 // In 64BIT mode BUILD_VECTOR nodes that are not constant splats of up to
9719 // 32-bits can be lowered to VSX instructions under certain conditions.
9720 // Without VSX, there is no pattern more efficient than expanding the node.
9721 if (Subtarget.hasVSX() && Subtarget.isPPC64() &&
9722 haveEfficientBuildVectorPattern(BVN, Subtarget.hasDirectMove(),
9723 Subtarget.hasP8Vector()))
9724 return Op;
9725 return SDValue();
9726 }
9727
9728 uint64_t SplatUndef = APSplatUndef.getZExtValue();
9729 unsigned SplatSize = SplatBitSize / 8;
9730
9731 // First, handle single instruction cases.
9732
9733 // All zeros?
9734 if (SplatBits == 0) {
9735 // Canonicalize all zero vectors to be v4i32.
9736 if (Op.getValueType() != MVT::v4i32 || HasAnyUndefs) {
9737 SDValue Z = DAG.getConstant(0, dl, MVT::v4i32);
9738 Op = DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Z);
9739 }
9740 return Op;
9741 }
9742
9743 // We have XXSPLTIW for constant splats four bytes wide.
9744 // Given vector length is a multiple of 4, 2-byte splats can be replaced
9745 // with 4-byte splats. We replicate the SplatBits in case of 2-byte splat to
9746 // make a 4-byte splat element. For example: 2-byte splat of 0xABAB can be
9747 // turned into a 4-byte splat of 0xABABABAB.
9748 if (Subtarget.hasPrefixInstrs() && Subtarget.hasP10Vector() && SplatSize == 2)
9749 return getCanonicalConstSplat(SplatBits | (SplatBits << 16), SplatSize * 2,
9750 Op.getValueType(), DAG, dl);
9751
9752 if (Subtarget.hasPrefixInstrs() && Subtarget.hasP10Vector() && SplatSize == 4)
9753 return getCanonicalConstSplat(SplatBits, SplatSize, Op.getValueType(), DAG,
9754 dl);
9755
9756 // We have XXSPLTIB for constant splats one byte wide.
9757 if (Subtarget.hasP9Vector() && SplatSize == 1)
9758 return getCanonicalConstSplat(SplatBits, SplatSize, Op.getValueType(), DAG,
9759 dl);
9760
9761 // If the sign extended value is in the range [-16,15], use VSPLTI[bhw].
9762 // Use VSPLTIW/VUPKLSW for v2i64 in range [-16,15].
9763 if (SextVal >= -16 && SextVal <= 15) {
9764 // SplatSize may be 1, 2, 4, or 8. Use size 4 instead of 8 for the splat to
9765 // generate a splat word with extend for size 8.
9766 unsigned UseSize = SplatSize == 8 ? 4 : SplatSize;
9767 SDValue Res =
9768 getCanonicalConstSplat(SextVal, UseSize, Op.getValueType(), DAG, dl);
9769 if (SplatSize != 8)
9770 return Res;
9771 return BuildIntrinsicOp(Intrinsic::ppc_altivec_vupklsw, Res, DAG, dl);
9772 }
9773
9774 // Two instruction sequences.
9775
9776 if (Subtarget.hasP9Vector() && SextVal >= -128 && SextVal <= 127) {
9777 SDValue C = DAG.getConstant((unsigned char)SextVal, dl, MVT::i32);
9779 SDValue BV = DAG.getBuildVector(MVT::v16i8, dl, Ops);
9780 unsigned IID;
9781 EVT VT;
9782 switch (SplatSize) {
9783 default:
9784 llvm_unreachable("Unexpected type for vector constant.");
9785 case 2:
9786 IID = Intrinsic::ppc_altivec_vupklsb;
9787 VT = MVT::v8i16;
9788 break;
9789 case 4:
9790 IID = Intrinsic::ppc_altivec_vextsb2w;
9791 VT = MVT::v4i32;
9792 break;
9793 case 8:
9794 IID = Intrinsic::ppc_altivec_vextsb2d;
9795 VT = MVT::v2i64;
9796 break;
9797 }
9798 SDValue Extend = BuildIntrinsicOp(IID, BV, DAG, dl, VT);
9799 return DAG.getBitcast(Op->getValueType(0), Extend);
9800 }
9801 assert(!IsSplat64 && "Unhandled 64-bit splat pattern");
9802
9803 // If this value is in the range [-32,30] and is even, use:
9804 // VSPLTI[bhw](val/2) + VSPLTI[bhw](val/2)
9805 // If this value is in the range [17,31] and is odd, use:
9806 // VSPLTI[bhw](val-16) - VSPLTI[bhw](-16)
9807 // If this value is in the range [-31,-17] and is odd, use:
9808 // VSPLTI[bhw](val+16) + VSPLTI[bhw](-16)
9809 // Note the last two are three-instruction sequences.
9810 if (SextVal >= -32 && SextVal <= 31) {
9811 // To avoid having these optimizations undone by constant folding,
9812 // we convert to a pseudo that will be expanded later into one of
9813 // the above forms.
9814 SDValue Elt = DAG.getSignedConstant(SextVal, dl, MVT::i32);
9815 EVT VT = (SplatSize == 1 ? MVT::v16i8 :
9816 (SplatSize == 2 ? MVT::v8i16 : MVT::v4i32));
9817 SDValue EltSize = DAG.getConstant(SplatSize, dl, MVT::i32);
9818 SDValue RetVal = DAG.getNode(PPCISD::VADD_SPLAT, dl, VT, Elt, EltSize);
9819 if (VT == Op.getValueType())
9820 return RetVal;
9821 else
9822 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), RetVal);
9823 }
9824
9825 // If this is 0x8000_0000 x 4, turn into vspltisw + vslw. If it is
9826 // 0x7FFF_FFFF x 4, turn it into not(0x8000_0000). This is important
9827 // for fneg/fabs.
9828 if (SplatSize == 4 && SplatBits == (0x7FFFFFFF&~SplatUndef)) {
9829 // Make -1 and vspltisw -1:
9830 SDValue OnesV = getCanonicalConstSplat(-1, 4, MVT::v4i32, DAG, dl);
9831
9832 // Make the VSLW intrinsic, computing 0x8000_0000.
9833 SDValue Res = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, OnesV,
9834 OnesV, DAG, dl);
9835
9836 // xor by OnesV to invert it.
9837 Res = DAG.getNode(ISD::XOR, dl, MVT::v4i32, Res, OnesV);
9838 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
9839 }
9840
9841 // Check to see if this is a wide variety of vsplti*, binop self cases.
9842 static const signed char SplatCsts[] = {
9843 -1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7,
9844 -8, 8, -9, 9, -10, 10, -11, 11, -12, 12, -13, 13, 14, -14, 15, -15, -16
9845 };
9846
9847 for (unsigned idx = 0; idx < std::size(SplatCsts); ++idx) {
9848 // Indirect through the SplatCsts array so that we favor 'vsplti -1' for
9849 // cases which are ambiguous (e.g. formation of 0x8000_0000). 'vsplti -1'
9850 int i = SplatCsts[idx];
9851
9852 // Figure out what shift amount will be used by altivec if shifted by i in
9853 // this splat size.
9854 unsigned TypeShiftAmt = i & (SplatBitSize-1);
9855
9856 // vsplti + shl self.
9857 if (SextVal == (int)((unsigned)i << TypeShiftAmt)) {
9858 SDValue Res = getCanonicalConstSplat(i, SplatSize, MVT::Other, DAG, dl);
9859 static const unsigned IIDs[] = { // Intrinsic to use for each size.
9860 Intrinsic::ppc_altivec_vslb, Intrinsic::ppc_altivec_vslh, 0,
9861 Intrinsic::ppc_altivec_vslw
9862 };
9863 Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
9864 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
9865 }
9866
9867 // vsplti + srl self.
9868 if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) {
9869 SDValue Res = getCanonicalConstSplat(i, SplatSize, MVT::Other, DAG, dl);
9870 static const unsigned IIDs[] = { // Intrinsic to use for each size.
9871 Intrinsic::ppc_altivec_vsrb, Intrinsic::ppc_altivec_vsrh, 0,
9872 Intrinsic::ppc_altivec_vsrw
9873 };
9874 Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
9875 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
9876 }
9877
9878 // vsplti + rol self.
9879 if (SextVal == (int)(((unsigned)i << TypeShiftAmt) |
9880 ((unsigned)i >> (SplatBitSize-TypeShiftAmt)))) {
9881 SDValue Res = getCanonicalConstSplat(i, SplatSize, MVT::Other, DAG, dl);
9882 static const unsigned IIDs[] = { // Intrinsic to use for each size.
9883 Intrinsic::ppc_altivec_vrlb, Intrinsic::ppc_altivec_vrlh, 0,
9884 Intrinsic::ppc_altivec_vrlw
9885 };
9886 Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
9887 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
9888 }
9889
9890 // t = vsplti c, result = vsldoi t, t, 1
9891 if (SextVal == (int)(((unsigned)i << 8) | (i < 0 ? 0xFF : 0))) {
9892 SDValue T = getCanonicalConstSplat(i, SplatSize, MVT::v16i8, DAG, dl);
9893 unsigned Amt = Subtarget.isLittleEndian() ? 15 : 1;
9894 return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
9895 }
9896 // t = vsplti c, result = vsldoi t, t, 2
9897 if (SextVal == (int)(((unsigned)i << 16) | (i < 0 ? 0xFFFF : 0))) {
9898 SDValue T = getCanonicalConstSplat(i, SplatSize, MVT::v16i8, DAG, dl);
9899 unsigned Amt = Subtarget.isLittleEndian() ? 14 : 2;
9900 return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
9901 }
9902 // t = vsplti c, result = vsldoi t, t, 3
9903 if (SextVal == (int)(((unsigned)i << 24) | (i < 0 ? 0xFFFFFF : 0))) {
9904 SDValue T = getCanonicalConstSplat(i, SplatSize, MVT::v16i8, DAG, dl);
9905 unsigned Amt = Subtarget.isLittleEndian() ? 13 : 3;
9906 return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
9907 }
9908 }
9909
9910 return SDValue();
9911}
9912
/// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
/// the specified operations to build the shuffle. PFEntry packs an opcode in
/// bits [29:26] and two 13-bit operand IDs in bits [25:13] and [12:0]; each
/// operand ID is itself an index into the perfect-shuffle table, so operands
/// are materialized recursively.
                                      SDValue RHS, SelectionDAG &DAG,
                                      const SDLoc &dl) {
  // Decode the packed table entry: opcode plus left/right operand indices.
  unsigned OpNum = (PFEntry >> 26) & 0x0F;
  unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1);
  unsigned RHSID = (PFEntry >> 0) & ((1 << 13)-1);

  enum {
    OP_COPY = 0, // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>
    OP_VMRGHW,
    OP_VMRGLW,
    OP_VSPLTISW0,
    OP_VSPLTISW1,
    OP_VSPLTISW2,
    OP_VSPLTISW3,
    OP_VSLDOI4,
    OP_VSLDOI8,
    OP_VSLDOI12
  };

  if (OpNum == OP_COPY) {
    // Operand ID <0,1,2,3> (base-9 encoded) means "pass through LHS";
    // <4,5,6,7> means "pass through RHS".
    if (LHSID == (1*9+2)*9+3) return LHS;
    assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!");
    return RHS;
  }

  // Recursively build the two inputs of this operation from their own
  // perfect-shuffle table entries.
  SDValue OpLHS, OpRHS;
  OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl);
  OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl);

  // Byte-level (v16i8) shuffle mask implementing the selected word-level op.
  int ShufIdxs[16];
  switch (OpNum) {
  default: llvm_unreachable("Unknown i32 permute!");
  case OP_VMRGHW:
    ShufIdxs[ 0] =  0; ShufIdxs[ 1] =  1; ShufIdxs[ 2] =  2; ShufIdxs[ 3] =  3;
    ShufIdxs[ 4] = 16; ShufIdxs[ 5] = 17; ShufIdxs[ 6] = 18; ShufIdxs[ 7] = 19;
    ShufIdxs[ 8] =  4; ShufIdxs[ 9] =  5; ShufIdxs[10] =  6; ShufIdxs[11] =  7;
    ShufIdxs[12] = 20; ShufIdxs[13] = 21; ShufIdxs[14] = 22; ShufIdxs[15] = 23;
    break;
  case OP_VMRGLW:
    ShufIdxs[ 0] =  8; ShufIdxs[ 1] =  9; ShufIdxs[ 2] = 10; ShufIdxs[ 3] = 11;
    ShufIdxs[ 4] = 24; ShufIdxs[ 5] = 25; ShufIdxs[ 6] = 26; ShufIdxs[ 7] = 27;
    ShufIdxs[ 8] = 12; ShufIdxs[ 9] = 13; ShufIdxs[10] = 14; ShufIdxs[11] = 15;
    ShufIdxs[12] = 28; ShufIdxs[13] = 29; ShufIdxs[14] = 30; ShufIdxs[15] = 31;
    break;
  case OP_VSPLTISW0:
    for (unsigned i = 0; i != 16; ++i)
      ShufIdxs[i] = (i&3)+0;
    break;
  case OP_VSPLTISW1:
    for (unsigned i = 0; i != 16; ++i)
      ShufIdxs[i] = (i&3)+4;
    break;
  case OP_VSPLTISW2:
    for (unsigned i = 0; i != 16; ++i)
      ShufIdxs[i] = (i&3)+8;
    break;
  case OP_VSPLTISW3:
    for (unsigned i = 0; i != 16; ++i)
      ShufIdxs[i] = (i&3)+12;
    break;
  // The VSLDOI cases are emitted directly rather than through ShufIdxs.
  case OP_VSLDOI4:
    return BuildVSLDOI(OpLHS, OpRHS, 4, OpLHS.getValueType(), DAG, dl);
  case OP_VSLDOI8:
    return BuildVSLDOI(OpLHS, OpRHS, 8, OpLHS.getValueType(), DAG, dl);
  case OP_VSLDOI12:
    return BuildVSLDOI(OpLHS, OpRHS, 12, OpLHS.getValueType(), DAG, dl);
  }
  // Perform the shuffle on v16i8 and bitcast back to the operands' type.
  EVT VT = OpLHS.getValueType();
  OpLHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpLHS);
  OpRHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpRHS);
  SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, OpLHS, OpRHS, ShufIdxs);
  return DAG.getNode(ISD::BITCAST, dl, VT, T);
}
9989
/// lowerToVINSERTB - Return the SDValue if this VECTOR_SHUFFLE can be handled
/// by the VINSERTB instruction introduced in ISA 3.0, else just return default
/// SDValue.
SDValue PPCTargetLowering::lowerToVINSERTB(ShuffleVectorSDNode *N,
                                           SelectionDAG &DAG) const {
  const unsigned BytesInVector = 16;
  bool IsLE = Subtarget.isLittleEndian();
  SDLoc dl(N);
  SDValue V1 = N->getOperand(0);
  SDValue V2 = N->getOperand(1);
  unsigned ShiftElts = 0, InsertAtByte = 0;
  bool Swap = false;

  // Shifts required to get the byte we want at element 7.
  unsigned LittleEndianShifts[] = {8, 7, 6, 5, 4, 3, 2, 1,
                                   0, 15, 14, 13, 12, 11, 10, 9};
  unsigned BigEndianShifts[] = {9, 10, 11, 12, 13, 14, 15, 0,
                                1, 2, 3, 4, 5, 6, 7, 8};

  ArrayRef<int> Mask = N->getMask();
  int OriginalOrder[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};

  // For each mask element, find out if we're just inserting something
  // from V2 into V1 or vice versa.
  // Possible permutations inserting an element from V2 into V1:
  //   X, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
  //   0, X, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
  //   ...
  //   0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, X
  // Inserting from V1 into V2 will be similar, except mask range will be
  // [16,31].

  bool FoundCandidate = false;
  // If both vector operands for the shuffle are the same vector, the mask
  // will contain only elements from the first one and the second one will be
  // undef.
  unsigned VINSERTBSrcElem = IsLE ? 8 : 7;
  // Go through the mask of bytes to find an element that's being moved
  // from one vector to the other.
  for (unsigned i = 0; i < BytesInVector; ++i) {
    unsigned CurrentElement = Mask[i];
    // If 2nd operand is undefined, we should only look for element 7 in the
    // Mask.
    if (V2.isUndef() && CurrentElement != VINSERTBSrcElem)
      continue;

    bool OtherElementsInOrder = true;
    // Examine the other elements in the Mask to see if they're in original
    // order.
    for (unsigned j = 0; j < BytesInVector; ++j) {
      if (j == i)
        continue;
      // If CurrentElement is from V1 [0,15], then we expect the rest of the
      // Mask to be from V2 [16,31] and vice versa. Unless the 2nd operand is
      // undefined, in which case we always assume we're picking from the 1st
      // operand.
      int MaskOffset =
          (!V2.isUndef() && CurrentElement < BytesInVector) ? BytesInVector : 0;
      if (Mask[j] != OriginalOrder[j] + MaskOffset) {
        OtherElementsInOrder = false;
        break;
      }
    }
    // If other elements are in original order, we record the number of shifts
    // we need to get the element we want into element 7. Also record which byte
    // in the vector we should insert into.
    if (OtherElementsInOrder) {
      // If 2nd operand is undefined, we assume no shifts and no swapping.
      if (V2.isUndef()) {
        ShiftElts = 0;
        Swap = false;
      } else {
        // Only need the last 4-bits for shifts because operands will be
        // swapped if CurrentElement is >= 2^4 (i.e. comes from V2).
        ShiftElts = IsLE ? LittleEndianShifts[CurrentElement & 0xF]
                         : BigEndianShifts[CurrentElement & 0xF];
        Swap = CurrentElement < BytesInVector;
      }
      InsertAtByte = IsLE ? BytesInVector - (i + 1) : i;
      FoundCandidate = true;
      break;
    }
  }

  if (!FoundCandidate)
    return SDValue();

  // Candidate found, construct the proper SDAG sequence with VINSERTB,
  // optionally with VECSHL if shift is required.
  if (Swap)
    std::swap(V1, V2);
  if (V2.isUndef())
    V2 = V1;
  if (ShiftElts) {
    // Rotate the source so the desired byte lands in the VINSERTB source slot.
    SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v16i8, V2, V2,
                              DAG.getConstant(ShiftElts, dl, MVT::i32));
    return DAG.getNode(PPCISD::VECINSERT, dl, MVT::v16i8, V1, Shl,
                       DAG.getConstant(InsertAtByte, dl, MVT::i32));
  }
  return DAG.getNode(PPCISD::VECINSERT, dl, MVT::v16i8, V1, V2,
                     DAG.getConstant(InsertAtByte, dl, MVT::i32));
}
10090
/// lowerToVINSERTH - Return the SDValue if this VECTOR_SHUFFLE can be handled
/// by the VINSERTH instruction introduced in ISA 3.0, else just return default
/// SDValue.
SDValue PPCTargetLowering::lowerToVINSERTH(ShuffleVectorSDNode *N,
                                           SelectionDAG &DAG) const {
  const unsigned NumHalfWords = 8;
  const unsigned BytesInVector = NumHalfWords * 2;
  // Check that the shuffle is on half-words.
  if (!isNByteElemShuffleMask(N, 2, 1))
    return SDValue();

  bool IsLE = Subtarget.isLittleEndian();
  SDLoc dl(N);
  SDValue V1 = N->getOperand(0);
  SDValue V2 = N->getOperand(1);
  unsigned ShiftElts = 0, InsertAtByte = 0;
  bool Swap = false;

  // Shifts required to get the half-word we want at element 3.
  unsigned LittleEndianShifts[] = {4, 3, 2, 1, 0, 7, 6, 5};
  unsigned BigEndianShifts[] = {5, 6, 7, 0, 1, 2, 3, 4};

  uint32_t Mask = 0;
  uint32_t OriginalOrderLow = 0x1234567;
  uint32_t OriginalOrderHigh = 0x89ABCDEF;
  // Now we look at mask elements 0,2,4,6,8,10,12,14. Pack the mask into a
  // 32-bit space, only need 4-bit nibbles per element. Element i of the
  // half-word mask ends up in nibble (7 - i) of Mask.
  for (unsigned i = 0; i < NumHalfWords; ++i) {
    unsigned MaskShift = (NumHalfWords - 1 - i) * 4;
    Mask |= ((uint32_t)(N->getMaskElt(i * 2) / 2) << MaskShift);
  }

  // For each mask element, find out if we're just inserting something
  // from V2 into V1 or vice versa. Possible permutations inserting an element
  // from V2 into V1:
  //   X, 1, 2, 3, 4, 5, 6, 7
  //   0, X, 2, 3, 4, 5, 6, 7
  //   0, 1, X, 3, 4, 5, 6, 7
  //   0, 1, 2, X, 4, 5, 6, 7
  //   0, 1, 2, 3, X, 5, 6, 7
  //   0, 1, 2, 3, 4, X, 6, 7
  //   0, 1, 2, 3, 4, 5, X, 7
  //   0, 1, 2, 3, 4, 5, 6, X
  // Inserting from V1 into V2 will be similar, except mask range will be [8,15].

  bool FoundCandidate = false;
  // Go through the mask of half-words to find an element that's being moved
  // from one vector to the other.
  for (unsigned i = 0; i < NumHalfWords; ++i) {
    unsigned MaskShift = (NumHalfWords - 1 - i) * 4;
    uint32_t MaskOneElt = (Mask >> MaskShift) & 0xF;
    // Mask covering every nibble except the one under consideration.
    uint32_t MaskOtherElts = ~(0xF << MaskShift);
    uint32_t TargetOrder = 0x0;

    // If both vector operands for the shuffle are the same vector, the mask
    // will contain only elements from the first one and the second one will be
    // undef.
    if (V2.isUndef()) {
      ShiftElts = 0;
      unsigned VINSERTHSrcElem = IsLE ? 4 : 3;
      TargetOrder = OriginalOrderLow;
      Swap = false;
      // Skip if not the correct element or mask of other elements don't equal
      // to our expected order.
      if (MaskOneElt == VINSERTHSrcElem &&
          (Mask & MaskOtherElts) == (TargetOrder & MaskOtherElts)) {
        InsertAtByte = IsLE ? BytesInVector - (i + 1) * 2 : i * 2;
        FoundCandidate = true;
        break;
      }
    } else { // If both operands are defined.
      // Target order is [8,15] if the current mask is between [0,7].
      TargetOrder =
          (MaskOneElt < NumHalfWords) ? OriginalOrderHigh : OriginalOrderLow;
      // Skip if mask of other elements don't equal our expected order.
      if ((Mask & MaskOtherElts) == (TargetOrder & MaskOtherElts)) {
        // We only need the last 3 bits for the number of shifts.
        ShiftElts = IsLE ? LittleEndianShifts[MaskOneElt & 0x7]
                         : BigEndianShifts[MaskOneElt & 0x7];
        InsertAtByte = IsLE ? BytesInVector - (i + 1) * 2 : i * 2;
        Swap = MaskOneElt < NumHalfWords;
        FoundCandidate = true;
        break;
      }
    }
  }

  if (!FoundCandidate)
    return SDValue();

  // Candidate found, construct the proper SDAG sequence with VINSERTH,
  // optionally with VECSHL if shift is required.
  if (Swap)
    std::swap(V1, V2);
  if (V2.isUndef())
    V2 = V1;
  SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V1);
  if (ShiftElts) {
    // Double ShiftElts because we're left shifting on v16i8 type.
    SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v16i8, V2, V2,
                              DAG.getConstant(2 * ShiftElts, dl, MVT::i32));
    SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, Shl);
    SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v8i16, Conv1, Conv2,
                              DAG.getConstant(InsertAtByte, dl, MVT::i32));
    return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
  }
  SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V2);
  SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v8i16, Conv1, Conv2,
                            DAG.getConstant(InsertAtByte, dl, MVT::i32));
  return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
}
10202
/// lowerToXXSPLTI32DX - Return the SDValue if this VECTOR_SHUFFLE can be
/// handled by the XXSPLTI32DX instruction introduced in ISA 3.1, otherwise
/// return the default SDValue.
SDValue PPCTargetLowering::lowerToXXSPLTI32DX(ShuffleVectorSDNode *SVN,
                                              SelectionDAG &DAG) const {
  // The LHS and RHS may be bitcasts to v16i8 as we canonicalize shuffles
  // to v16i8. Peek through the bitcasts to get the actual operands.

  auto ShuffleMask = SVN->getMask();
  SDValue VecShuffle(SVN, 0);
  SDLoc DL(SVN);

  // Check that we have a four byte shuffle.
  if (!isNByteElemShuffleMask(SVN, 4, 1))
    return SDValue();

  // Canonicalize the RHS being a BUILD_VECTOR when lowering to xxsplti32dx.
  if (RHS->getOpcode() != ISD::BUILD_VECTOR) {
    std::swap(LHS, RHS);
    ShuffleVectorSDNode *CommutedSV = dyn_cast<ShuffleVectorSDNode>(VecShuffle);
    if (!CommutedSV)
      return SDValue();
    ShuffleMask = CommutedSV->getMask();
  }

  // Ensure that the RHS is a vector of constants.
  BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(RHS.getNode());
  if (!BVN)
    return SDValue();

  // Check if RHS is a splat of 4-bytes (or smaller).
  APInt APSplatValue, APSplatUndef;
  unsigned SplatBitSize;
  bool HasAnyUndefs;
  if (!BVN->isConstantSplat(APSplatValue, APSplatUndef, SplatBitSize,
                            HasAnyUndefs, 0, !Subtarget.isLittleEndian()) ||
      SplatBitSize > 32)
    return SDValue();

  // Check that the shuffle mask matches the semantics of XXSPLTI32DX.
  // The instruction splats a constant C into two words of the source vector
  // producing { C, Unchanged, C, Unchanged } or { Unchanged, C, Unchanged, C }.
  // Thus we check that the shuffle mask is the equivalent of
  // <0, [4-7], 2, [4-7]> or <[4-7], 1, [4-7], 3> respectively.
  // Note: the check above of isNByteElemShuffleMask() ensures that the bytes
  // within each word are consecutive, so we only need to check the first byte.
  SDValue Index;
  bool IsLE = Subtarget.isLittleEndian();
  if ((ShuffleMask[0] == 0 && ShuffleMask[8] == 8) &&
      (ShuffleMask[4] % 4 == 0 && ShuffleMask[12] % 4 == 0 &&
       ShuffleMask[4] > 15 && ShuffleMask[12] > 15))
    // Words 0 and 2 pass through; words 1 and 3 take the splatted constant.
    Index = DAG.getTargetConstant(IsLE ? 0 : 1, DL, MVT::i32);
  else if ((ShuffleMask[4] == 4 && ShuffleMask[12] == 12) &&
           (ShuffleMask[0] % 4 == 0 && ShuffleMask[8] % 4 == 0 &&
            ShuffleMask[0] > 15 && ShuffleMask[8] > 15))
    // Words 1 and 3 pass through; words 0 and 2 take the splatted constant.
    Index = DAG.getTargetConstant(IsLE ? 1 : 0, DL, MVT::i32);
  else
    return SDValue();

  // If the splat is narrower than 32-bits, we need to get the 32-bit value
  // for XXSPLTI32DX by replicating the narrow splat up to a full word.
  unsigned SplatVal = APSplatValue.getZExtValue();
  for (; SplatBitSize < 32; SplatBitSize <<= 1)
    SplatVal |= (SplatVal << SplatBitSize);

  SDValue SplatNode = DAG.getNode(
      PPCISD::XXSPLTI32DX, DL, MVT::v2i64, DAG.getBitcast(MVT::v2i64, LHS),
      Index, DAG.getTargetConstant(SplatVal, DL, MVT::i32));
  return DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, SplatNode);
}
10276
10277/// LowerROTL - Custom lowering for ROTL(v1i128) to vector_shuffle(v16i8).
10278/// We lower ROTL(v1i128) to vector_shuffle(v16i8) only if shift amount is
10279/// a multiple of 8. Otherwise convert it to a scalar rotation(i128)
10280/// i.e (or (shl x, C1), (srl x, 128-C1)).
10281SDValue PPCTargetLowering::LowerROTL(SDValue Op, SelectionDAG &DAG) const {
10282 assert(Op.getOpcode() == ISD::ROTL && "Should only be called for ISD::ROTL");
10283 assert(Op.getValueType() == MVT::v1i128 &&
10284 "Only set v1i128 as custom, other type shouldn't reach here!");
10285 SDLoc dl(Op);
10286 SDValue N0 = peekThroughBitcasts(Op.getOperand(0));
10287 SDValue N1 = peekThroughBitcasts(Op.getOperand(1));
10288 unsigned SHLAmt = N1.getConstantOperandVal(0);
10289 if (SHLAmt % 8 == 0) {
10290 std::array<int, 16> Mask;
10291 std::iota(Mask.begin(), Mask.end(), 0);
10292 std::rotate(Mask.begin(), Mask.begin() + SHLAmt / 8, Mask.end());
10293 if (SDValue Shuffle =
10294 DAG.getVectorShuffle(MVT::v16i8, dl, DAG.getBitcast(MVT::v16i8, N0),
10295 DAG.getUNDEF(MVT::v16i8), Mask))
10296 return DAG.getNode(ISD::BITCAST, dl, MVT::v1i128, Shuffle);
10297 }
10298 SDValue ArgVal = DAG.getBitcast(MVT::i128, N0);
10299 SDValue SHLOp = DAG.getNode(ISD::SHL, dl, MVT::i128, ArgVal,
10300 DAG.getConstant(SHLAmt, dl, MVT::i32));
10301 SDValue SRLOp = DAG.getNode(ISD::SRL, dl, MVT::i128, ArgVal,
10302 DAG.getConstant(128 - SHLAmt, dl, MVT::i32));
10303 SDValue OROp = DAG.getNode(ISD::OR, dl, MVT::i128, SHLOp, SRLOp);
10304 return DAG.getNode(ISD::BITCAST, dl, MVT::v1i128, OROp);
10305}
10306
/// LowerVECTOR_SHUFFLE - Return the code we lower for VECTOR_SHUFFLE. If this
/// is a shuffle we can handle in a single instruction, return it. Otherwise,
/// return the code it can be lowered into. Worst case, it can always be
/// lowered into a vperm.
SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
                                               SelectionDAG &DAG) const {
  SDLoc dl(Op);
  SDValue V1 = Op.getOperand(0);
  SDValue V2 = Op.getOperand(1);
  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);

  // Any nodes that were combined in the target-independent combiner prior
  // to vector legalization will not be sent to the target combine. Try to
  // combine it here.
  if (SDValue NewShuffle = combineVectorShuffle(SVOp, DAG)) {
    if (!isa<ShuffleVectorSDNode>(NewShuffle))
      return NewShuffle;
    // The combine produced a new shuffle; refresh our cached operands.
    Op = NewShuffle;
    V1 = Op.getOperand(0);
    V2 = Op.getOperand(1);
  }
  EVT VT = Op.getValueType();
  bool isLittleEndian = Subtarget.isLittleEndian();

  // Outputs of the PPC::is*Mask matchers called below.
  unsigned ShiftElts, InsertAtByte;
  bool Swap = false;

  // If this is a load-and-splat, we can do that with a single instruction
  // in some cases. However if the load has multiple uses, we don't want to
  // combine it because that will just produce multiple loads.
  bool IsPermutedLoad = false;
  const SDValue *InputLoad = getNormalLoadInput(V1, IsPermutedLoad);
  if (InputLoad && Subtarget.hasVSX() && V2.isUndef() &&
      (PPC::isSplatShuffleMask(SVOp, 4) || PPC::isSplatShuffleMask(SVOp, 8)) &&
      InputLoad->hasOneUse()) {
    bool IsFourByte = PPC::isSplatShuffleMask(SVOp, 4);
    int SplatIdx =
        PPC::getSplatIdxForPPCMnemonics(SVOp, IsFourByte ? 4 : 8, DAG);

    // The splat index for permuted loads will be in the left half of the vector
    // which is strictly wider than the loaded value by 8 bytes. So we need to
    // adjust the splat index to point to the correct address in memory.
    if (IsPermutedLoad) {
      assert((isLittleEndian || IsFourByte) &&
             "Unexpected size for permuted load on big endian target");
      SplatIdx += IsFourByte ? 2 : 1;
      assert((SplatIdx < (IsFourByte ? 4 : 2)) &&
             "Splat of a value outside of the loaded memory");
    }

    LoadSDNode *LD = cast<LoadSDNode>(*InputLoad);
    // For 4-byte load-and-splat, we need Power9.
    if ((IsFourByte && Subtarget.hasP9Vector()) || !IsFourByte) {
      uint64_t Offset = 0;
      if (IsFourByte)
        Offset = isLittleEndian ? (3 - SplatIdx) * 4 : SplatIdx * 4;
      else
        Offset = isLittleEndian ? (1 - SplatIdx) * 8 : SplatIdx * 8;

      // If the width of the load is the same as the width of the splat,
      // loading with an offset would load the wrong memory.
      if (LD->getValueType(0).getSizeInBits() == (IsFourByte ? 32 : 64))
        Offset = 0;

      SDValue BasePtr = LD->getBasePtr();
      if (Offset != 0)
                              BasePtr, DAG.getIntPtrConstant(Offset, dl));
      SDValue Ops[] = {
        LD->getChain(),    // Chain
        BasePtr,           // BasePtr
        DAG.getValueType(Op.getValueType()) // VT
      };
      SDVTList VTL =
        DAG.getVTList(IsFourByte ? MVT::v4i32 : MVT::v2i64, MVT::Other);
      SDValue LdSplt =
        DAG.getMemIntrinsicNode(PPCISD::LD_SPLAT, dl, VTL,
                                Ops, LD->getMemoryVT(), LD->getMemOperand());
      // Rewire users of the original load's chain to the new splatting load.
      DAG.ReplaceAllUsesOfValueWith(InputLoad->getValue(1), LdSplt.getValue(1));
      if (LdSplt.getValueType() != SVOp->getValueType(0))
        LdSplt = DAG.getBitcast(SVOp->getValueType(0), LdSplt);
      return LdSplt;
    }
  }

  // All v2i64 and v2f64 shuffles are legal
  if (VT == MVT::v2i64 || VT == MVT::v2f64)
    return Op;

  // Single-instruction word insert (XXINSERTW), Power9 only.
  if (Subtarget.hasP9Vector() &&
      PPC::isXXINSERTWMask(SVOp, ShiftElts, InsertAtByte, Swap,
                           isLittleEndian)) {
    if (V2.isUndef())
      V2 = V1;
    else if (Swap)
      std::swap(V1, V2);
    SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
    SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V2);
    if (ShiftElts) {
      SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v4i32, Conv2, Conv2,
                                DAG.getConstant(ShiftElts, dl, MVT::i32));
      SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v4i32, Conv1, Shl,
                                DAG.getConstant(InsertAtByte, dl, MVT::i32));
      return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
    }
    SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v4i32, Conv1, Conv2,
                              DAG.getConstant(InsertAtByte, dl, MVT::i32));
    return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
  }

  // ISA 3.1 splat-immediate into doubleword elements.
  if (Subtarget.hasPrefixInstrs() && Subtarget.hasP10Vector()) {
    SDValue SplatInsertNode;
    if ((SplatInsertNode = lowerToXXSPLTI32DX(SVOp, DAG)))
      return SplatInsertNode;
  }

  // ISA 3.0 element inserts (half-word, then byte).
  if (Subtarget.hasP9Altivec()) {
    SDValue NewISDNode;
    if ((NewISDNode = lowerToVINSERTH(SVOp, DAG)))
      return NewISDNode;

    if ((NewISDNode = lowerToVINSERTB(SVOp, DAG)))
      return NewISDNode;
  }

  if (Subtarget.hasVSX() &&
      PPC::isXXSLDWIShuffleMask(SVOp, ShiftElts, Swap, isLittleEndian)) {
    if (Swap)
      std::swap(V1, V2);
    SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
    SDValue Conv2 =
        DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V2.isUndef() ? V1 : V2);

    SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v4i32, Conv1, Conv2,
                              DAG.getConstant(ShiftElts, dl, MVT::i32));
    return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Shl);
  }

  if (Subtarget.hasVSX() &&
      PPC::isXXPERMDIShuffleMask(SVOp, ShiftElts, Swap, isLittleEndian)) {
    if (Swap)
      std::swap(V1, V2);
    SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V1);
    SDValue Conv2 =
        DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V2.isUndef() ? V1 : V2);

    SDValue PermDI = DAG.getNode(PPCISD::XXPERMDI, dl, MVT::v2i64, Conv1, Conv2,
                                 DAG.getConstant(ShiftElts, dl, MVT::i32));
    return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, PermDI);
  }

  // Byte-reverse patterns map to the Power9 XXBR[HWDQ] family via BSWAP.
  if (Subtarget.hasP9Vector()) {
    if (PPC::isXXBRHShuffleMask(SVOp)) {
      SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V1);
      SDValue ReveHWord = DAG.getNode(ISD::BSWAP, dl, MVT::v8i16, Conv);
      return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveHWord);
    } else if (PPC::isXXBRWShuffleMask(SVOp)) {
      SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
      SDValue ReveWord = DAG.getNode(ISD::BSWAP, dl, MVT::v4i32, Conv);
      return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveWord);
    } else if (PPC::isXXBRDShuffleMask(SVOp)) {
      SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V1);
      SDValue ReveDWord = DAG.getNode(ISD::BSWAP, dl, MVT::v2i64, Conv);
      return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveDWord);
    } else if (PPC::isXXBRQShuffleMask(SVOp)) {
      SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v1i128, V1);
      SDValue ReveQWord = DAG.getNode(ISD::BSWAP, dl, MVT::v1i128, Conv);
      return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveQWord);
    }
  }

  if (Subtarget.hasVSX()) {
    // Word splats map to XXSPLTW.
    if (V2.isUndef() && PPC::isSplatShuffleMask(SVOp, 4)) {
      int SplatIdx = PPC::getSplatIdxForPPCMnemonics(SVOp, 4, DAG);

      SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
      SDValue Splat = DAG.getNode(PPCISD::XXSPLT, dl, MVT::v4i32, Conv,
                                  DAG.getConstant(SplatIdx, dl, MVT::i32));
      return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Splat);
    }

    // Left shifts of 8 bytes are actually swaps. Convert accordingly.
    if (V2.isUndef() && PPC::isVSLDOIShuffleMask(SVOp, 1, DAG) == 8) {
      SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, V1);
      SDValue Swap = DAG.getNode(PPCISD::SWAP_NO_CHAIN, dl, MVT::v2f64, Conv);
      return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Swap);
    }
  }

  // Cases that are handled by instructions that take permute immediates
  // (such as vsplt*) should be left as VECTOR_SHUFFLE nodes so they can be
  // selected by the instruction selector.
  if (V2.isUndef()) {
    if (PPC::isSplatShuffleMask(SVOp, 1) ||
        PPC::isSplatShuffleMask(SVOp, 2) ||
        PPC::isSplatShuffleMask(SVOp, 4) ||
        PPC::isVPKUWUMShuffleMask(SVOp, 1, DAG) ||
        PPC::isVPKUHUMShuffleMask(SVOp, 1, DAG) ||
        PPC::isVSLDOIShuffleMask(SVOp, 1, DAG) != -1 ||
        PPC::isVMRGLShuffleMask(SVOp, 1, 1, DAG) ||
        PPC::isVMRGLShuffleMask(SVOp, 2, 1, DAG) ||
        PPC::isVMRGLShuffleMask(SVOp, 4, 1, DAG) ||
        PPC::isVMRGHShuffleMask(SVOp, 1, 1, DAG) ||
        PPC::isVMRGHShuffleMask(SVOp, 2, 1, DAG) ||
        PPC::isVMRGHShuffleMask(SVOp, 4, 1, DAG) ||
        (Subtarget.hasP8Altivec() && (
          PPC::isVPKUDUMShuffleMask(SVOp, 1, DAG) ||
          PPC::isVMRGEOShuffleMask(SVOp, true, 1, DAG) ||
          PPC::isVMRGEOShuffleMask(SVOp, false, 1, DAG)))) {
      return Op;
    }
  }

  // Altivec has a variety of "shuffle immediates" that take two vector inputs
  // and produce a fixed permutation. If any of these match, do not lower to
  // VPERM.
  unsigned int ShuffleKind = isLittleEndian ? 2 : 0;
  if (PPC::isVPKUWUMShuffleMask(SVOp, ShuffleKind, DAG) ||
      PPC::isVPKUHUMShuffleMask(SVOp, ShuffleKind, DAG) ||
      PPC::isVSLDOIShuffleMask(SVOp, ShuffleKind, DAG) != -1 ||
      PPC::isVMRGLShuffleMask(SVOp, 1, ShuffleKind, DAG) ||
      PPC::isVMRGLShuffleMask(SVOp, 2, ShuffleKind, DAG) ||
      PPC::isVMRGLShuffleMask(SVOp, 4, ShuffleKind, DAG) ||
      PPC::isVMRGHShuffleMask(SVOp, 1, ShuffleKind, DAG) ||
      PPC::isVMRGHShuffleMask(SVOp, 2, ShuffleKind, DAG) ||
      PPC::isVMRGHShuffleMask(SVOp, 4, ShuffleKind, DAG) ||
      (Subtarget.hasP8Altivec() && (
        PPC::isVPKUDUMShuffleMask(SVOp, ShuffleKind, DAG) ||
        PPC::isVMRGEOShuffleMask(SVOp, true, ShuffleKind, DAG) ||
        PPC::isVMRGEOShuffleMask(SVOp, false, ShuffleKind, DAG))))
    return Op;

  // Check to see if this is a shuffle of 4-byte values. If so, we can use our
  // perfect shuffle table to emit an optimal matching sequence.
  ArrayRef<int> PermMask = SVOp->getMask();

  if (!DisablePerfectShuffle && !isLittleEndian) {
    unsigned PFIndexes[4];
    bool isFourElementShuffle = true;
    for (unsigned i = 0; i != 4 && isFourElementShuffle;
         ++i) { // Element number
      unsigned EltNo = 8; // Start out undef.
      for (unsigned j = 0; j != 4; ++j) { // Intra-element byte.
        if (PermMask[i * 4 + j] < 0)
          continue; // Undef, ignore it.

        unsigned ByteSource = PermMask[i * 4 + j];
        // All four bytes of a word element must come from consecutive bytes
        // of the same source word, in order.
        if ((ByteSource & 3) != j) {
          isFourElementShuffle = false;
          break;
        }

        if (EltNo == 8) {
          EltNo = ByteSource / 4;
        } else if (EltNo != ByteSource / 4) {
          isFourElementShuffle = false;
          break;
        }
      }
      PFIndexes[i] = EltNo;
    }

    // If this shuffle can be expressed as a shuffle of 4-byte elements, use the
    // perfect shuffle vector to determine if it is cost effective to do this as
    // discrete instructions, or whether we should use a vperm.
    // For now, we skip this for little endian until such time as we have a
    // little-endian perfect shuffle table.
    if (isFourElementShuffle) {
      // Compute the index in the perfect shuffle table.
      unsigned PFTableIndex = PFIndexes[0] * 9 * 9 * 9 + PFIndexes[1] * 9 * 9 +
                              PFIndexes[2] * 9 + PFIndexes[3];

      unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
      unsigned Cost = (PFEntry >> 30);

      // Determining when to avoid vperm is tricky. Many things affect the cost
      // of vperm, particularly how many times the perm mask needs to be
      // computed. For example, if the perm mask can be hoisted out of a loop or
      // is already used (perhaps because there are multiple permutes with the
      // same shuffle mask?) the vperm has a cost of 1. OTOH, hoisting the
      // permute mask out of the loop requires an extra register.
      //
      // As a compromise, we only emit discrete instructions if the shuffle can
      // be generated in 3 or fewer operations. When we have loop information
      // available, if this block is within a loop, we should avoid using vperm
      // for 3-operation perms and use a constant pool load instead.
      if (Cost < 3)
        return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl);
    }
  }

  // Lower this to a VPERM(V1, V2, V3) expression, where V3 is a constant
  // vector that will get spilled to the constant pool.
  if (V2.isUndef()) V2 = V1;

  return LowerVPERM(Op, DAG, PermMask, VT, V1, V2);
}
10605
// Lower a vector shuffle to a PPCISD::VPERM (or, on P9 VSX subtargets, a
// PPCISD::XXPERM) node.  The element-unit shuffle mask in \p PermMask is
// expanded into a v16i8 byte-level permute control vector that the hardware
// instruction consumes.  Along the way this routine:
//   * folds a PPCISD::XXSWAPD feeding either input into the permute mask,
//   * complements the mask (31 - idx) and swaps the inputs for little endian,
//     since vperm semantics are defined big-endian,
//   * bitcasts the result back to the original value type.
SDValue PPCTargetLowering::LowerVPERM(SDValue Op, SelectionDAG &DAG,
                                      ArrayRef<int> PermMask, EVT VT,
                                      SDValue V1, SDValue V2) const {
  unsigned Opcode = PPCISD::VPERM;
  // Remember the incoming type so the final node can be bitcast back after
  // any intermediate v2f64 casts below.
  EVT ValType = V1.getValueType();
  SDLoc dl(Op);
  bool NeedSwap = false;
  bool isLittleEndian = Subtarget.isLittleEndian();
  bool isPPC64 = Subtarget.isPPC64();

  // Prefer XXPERM on P9: it can address all 64 VSX registers.  XXPERM's
  // second source is also its destination, so it is only profitable when at
  // least one input is dead (single use) and may be clobbered.
  if (Subtarget.hasVSX() && Subtarget.hasP9Vector() &&
      (V1->hasOneUse() || V2->hasOneUse())) {
    LLVM_DEBUG(dbgs() << "At least one of two input vectors are dead - using "
                         "XXPERM instead\n");
    Opcode = PPCISD::XXPERM;

    // The second input to XXPERM is also an output so if the second input has
    // multiple uses then copying is necessary, as a result we want the
    // single-use operand to be used as the second input to prevent copying.
    // (Which operand ends up "second" differs by endianness because of the
    // std::swap(V1, V2) performed for little endian further down.)
    if ((!isLittleEndian && !V2->hasOneUse() && V1->hasOneUse()) ||
        (isLittleEndian && !V1->hasOneUse() && V2->hasOneUse())) {
      std::swap(V1, V2);
      NeedSwap = !NeedSwap;
    }
  }

  // The SHUFFLE_VECTOR mask is almost exactly what we want for vperm, except
  // that it is in input element units, not in bytes. Convert now.

  // For little endian, the order of the input vectors is reversed, and
  // the permutation mask is complemented with respect to 31. This is
  // necessary to produce proper semantics with the big-endian-based vperm
  // instruction.
  EVT EltVT = V1.getValueType().getVectorElementType();
  unsigned BytesPerElement = EltVT.getSizeInBits() / 8;

  // Detect an XXSWAPD feeding either input so the swap can be folded into the
  // permute mask instead of being executed.
  // NOTE(review): this unconditionally inspects operand 0 of each input;
  // presumably shuffle inputs reaching here always have at least one
  // operand — confirm against the callers.
  bool V1HasXXSWAPD = V1->getOperand(0)->getOpcode() == PPCISD::XXSWAPD;
  bool V2HasXXSWAPD = V2->getOperand(0)->getOpcode() == PPCISD::XXSWAPD;

  /*
  Vectors will be appended like so: [ V1 | v2 ]
  XXSWAPD on V1:
  [   A  |   B  |   C  |   D  ] -> [   C  |   D  |   A  |   B  ]
     0-3    4-7    8-11  12-15         0-3    4-7    8-11  12-15
  i.e.  index of A, B += 8, and index of C, D -= 8.
  XXSWAPD on V2:
  [   E  |   F  |   G  |   H  ] -> [   G  |   H  |   E  |   F  ]
    16-19  20-23  24-27  28-31        16-19  20-23  24-27  28-31
  i.e.  index of E, F += 8, index of G, H -= 8
  Swap V1 and V2:
  [   V1  |   V2 ] -> [   V2  |   V1 ]
     0-15    16-31        0-15    16-31
  i.e.  index of V1 += 16, index of V2 -= 16
  */

  // Build the 16-entry byte permute mask, applying the index adjustments
  // described above for folded XXSWAPDs and a pending input swap.
  SmallVector<SDValue, 16> ResultMask;
  for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i) {
    // Undef mask entries (< 0) may select any byte; use element 0.
    unsigned SrcElt = PermMask[i] < 0 ? 0 : PermMask[i];

    if (V1HasXXSWAPD) {
      if (SrcElt < 8)
        SrcElt += 8;
      else if (SrcElt < 16)
        SrcElt -= 8;
    }
    if (V2HasXXSWAPD) {
      if (SrcElt > 23)
        SrcElt -= 8;
      else if (SrcElt > 15)
        SrcElt += 8;
    }
    if (NeedSwap) {
      if (SrcElt < 16)
        SrcElt += 16;
      else
        SrcElt -= 16;
    }
    // Expand each element index to its BytesPerElement byte indices; for LE
    // complement against 31 to match big-endian vperm numbering.
    for (unsigned j = 0; j != BytesPerElement; ++j)
      if (isLittleEndian)
        ResultMask.push_back(
            DAG.getConstant(31 - (SrcElt * BytesPerElement + j), dl, MVT::i32));
      else
        ResultMask.push_back(
            DAG.getConstant(SrcElt * BytesPerElement + j, dl, MVT::i32));
  }

  // The swaps are now folded into the mask, so bypass each XXSWAPD and use
  // its input directly.
  if (V1HasXXSWAPD) {
    dl = SDLoc(V1->getOperand(0));
    V1 = V1->getOperand(0)->getOperand(1);
  }
  if (V2HasXXSWAPD) {
    dl = SDLoc(V2->getOperand(0));
    V2 = V2->getOperand(0)->getOperand(1);
  }

  // After bypassing an XXSWAPD the inputs are in its domain; normalize both
  // operands to v2f64 so the permute node's operand types agree.
  if (isPPC64 && (V1HasXXSWAPD || V2HasXXSWAPD)) {
    if (ValType != MVT::v2f64)
      V1 = DAG.getBitcast(MVT::v2f64, V1);
    if (V2.getValueType() != MVT::v2f64)
      V2 = DAG.getBitcast(MVT::v2f64, V2);
  }

  ShufflesHandledWithVPERM++; // Statistic: shuffles lowered via vperm/xxperm.
  SDValue VPermMask = DAG.getBuildVector(MVT::v16i8, dl, ResultMask);
  LLVM_DEBUG({
    ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
    if (Opcode == PPCISD::XXPERM) {
      dbgs() << "Emitting a XXPERM for the following shuffle:\n";
    } else {
      dbgs() << "Emitting a VPERM for the following shuffle:\n";
    }
    SVOp->dump();
    dbgs() << "With the following permute control vector:\n";
    VPermMask.dump();
  });

  // XXPERM takes its control vector as v4i32.
  if (Opcode == PPCISD::XXPERM)
    VPermMask = DAG.getBitcast(MVT::v4i32, VPermMask);

  // Only need to place items backwards in LE,
  // the mask was properly calculated.
  if (isLittleEndian)
    std::swap(V1, V2);

  SDValue VPERMNode =
      DAG.getNode(Opcode, dl, V1.getValueType(), V1, V2, VPermMask);

  // Restore the caller-visible type.
  VPERMNode = DAG.getBitcast(ValType, VPERMNode);
  return VPERMNode;
}
10736
/// getVectorCompareInfo - Given an intrinsic, return false if it is not a
/// vector comparison. If it is, return true and fill in CompareOpc/isDot with
/// information about the intrinsic: CompareOpc receives the numeric opcode
/// field of the corresponding AltiVec/VSX compare instruction, and isDot is
/// set for the record-form predicate ("_p") variants, which also set CR6.
/// Comparisons that require a newer ISA than \p Subtarget provides return
/// false so they are not custom lowered.
static bool getVectorCompareInfo(SDValue Intrin, int &CompareOpc,
                                 bool &isDot, const PPCSubtarget &Subtarget) {
  unsigned IntrinsicID = Intrin.getConstantOperandVal(0);
  CompareOpc = -1;
  isDot = false;
  switch (IntrinsicID) {
  default:
    return false;
  // Comparison predicates.
  case Intrinsic::ppc_altivec_vcmpbfp_p:
    CompareOpc = 966;
    isDot = true;
    break;
  case Intrinsic::ppc_altivec_vcmpeqfp_p:
    CompareOpc = 198;
    isDot = true;
    break;
  case Intrinsic::ppc_altivec_vcmpequb_p:
    CompareOpc = 6;
    isDot = true;
    break;
  case Intrinsic::ppc_altivec_vcmpequh_p:
    CompareOpc = 70;
    isDot = true;
    break;
  case Intrinsic::ppc_altivec_vcmpequw_p:
    CompareOpc = 134;
    isDot = true;
    break;
  // Doubleword-element compares require VSX or P8 AltiVec.
  case Intrinsic::ppc_altivec_vcmpequd_p:
    if (Subtarget.hasVSX() || Subtarget.hasP8Altivec()) {
      CompareOpc = 199;
      isDot = true;
    } else
      return false;
    break;
  // Not-equal / not-equal-or-zero predicates require P9 AltiVec.
  case Intrinsic::ppc_altivec_vcmpneb_p:
  case Intrinsic::ppc_altivec_vcmpneh_p:
  case Intrinsic::ppc_altivec_vcmpnew_p:
  case Intrinsic::ppc_altivec_vcmpnezb_p:
  case Intrinsic::ppc_altivec_vcmpnezh_p:
  case Intrinsic::ppc_altivec_vcmpnezw_p:
    if (Subtarget.hasP9Altivec()) {
      switch (IntrinsicID) {
      default:
        llvm_unreachable("Unknown comparison intrinsic.");
      case Intrinsic::ppc_altivec_vcmpneb_p:
        CompareOpc = 7;
        break;
      case Intrinsic::ppc_altivec_vcmpneh_p:
        CompareOpc = 71;
        break;
      case Intrinsic::ppc_altivec_vcmpnew_p:
        CompareOpc = 135;
        break;
      case Intrinsic::ppc_altivec_vcmpnezb_p:
        CompareOpc = 263;
        break;
      case Intrinsic::ppc_altivec_vcmpnezh_p:
        CompareOpc = 327;
        break;
      case Intrinsic::ppc_altivec_vcmpnezw_p:
        CompareOpc = 391;
        break;
      }
      isDot = true;
    } else
      return false;
    break;
  case Intrinsic::ppc_altivec_vcmpgefp_p:
    CompareOpc = 454;
    isDot = true;
    break;
  case Intrinsic::ppc_altivec_vcmpgtfp_p:
    CompareOpc = 710;
    isDot = true;
    break;
  case Intrinsic::ppc_altivec_vcmpgtsb_p:
    CompareOpc = 774;
    isDot = true;
    break;
  case Intrinsic::ppc_altivec_vcmpgtsh_p:
    CompareOpc = 838;
    isDot = true;
    break;
  case Intrinsic::ppc_altivec_vcmpgtsw_p:
    CompareOpc = 902;
    isDot = true;
    break;
  case Intrinsic::ppc_altivec_vcmpgtsd_p:
    if (Subtarget.hasVSX() || Subtarget.hasP8Altivec()) {
      CompareOpc = 967;
      isDot = true;
    } else
      return false;
    break;
  case Intrinsic::ppc_altivec_vcmpgtub_p:
    CompareOpc = 518;
    isDot = true;
    break;
  case Intrinsic::ppc_altivec_vcmpgtuh_p:
    CompareOpc = 582;
    isDot = true;
    break;
  case Intrinsic::ppc_altivec_vcmpgtuw_p:
    CompareOpc = 646;
    isDot = true;
    break;
  case Intrinsic::ppc_altivec_vcmpgtud_p:
    if (Subtarget.hasVSX() || Subtarget.hasP8Altivec()) {
      CompareOpc = 711;
      isDot = true;
    } else
      return false;
    break;

  // Quadword-element compares (non-record form) require ISA 3.1.
  case Intrinsic::ppc_altivec_vcmpequq:
  case Intrinsic::ppc_altivec_vcmpgtsq:
  case Intrinsic::ppc_altivec_vcmpgtuq:
    if (!Subtarget.isISA3_1())
      return false;
    switch (IntrinsicID) {
    default:
      llvm_unreachable("Unknown comparison intrinsic.");
    case Intrinsic::ppc_altivec_vcmpequq:
      CompareOpc = 455;
      break;
    case Intrinsic::ppc_altivec_vcmpgtsq:
      CompareOpc = 903;
      break;
    case Intrinsic::ppc_altivec_vcmpgtuq:
      CompareOpc = 647;
      break;
    }
    break;

  // VSX predicate comparisons use the same infrastructure
  case Intrinsic::ppc_vsx_xvcmpeqdp_p:
  case Intrinsic::ppc_vsx_xvcmpgedp_p:
  case Intrinsic::ppc_vsx_xvcmpgtdp_p:
  case Intrinsic::ppc_vsx_xvcmpeqsp_p:
  case Intrinsic::ppc_vsx_xvcmpgesp_p:
  case Intrinsic::ppc_vsx_xvcmpgtsp_p:
    if (Subtarget.hasVSX()) {
      switch (IntrinsicID) {
      case Intrinsic::ppc_vsx_xvcmpeqdp_p:
        CompareOpc = 99;
        break;
      case Intrinsic::ppc_vsx_xvcmpgedp_p:
        CompareOpc = 115;
        break;
      case Intrinsic::ppc_vsx_xvcmpgtdp_p:
        CompareOpc = 107;
        break;
      case Intrinsic::ppc_vsx_xvcmpeqsp_p:
        CompareOpc = 67;
        break;
      case Intrinsic::ppc_vsx_xvcmpgesp_p:
        CompareOpc = 83;
        break;
      case Intrinsic::ppc_vsx_xvcmpgtsp_p:
        CompareOpc = 75;
        break;
      }
      isDot = true;
    } else
      return false;
    break;

  // Normal Comparisons.
  case Intrinsic::ppc_altivec_vcmpbfp:
    CompareOpc = 966;
    break;
  case Intrinsic::ppc_altivec_vcmpeqfp:
    CompareOpc = 198;
    break;
  case Intrinsic::ppc_altivec_vcmpequb:
    CompareOpc = 6;
    break;
  case Intrinsic::ppc_altivec_vcmpequh:
    CompareOpc = 70;
    break;
  case Intrinsic::ppc_altivec_vcmpequw:
    CompareOpc = 134;
    break;
  case Intrinsic::ppc_altivec_vcmpequd:
    if (Subtarget.hasP8Altivec())
      CompareOpc = 199;
    else
      return false;
    break;
  case Intrinsic::ppc_altivec_vcmpneb:
  case Intrinsic::ppc_altivec_vcmpneh:
  case Intrinsic::ppc_altivec_vcmpnew:
  case Intrinsic::ppc_altivec_vcmpnezb:
  case Intrinsic::ppc_altivec_vcmpnezh:
  case Intrinsic::ppc_altivec_vcmpnezw:
    if (Subtarget.hasP9Altivec())
      switch (IntrinsicID) {
      default:
        llvm_unreachable("Unknown comparison intrinsic.");
      case Intrinsic::ppc_altivec_vcmpneb:
        CompareOpc = 7;
        break;
      case Intrinsic::ppc_altivec_vcmpneh:
        CompareOpc = 71;
        break;
      case Intrinsic::ppc_altivec_vcmpnew:
        CompareOpc = 135;
        break;
      case Intrinsic::ppc_altivec_vcmpnezb:
        CompareOpc = 263;
        break;
      case Intrinsic::ppc_altivec_vcmpnezh:
        CompareOpc = 327;
        break;
      case Intrinsic::ppc_altivec_vcmpnezw:
        CompareOpc = 391;
        break;
      }
    else
      return false;
    break;
  case Intrinsic::ppc_altivec_vcmpgefp:
    CompareOpc = 454;
    break;
  case Intrinsic::ppc_altivec_vcmpgtfp:
    CompareOpc = 710;
    break;
  case Intrinsic::ppc_altivec_vcmpgtsb:
    CompareOpc = 774;
    break;
  case Intrinsic::ppc_altivec_vcmpgtsh:
    CompareOpc = 838;
    break;
  case Intrinsic::ppc_altivec_vcmpgtsw:
    CompareOpc = 902;
    break;
  case Intrinsic::ppc_altivec_vcmpgtsd:
    if (Subtarget.hasP8Altivec())
      CompareOpc = 967;
    else
      return false;
    break;
  case Intrinsic::ppc_altivec_vcmpgtub:
    CompareOpc = 518;
    break;
  case Intrinsic::ppc_altivec_vcmpgtuh:
    CompareOpc = 582;
    break;
  case Intrinsic::ppc_altivec_vcmpgtuw:
    CompareOpc = 646;
    break;
  case Intrinsic::ppc_altivec_vcmpgtud:
    if (Subtarget.hasP8Altivec())
      CompareOpc = 711;
    else
      return false;
    break;
  // Quadword-element predicate compares require ISA 3.1.
  case Intrinsic::ppc_altivec_vcmpequq_p:
  case Intrinsic::ppc_altivec_vcmpgtsq_p:
  case Intrinsic::ppc_altivec_vcmpgtuq_p:
    if (!Subtarget.isISA3_1())
      return false;
    switch (IntrinsicID) {
    default:
      llvm_unreachable("Unknown comparison intrinsic.");
    case Intrinsic::ppc_altivec_vcmpequq_p:
      CompareOpc = 455;
      break;
    case Intrinsic::ppc_altivec_vcmpgtsq_p:
      CompareOpc = 903;
      break;
    case Intrinsic::ppc_altivec_vcmpgtuq_p:
      CompareOpc = 647;
      break;
    }
    isDot = true;
    break;
  }
  return true;
}
11022
/// LowerINTRINSIC_WO_CHAIN - If this is an intrinsic that we want to custom
/// lower, do it, otherwise return null.
/// Handles rotate-and-insert builtins, BCD builtins, MMA/DMR assembly and
/// disassembly, exponent-compare and data-class tests, fused negated
/// multiply-subtract, f128<->ppcf128 conversion libcalls, min/max builtins,
/// and finally the AltiVec/VSX vector comparisons recognized by
/// getVectorCompareInfo.
SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
                                                   SelectionDAG &DAG) const {
  unsigned IntrinsicID = Op.getConstantOperandVal(0);

  SDLoc dl(Op);
  // Note: BCD instructions expect the immediate operand in vector form (v4i32),
  // but the builtin provides it as a scalar. To satisfy the instruction
  // encoding, we splat the scalar across all lanes using SPLAT_VECTOR.
  auto MapNodeWithSplatVector =
      [&](unsigned Opcode,
          std::initializer_list<SDValue> ExtraOps = {}) -> SDValue {
    SDValue SplatVal =
        DAG.getNode(ISD::SPLAT_VECTOR, dl, MVT::v4i32, Op.getOperand(2));

    SmallVector<SDValue, 4> Ops{SplatVal, Op.getOperand(1)};
    Ops.append(ExtraOps.begin(), ExtraOps.end());
    return DAG.getNode(Opcode, dl, MVT::v16i8, Ops);
  };

  switch (IntrinsicID) {
  case Intrinsic::thread_pointer:
    // Reads the thread pointer register, used for __builtin_thread_pointer.
    if (Subtarget.isPPC64())
      return DAG.getRegister(PPC::X13, MVT::i64);
    return DAG.getRegister(PPC::R2, MVT::i32);

  case Intrinsic::ppc_rldimi: {
    assert(Subtarget.isPPC64() && "rldimi is only available in 64-bit!");
    SDValue Src = Op.getOperand(1);
    APInt Mask = Op.getConstantOperandAPInt(4);
    // Mask of 0 keeps the insert target untouched; all-ones degenerates to a
    // plain rotate of the source.
    if (Mask.isZero())
      return Op.getOperand(2);
    if (Mask.isAllOnes())
      return DAG.getNode(ISD::ROTL, dl, MVT::i64, Src, Op.getOperand(3));
    uint64_t SH = Op.getConstantOperandVal(3);
    unsigned MB = 0, ME = 0;
    if (!isRunOfOnes64(Mask.getZExtValue(), MB, ME))
      report_fatal_error("invalid rldimi mask!");
    // rldimi requires ME=63-SH, otherwise rotation is needed before rldimi.
    if (ME < 63 - SH) {
      Src = DAG.getNode(ISD::ROTL, dl, MVT::i64, Src,
                        DAG.getConstant(ME + SH + 1, dl, MVT::i32));
    } else if (ME > 63 - SH) {
      Src = DAG.getNode(ISD::ROTL, dl, MVT::i64, Src,
                        DAG.getConstant(ME + SH - 63, dl, MVT::i32));
    }
    return SDValue(
        DAG.getMachineNode(PPC::RLDIMI, dl, MVT::i64,
                           {Op.getOperand(2), Src,
                            DAG.getTargetConstant(63 - ME, dl, MVT::i32),
                            DAG.getTargetConstant(MB, dl, MVT::i32)}),
        0);
  }

  case Intrinsic::ppc_rlwimi: {
    APInt Mask = Op.getConstantOperandAPInt(4);
    // Same degenerate-mask handling as rldimi above, in 32 bits.
    if (Mask.isZero())
      return Op.getOperand(2);
    if (Mask.isAllOnes())
      return DAG.getNode(ISD::ROTL, dl, MVT::i32, Op.getOperand(1),
                         Op.getOperand(3));
    unsigned MB = 0, ME = 0;
    if (!isRunOfOnes(Mask.getZExtValue(), MB, ME))
      report_fatal_error("invalid rlwimi mask!");
    return SDValue(DAG.getMachineNode(
                       PPC::RLWIMI, dl, MVT::i32,
                       {Op.getOperand(2), Op.getOperand(1), Op.getOperand(3),
                        DAG.getTargetConstant(MB, dl, MVT::i32),
                        DAG.getTargetConstant(ME, dl, MVT::i32)}),
                   0);
  }

  // BCD builtins: splat the scalar control operand (see lambda above).
  case Intrinsic::ppc_bcdshift:
    return MapNodeWithSplatVector(PPCISD::BCDSHIFT, {Op.getOperand(3)});
  case Intrinsic::ppc_bcdshiftround:
    return MapNodeWithSplatVector(PPCISD::BCDSHIFTROUND, {Op.getOperand(3)});
  case Intrinsic::ppc_bcdtruncate:
    return MapNodeWithSplatVector(PPCISD::BCDTRUNC, {Op.getOperand(3)});
  case Intrinsic::ppc_bcdunsignedtruncate:
    return MapNodeWithSplatVector(PPCISD::BCDUTRUNC);
  case Intrinsic::ppc_bcdunsignedshift:
    return MapNodeWithSplatVector(PPCISD::BCDUSHIFT);

  case Intrinsic::ppc_rlwnm: {
    // A zero mask makes the whole rotate-and-mask fold to zero.
    if (Op.getConstantOperandVal(3) == 0)
      return DAG.getConstant(0, dl, MVT::i32);
    unsigned MB = 0, ME = 0;
    if (!isRunOfOnes(Op.getConstantOperandVal(3), MB, ME))
      report_fatal_error("invalid rlwnm mask!");
    return SDValue(
        DAG.getMachineNode(PPC::RLWNM, dl, MVT::i32,
                           {Op.getOperand(1), Op.getOperand(2),
                            DAG.getTargetConstant(MB, dl, MVT::i32),
                            DAG.getTargetConstant(ME, dl, MVT::i32)}),
        0);
  }

  case Intrinsic::ppc_mma_disassemble_acc: {
    // On ISA Future, an accumulator lives in a wacc register; extract it into
    // two register pairs with DMXXEXTFDMR512, then split each pair into its
    // two v16i8 vectors, reversing order for little endian.
    if (Subtarget.isISAFuture()) {
      EVT ReturnTypes[] = {MVT::v256i1, MVT::v256i1};
      SDValue WideVec =
          SDValue(DAG.getMachineNode(PPC::DMXXEXTFDMR512, dl, ReturnTypes,
                                     Op.getOperand(1)),
                  0);
      SmallVector<SDValue, 4> RetOps;
      SDValue Value = SDValue(WideVec.getNode(), 0);
      SDValue Value2 = SDValue(WideVec.getNode(), 1);

      SDValue Extract;
      Extract = DAG.getNode(
          PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8,
          Subtarget.isLittleEndian() ? Value2 : Value,
          DAG.getConstant(Subtarget.isLittleEndian() ? 1 : 0,
                          dl, getPointerTy(DAG.getDataLayout())));
      RetOps.push_back(Extract);
      Extract = DAG.getNode(
          PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8,
          Subtarget.isLittleEndian() ? Value2 : Value,
          DAG.getConstant(Subtarget.isLittleEndian() ? 0 : 1,
                          dl, getPointerTy(DAG.getDataLayout())));
      RetOps.push_back(Extract);
      Extract = DAG.getNode(
          PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8,
          Subtarget.isLittleEndian() ? Value : Value2,
          DAG.getConstant(Subtarget.isLittleEndian() ? 1 : 0,
                          dl, getPointerTy(DAG.getDataLayout())));
      RetOps.push_back(Extract);
      Extract = DAG.getNode(
          PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8,
          Subtarget.isLittleEndian() ? Value : Value2,
          DAG.getConstant(Subtarget.isLittleEndian() ? 0 : 1,
                          dl, getPointerTy(DAG.getDataLayout())));
      RetOps.push_back(Extract);
      return DAG.getMergeValues(RetOps, dl);
    }
    // Pre-Future subtargets share the extraction loop below.
    [[fallthrough]];
  }
  case Intrinsic::ppc_vsx_disassemble_pair: {
    int NumVecs = 2;
    SDValue WideVec = Op.getOperand(1);
    // For the accumulator case (fallthrough from above), first move the
    // accumulator into VSRs and extract four vectors instead of two.
    if (IntrinsicID == Intrinsic::ppc_mma_disassemble_acc) {
      NumVecs = 4;
      WideVec = DAG.getNode(PPCISD::XXMFACC, dl, MVT::v512i1, WideVec);
    }
    SmallVector<SDValue, 4> RetOps;
    for (int VecNo = 0; VecNo < NumVecs; VecNo++) {
      SDValue Extract = DAG.getNode(
          PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8, WideVec,
          DAG.getConstant(Subtarget.isLittleEndian() ? NumVecs - 1 - VecNo
                                                     : VecNo,
                          dl, getPointerTy(DAG.getDataLayout())));
      RetOps.push_back(Extract);
    }
    return DAG.getMergeValues(RetOps, dl);
  }

  case Intrinsic::ppc_build_dmr: {
    // Combine the eight vector operands pairwise, collecting load chains so
    // ordering against the loads is preserved, then assemble the 1024-bit DMR.
    SmallVector<SDValue, 4> Pairs;
    SmallVector<SDValue, 4> Chains;
    for (int i = 1; i < 9; i += 2) {
      SDValue Hi = Op.getOperand(i);
      SDValue Lo = Op.getOperand(i + 1);
      if (Hi->getOpcode() == ISD::LOAD)
        Chains.push_back(Hi.getValue(1));
      if (Lo->getOpcode() == ISD::LOAD)
        Chains.push_back(Lo.getValue(1));
      Pairs.push_back(
          DAG.getNode(PPCISD::PAIR_BUILD, dl, MVT::v256i1, {Hi, Lo}));
    }
    SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Chains);
    SDValue Value = DMFInsert1024(Pairs, SDLoc(Op), DAG);
    return DAG.getMergeValues({Value, TF}, dl);
  }

  case Intrinsic::ppc_mma_dmxxextfdmr512: {
    assert(Subtarget.isISAFuture() && "dmxxextfdmr512 requires ISA Future");
    auto *Idx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
    assert(Idx && (Idx->getSExtValue() == 0 || Idx->getSExtValue() == 1) &&
           "Specify P of 0 or 1 for lower or upper 512 bytes");
    unsigned HiLo = Idx->getSExtValue();
    unsigned Opcode;
    unsigned Subx;
    // Select the low or high wacc half of the DMR and its matching opcode.
    if (HiLo == 0) {
      Opcode = PPC::DMXXEXTFDMR512;
      Subx = PPC::sub_wacc_lo;
    } else {
      Opcode = PPC::DMXXEXTFDMR512_HI;
      Subx = PPC::sub_wacc_hi;
    }
    SDValue Subreg(
        DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::v512i1,
                           Op.getOperand(1),
                           DAG.getTargetConstant(Subx, dl, MVT::i32)),
        0);
    EVT ReturnTypes[] = {MVT::v256i1, MVT::v256i1};
    return SDValue(DAG.getMachineNode(Opcode, dl, ReturnTypes, Subreg), 0);
  }

  case Intrinsic::ppc_mma_dmxxextfdmr256: {
    assert(Subtarget.isISAFuture() && "dmxxextfdmr256 requires ISA Future");
    auto *Idx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
    // NOTE(review): this assert uses '||' between the bounds checks, so it
    // only rejects a null Idx in practice — '&&' was likely intended.
    assert(Idx && (Idx->getSExtValue() >= 0 || Idx->getSExtValue() <= 3) &&
           "Specify a dmr row pair 0-3");
    unsigned IdxVal = Idx->getSExtValue();
    unsigned Subx;
    // Map the row-pair index to the corresponding DMR subregister.
    switch (IdxVal) {
    case 0:
      Subx = PPC::sub_dmrrowp0;
      break;
    case 1:
      Subx = PPC::sub_dmrrowp1;
      break;
    case 2:
      Subx = PPC::sub_wacc_hi_then_sub_dmrrowp0;
      break;
    case 3:
      Subx = PPC::sub_wacc_hi_then_sub_dmrrowp1;
      break;
    }
    SDValue Subreg(
        DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::v256i1,
                           Op.getOperand(1),
                           DAG.getTargetConstant(Subx, dl, MVT::i32)),
        0);
    SDValue P = DAG.getTargetConstant(IdxVal, dl, MVT::i32);
    return SDValue(
        DAG.getMachineNode(PPC::DMXXEXTFDMR256, dl, MVT::v256i1, {Subreg, P}),
        0);
  }

  case Intrinsic::ppc_mma_dmxxinstdmr512: {
    assert(Subtarget.isISAFuture() && "dmxxinstdmr512 requires ISA Future");
    auto *Idx = dyn_cast<ConstantSDNode>(Op.getOperand(4));
    assert(Idx && (Idx->getSExtValue() == 0 || Idx->getSExtValue() == 1) &&
           "Specify P of 0 or 1 for lower or upper 512 bytes");
    unsigned HiLo = Idx->getSExtValue();
    unsigned Opcode;
    unsigned Subx;
    if (HiLo == 0) {
      Opcode = PPCISD::INST512;
      Subx = PPC::sub_wacc_lo;
    } else {
      Opcode = PPCISD::INST512HI;
      Subx = PPC::sub_wacc_hi;
    }
    // Build the 512-bit wacc value, then insert it into the DMR operand.
    SDValue Wacc = DAG.getNode(Opcode, dl, MVT::v512i1, Op.getOperand(2),
                               Op.getOperand(3));
    SDValue SubReg = DAG.getTargetConstant(Subx, dl, MVT::i32);
    return SDValue(DAG.getMachineNode(PPC::INSERT_SUBREG, dl, MVT::v1024i1,
                                      Op.getOperand(1), Wacc, SubReg),
                   0);
  }

  case Intrinsic::ppc_mma_dmxxinstdmr256: {
    assert(Subtarget.isISAFuture() && "dmxxinstdmr256 requires ISA Future");
    auto *Idx = dyn_cast<ConstantSDNode>(Op.getOperand(3));
    // NOTE(review): same '||' bounds check as dmxxextfdmr256 above.
    assert(Idx && (Idx->getSExtValue() >= 0 || Idx->getSExtValue() <= 3) &&
           "Specify a dmr row pair 0-3");
    unsigned IdxVal = Idx->getSExtValue();
    unsigned Subx;
    switch (IdxVal) {
    case 0:
      Subx = PPC::sub_dmrrowp0;
      break;
    case 1:
      Subx = PPC::sub_dmrrowp1;
      break;
    case 2:
      Subx = PPC::sub_wacc_hi_then_sub_dmrrowp0;
      break;
    case 3:
      Subx = PPC::sub_wacc_hi_then_sub_dmrrowp1;
      break;
    }
    SDValue SubReg = DAG.getTargetConstant(Subx, dl, MVT::i32);
    SDValue P = DAG.getTargetConstant(IdxVal, dl, MVT::i32);
    SDValue DMRRowp =
        DAG.getNode(PPCISD::INST256, dl, MVT::v256i1, Op.getOperand(2), P);
    return SDValue(DAG.getMachineNode(PPC::INSERT_SUBREG, dl, MVT::v1024i1,
                                      Op.getOperand(1), DMRRowp, SubReg),
                   0);
  }

  case Intrinsic::ppc_mma_xxmfacc:
  case Intrinsic::ppc_mma_xxmtacc: {
    // Allow pre-isa-future subtargets to lower as normal.
    if (!Subtarget.isISAFuture())
      return SDValue();
    // The intrinsics for xxmtacc and xxmfacc take one argument of
    // type v512i1, for future cpu the corresponding wacc instruction
    // dmxx[inst|extf]dmr512 is always generated for type v512i1, negating
    // the need to produce the xxm[t|f]acc.
    SDValue WideVec = Op.getOperand(1);
    DAG.ReplaceAllUsesWith(Op, WideVec);
    return SDValue();
  }

  case Intrinsic::ppc_unpack_longdouble: {
    auto *Idx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
    assert(Idx && (Idx->getSExtValue() == 0 || Idx->getSExtValue() == 1) &&
           "Argument of long double unpack must be 0 or 1!");
    // Extract the requested f64 half of the ppc_fp128 value; !! normalizes
    // the constant to 0/1.
    return DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::f64, Op.getOperand(1),
                       DAG.getConstant(!!(Idx->getSExtValue()), dl,
                                       Idx->getValueType(0)));
  }

  case Intrinsic::ppc_compare_exp_lt:
  case Intrinsic::ppc_compare_exp_gt:
  case Intrinsic::ppc_compare_exp_eq:
  case Intrinsic::ppc_compare_exp_uo: {
    unsigned Pred;
    switch (IntrinsicID) {
    case Intrinsic::ppc_compare_exp_lt:
      Pred = PPC::PRED_LT;
      break;
    case Intrinsic::ppc_compare_exp_gt:
      Pred = PPC::PRED_GT;
      break;
    case Intrinsic::ppc_compare_exp_eq:
      Pred = PPC::PRED_EQ;
      break;
    case Intrinsic::ppc_compare_exp_uo:
      Pred = PPC::PRED_UN;
      break;
    }
    // Compare exponents with XSCMPEXPDP, then select 1/0 on the chosen
    // CR predicate.
    return SDValue(
        DAG.getMachineNode(
            PPC::SELECT_CC_I4, dl, MVT::i32,
            {SDValue(DAG.getMachineNode(PPC::XSCMPEXPDP, dl, MVT::i32,
                                        Op.getOperand(1), Op.getOperand(2)),
                     0),
             DAG.getConstant(1, dl, MVT::i32), DAG.getConstant(0, dl, MVT::i32),
             DAG.getTargetConstant(Pred, dl, MVT::i32)}),
        0);
  }
  case Intrinsic::ppc_test_data_class: {
    EVT OpVT = Op.getOperand(1).getValueType();
    unsigned CmprOpc = OpVT == MVT::f128 ? PPC::XSTSTDCQP
                                         : (OpVT == MVT::f64 ? PPC::XSTSTDCDP
                                                             : PPC::XSTSTDCSP);
    // Lower __builtin_ppc_test_data_class(value, mask) to XSTSTDC* instruction.
    // The XSTSTDC* instructions test if a floating-point value matches any of
    // the data classes specified in the mask, setting CR field bits
    // accordingly. We need to extract the EQ bit (bit 2) from the CR field and
    // convert it to an integer result (1 if match, 0 if no match).
    //
    // Note: Operands are swapped because XSTSTDC* expects (mask, value) but the
    // intrinsic provides (value, mask) as Op.getOperand(1) and
    // Op.getOperand(2).
    SDValue TestDataClass =
        SDValue(DAG.getMachineNode(CmprOpc, dl, MVT::i32,
                                   {Op.getOperand(2), Op.getOperand(1)}),
                0);
    if (Subtarget.isISA3_1()) {
      // ISA 3.1+: Use SETBC instruction to directly convert CR bit to integer.
      // This is more efficient than the SELECT_CC approach used in earlier
      // ISAs.
      SDValue SubRegIdx = DAG.getTargetConstant(PPC::sub_eq, dl, MVT::i32);
      SDValue CRBit =
          SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::i1,
                                     TestDataClass, SubRegIdx),
                  0);

      return DAG.getNode(PPCISD::SETBC, dl, MVT::i32, CRBit);
    }

    // Pre-ISA 3.1: Use SELECT_CC to convert CR field to integer (1 or 0).
    return SDValue(
        DAG.getMachineNode(PPC::SELECT_CC_I4, dl, MVT::i32,
                           {TestDataClass, DAG.getConstant(1, dl, MVT::i32),
                            DAG.getConstant(0, dl, MVT::i32),
                            DAG.getTargetConstant(PPC::PRED_EQ, dl, MVT::i32)}),
        0);
  }
  case Intrinsic::ppc_fnmsub: {
    EVT VT = Op.getOperand(1).getValueType();
    // Without VSX (or f128 support for f128 operands) expand to
    // -(fma(a, b, -c)); otherwise use the native FNMSUB node.
    if (!Subtarget.hasVSX() || (!Subtarget.hasFloat128() && VT == MVT::f128))
      return DAG.getNode(
          ISD::FNEG, dl, VT,
          DAG.getNode(ISD::FMA, dl, VT, Op.getOperand(1), Op.getOperand(2),
                      DAG.getNode(ISD::FNEG, dl, VT, Op.getOperand(3))));
    return DAG.getNode(PPCISD::FNMSUB, dl, VT, Op.getOperand(1),
                       Op.getOperand(2), Op.getOperand(3));
  }
  case Intrinsic::ppc_convert_f128_to_ppcf128:
  case Intrinsic::ppc_convert_ppcf128_to_f128: {
    // These conversions are performed by compiler-rt libcalls.
    RTLIB::Libcall LC = IntrinsicID == Intrinsic::ppc_convert_ppcf128_to_f128
                            ? RTLIB::CONVERT_PPCF128_F128
                            : RTLIB::CONVERT_F128_PPCF128;
    MakeLibCallOptions CallOptions;
    std::pair<SDValue, SDValue> Result =
        makeLibCall(DAG, LC, Op.getValueType(), Op.getOperand(1), CallOptions,
                    dl, SDValue());
    return Result.first;
  }
  case Intrinsic::ppc_maxfe:
  case Intrinsic::ppc_maxfl:
  case Intrinsic::ppc_maxfs:
  case Intrinsic::ppc_minfe:
  case Intrinsic::ppc_minfl:
  case Intrinsic::ppc_minfs: {
    EVT VT = Op.getValueType();
    assert(
        all_of(Op->ops().drop_front(4),
               [VT](const SDUse &Use) { return Use.getValueType() == VT; }) &&
        "ppc_[max|min]f[e|l|s] must have uniform type arguments");
    (void)VT;
    // Default to a max (SETGT) reduction; switch to SETLT for the min forms.
    ISD::CondCode CC = ISD::SETGT;
    if (IntrinsicID == Intrinsic::ppc_minfe ||
        IntrinsicID == Intrinsic::ppc_minfl ||
        IntrinsicID == Intrinsic::ppc_minfs)
      CC = ISD::SETLT;
    // Fold all value operands into Res with select_cc, visiting the
    // second-to-last operand down to the first and wrapping the index so the
    // trailing operand is also included.
    unsigned I = Op.getNumOperands() - 2, Cnt = I;
    SDValue Res = Op.getOperand(I);
    for (--I; Cnt != 0; --Cnt, I = (--I == 0 ? (Op.getNumOperands() - 1) : I)) {
      Res =
          DAG.getSelectCC(dl, Res, Op.getOperand(I), Res, Op.getOperand(I), CC);
    }
    return Res;
  }
  }

  // If this is a lowered altivec predicate compare, CompareOpc is set to the
  // opcode number of the comparison.
  int CompareOpc;
  bool isDot;
  if (!getVectorCompareInfo(Op, CompareOpc, isDot, Subtarget))
    return SDValue(); // Don't custom lower most intrinsics.

  // If this is a non-dot comparison, make the VCMP node and we are done.
  if (!isDot) {
    SDValue Tmp = DAG.getNode(PPCISD::VCMP, dl, Op.getOperand(2).getValueType(),
                              Op.getOperand(1), Op.getOperand(2),
                              DAG.getConstant(CompareOpc, dl, MVT::i32));
    return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Tmp);
  }

  // Create the PPCISD altivec 'dot' comparison node.
  SDValue Ops[] = {
    Op.getOperand(2),  // LHS
    Op.getOperand(3),  // RHS
    DAG.getConstant(CompareOpc, dl, MVT::i32)
  };
  EVT VTs[] = { Op.getOperand(2).getValueType(), MVT::Glue };
  SDValue CompNode = DAG.getNode(PPCISD::VCMP_rec, dl, VTs, Ops);

  // Unpack the result based on how the target uses it.  Operand 1 of the
  // predicate intrinsic selects which CR6 bit to return and whether to
  // invert it.
  unsigned BitNo;   // Bit # of CR6.
  bool InvertBit;   // Invert result?
  unsigned Bitx;
  unsigned SetOp;
  switch (Op.getConstantOperandVal(1)) {
  default:  // Can't happen, don't crash on invalid number though.
  case 0:   // Return the value of the EQ bit of CR6.
    BitNo = 0;
    InvertBit = false;
    Bitx = PPC::sub_eq;
    SetOp = PPCISD::SETBC;
    break;
  case 1:   // Return the inverted value of the EQ bit of CR6.
    BitNo = 0;
    InvertBit = true;
    Bitx = PPC::sub_eq;
    SetOp = PPCISD::SETBCR;
    break;
  case 2:   // Return the value of the LT bit of CR6.
    BitNo = 2;
    InvertBit = false;
    Bitx = PPC::sub_lt;
    SetOp = PPCISD::SETBC;
    break;
  case 3:   // Return the inverted value of the LT bit of CR6.
    BitNo = 2;
    InvertBit = true;
    Bitx = PPC::sub_lt;
    SetOp = PPCISD::SETBCR;
    break;
  }

  SDValue GlueOp = CompNode.getValue(1);
  // ISA 3.1+: extract the CR6 bit directly and materialize it with
  // SETBC/SETBCR (SETBCR performs the inversion).
  if (Subtarget.isISA3_1()) {
    SDValue SubRegIdx = DAG.getTargetConstant(Bitx, dl, MVT::i32);
    SDValue CR6Reg = DAG.getRegister(PPC::CR6, MVT::i32);
    SDValue CRBit =
        SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::i1,
                                   CR6Reg, SubRegIdx, GlueOp),
                0);
    return DAG.getNode(SetOp, dl, MVT::i32, CRBit);
  }

  // Now that we have the comparison, emit a copy from the CR to a GPR.
  // This is flagged to the above dot comparison.
  SDValue Flags = DAG.getNode(PPCISD::MFOCRF, dl, MVT::i32,
                              DAG.getRegister(PPC::CR6, MVT::i32), GlueOp);

  // Shift the bit into the low position.
  Flags = DAG.getNode(ISD::SRL, dl, MVT::i32, Flags,
                      DAG.getConstant(8 - (3 - BitNo), dl, MVT::i32));
  // Isolate the bit.
  Flags = DAG.getNode(ISD::AND, dl, MVT::i32, Flags,
                      DAG.getConstant(1, dl, MVT::i32));

  // If we are supposed to, toggle the bit.
  if (InvertBit)
    Flags = DAG.getNode(ISD::XOR, dl, MVT::i32, Flags,
                        DAG.getConstant(1, dl, MVT::i32));
  return Flags;
}
11533
// Lower ISD::INTRINSIC_W_CHAIN nodes for PPC-specific chained intrinsics.
// Currently only the lwat/ldat "compare-and-swap-not-equal" AMO intrinsics
// are handled; everything else falls through and returns an empty SDValue
// so the default handling applies.
11534SDValue PPCTargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
11535 SelectionDAG &DAG) const {
11536 unsigned IntrinsicID = Op.getConstantOperandVal(1);
11537 SDLoc dl(Op);
11538 switch (IntrinsicID) {
11539 case Intrinsic::ppc_amo_lwat_csne:
11540 case Intrinsic::ppc_amo_ldat_csne:
// Operands: (chain, intrinsic-id, pointer, compare value, new value).
11541 SDValue Chain = Op.getOperand(0);
11542 SDValue Ptr = Op.getOperand(2);
11543 SDValue CmpVal = Op.getOperand(3);
11544 SDValue NewVal = Op.getOperand(4);
11545
// ldat operates on i64 values, lwat on i32.
11546 EVT VT = IntrinsicID == Intrinsic::ppc_amo_ldat_csne ? MVT::i64 : MVT::i32;
11547 Type *Ty = VT.getTypeForEVT(*DAG.getContext());
11548 Type *IntPtrTy = DAG.getDataLayout().getIntPtrType(*DAG.getContext());
11549
// NOTE(review): two lines (11550, 11552) were lost in extraction here —
// presumably the argument-list declaration and the type argument of the
// first emplace_back (an i64 placeholder for the returned value); verify
// against the upstream sources before relying on this listing.
11551 Args.emplace_back(DAG.getUNDEF(MVT::i64),
11553 Args.emplace_back(CmpVal, Ty);
11554 Args.emplace_back(NewVal, Ty);
11555 Args.emplace_back(Ptr, IntPtrTy);
11556
11557 // Lower to dummy call to use ABI for consecutive register allocation.
11558 // Places return value, compare value, and new value in X3/X4/X5 as required
11559 // by lwat/ldat FC=16, avoiding a new register class for 3 adjacent
11560 // registers.
11561 const char *SymName = IntrinsicID == Intrinsic::ppc_amo_ldat_csne
11562 ? "__ldat_csne_pseudo"
11563 : "__lwat_csne_pseudo";
11564 SDValue Callee =
11565 DAG.getExternalSymbol(SymName, getPointerTy(DAG.getDataLayout()));
11566
// Build the dummy call; LowerCallTo performs the ABI register assignment.
11567 TargetLowering::CallLoweringInfo CLI(DAG);
11568 CLI.setDebugLoc(dl).setChain(Chain).setLibCallee(CallingConv::C, Ty, Callee,
11569 std::move(Args));
11570
// Merge the call result and the output chain into one node.
11571 auto Result = LowerCallTo(CLI);
11572 return DAG.getMergeValues({Result.first, Result.second}, dl);
11573 }
11574 return SDValue();
11575}
11576
11577SDValue PPCTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
11578 SelectionDAG &DAG) const {
11579 // SelectionDAGBuilder::visitTargetIntrinsic may insert one extra chain to
11580 // the beginning of the argument list.
11581 int ArgStart = isa<ConstantSDNode>(Op.getOperand(0)) ? 0 : 1;
11582 SDLoc DL(Op);
11583 switch (Op.getConstantOperandVal(ArgStart)) {
11584 case Intrinsic::ppc_cfence: {
11585 assert(ArgStart == 1 && "llvm.ppc.cfence must carry a chain argument.");
11586 SDValue Val = Op.getOperand(ArgStart + 1);
11587 EVT Ty = Val.getValueType();
11588 if (Ty == MVT::i128) {
11589 // FIXME: Testing one of two paired registers is sufficient to guarantee
11590 // ordering?
11591 Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::i64, Val);
11592 }
11593 unsigned Opcode = Subtarget.isPPC64() ? PPC::CFENCE8 : PPC::CFENCE;
11594 return SDValue(
11595 DAG.getMachineNode(
11596 Opcode, DL, MVT::Other,
11597 DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getScalarIntVT(), Val),
11598 Op.getOperand(0)),
11599 0);
11600 }
11601 case Intrinsic::ppc_disassemble_dmr: {
11602 assert(ArgStart == 1 &&
11603 "llvm.ppc.disassemble.dmr must carry a chain argument.");
11604 return DAG.getStore(Op.getOperand(0), DL, Op.getOperand(ArgStart + 2),
11605 Op.getOperand(ArgStart + 1), MachinePointerInfo());
11606 }
11607 case Intrinsic::ppc_amo_stwat:
11608 case Intrinsic::ppc_amo_stdat: {
11609 SDLoc dl(Op);
11610 SDValue Chain = Op.getOperand(0);
11611 SDValue Ptr = Op.getOperand(ArgStart + 1);
11612 SDValue Val = Op.getOperand(ArgStart + 2);
11613 SDValue FC = Op.getOperand(ArgStart + 3);
11614
11615 return DAG.getNode(PPCISD::STAT, dl, MVT::Other, Chain, Val, Ptr, FC);
11616 }
11617 default:
11618 break;
11619 }
11620 return SDValue();
11621}
11622
11623// Lower scalar BSWAP64 to xxbrd.
11624SDValue PPCTargetLowering::LowerBSWAP(SDValue Op, SelectionDAG &DAG) const {
11625 SDLoc dl(Op);
11626 if (!Subtarget.isPPC64())
11627 return Op;
11628 // MTVSRDD
11629 Op = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, Op.getOperand(0),
11630 Op.getOperand(0));
11631 // XXBRD
11632 Op = DAG.getNode(ISD::BSWAP, dl, MVT::v2i64, Op);
11633 // MFVSRD
11634 int VectorIndex = 0;
11635 if (Subtarget.isLittleEndian())
11636 VectorIndex = 1;
11637 Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, Op,
11638 DAG.getTargetConstant(VectorIndex, dl, MVT::i32));
11639 return Op;
11640}
11641
11642// ATOMIC_CMP_SWAP for i8/i16 needs to zero-extend its input since it will be
11643// compared to a value that is atomically loaded (atomic loads zero-extend).
11644SDValue PPCTargetLowering::LowerATOMIC_CMP_SWAP(SDValue Op,
11645 SelectionDAG &DAG) const {
11646 assert(Op.getOpcode() == ISD::ATOMIC_CMP_SWAP &&
11647 "Expecting an atomic compare-and-swap here.");
11648 SDLoc dl(Op);
11649 auto *AtomicNode = cast<AtomicSDNode>(Op.getNode());
11650 EVT MemVT = AtomicNode->getMemoryVT();
11651 if (MemVT.getSizeInBits() >= 32)
11652 return Op;
11653
11654 SDValue CmpOp = Op.getOperand(2);
11655 // If this is already correctly zero-extended, leave it alone.
11656 auto HighBits = APInt::getHighBitsSet(32, 32 - MemVT.getSizeInBits());
11657 if (DAG.MaskedValueIsZero(CmpOp, HighBits))
11658 return Op;
11659
11660 // Clear the high bits of the compare operand.
11661 unsigned MaskVal = (1 << MemVT.getSizeInBits()) - 1;
11662 SDValue NewCmpOp =
11663 DAG.getNode(ISD::AND, dl, MVT::i32, CmpOp,
11664 DAG.getConstant(MaskVal, dl, MVT::i32));
11665
11666 // Replace the existing compare operand with the properly zero-extended one.
11668 for (int i = 0, e = AtomicNode->getNumOperands(); i < e; i++)
11669 Ops.push_back(AtomicNode->getOperand(i));
11670 Ops[2] = NewCmpOp;
11671 MachineMemOperand *MMO = AtomicNode->getMemOperand();
11672 SDVTList Tys = DAG.getVTList(MVT::i32, MVT::Other);
11673 auto NodeTy =
11674 (MemVT == MVT::i8) ? PPCISD::ATOMIC_CMP_SWAP_8 : PPCISD::ATOMIC_CMP_SWAP_16;
11675 return DAG.getMemIntrinsicNode(NodeTy, dl, Tys, Ops, MemVT, MMO);
11676}
11677
11678SDValue PPCTargetLowering::LowerATOMIC_LOAD_STORE(SDValue Op,
11679 SelectionDAG &DAG) const {
11680 AtomicSDNode *N = cast<AtomicSDNode>(Op.getNode());
11681 EVT MemVT = N->getMemoryVT();
11682 assert(MemVT.getSimpleVT() == MVT::i128 &&
11683 "Expect quadword atomic operations");
11684 SDLoc dl(N);
11685 unsigned Opc = N->getOpcode();
11686 switch (Opc) {
11687 case ISD::ATOMIC_LOAD: {
11688 // Lower quadword atomic load to int_ppc_atomic_load_i128 which will be
11689 // lowered to ppc instructions by pattern matching instruction selector.
11690 SDVTList Tys = DAG.getVTList(MVT::i64, MVT::i64, MVT::Other);
11692 N->getOperand(0),
11693 DAG.getConstant(Intrinsic::ppc_atomic_load_i128, dl, MVT::i32)};
11694 for (int I = 1, E = N->getNumOperands(); I < E; ++I)
11695 Ops.push_back(N->getOperand(I));
11696 SDValue LoadedVal = DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl, Tys,
11697 Ops, MemVT, N->getMemOperand());
11698 SDValue ValLo = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i128, LoadedVal);
11699 SDValue ValHi =
11700 DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i128, LoadedVal.getValue(1));
11701 ValHi = DAG.getNode(ISD::SHL, dl, MVT::i128, ValHi,
11702 DAG.getConstant(64, dl, MVT::i32));
11703 SDValue Val =
11704 DAG.getNode(ISD::OR, dl, {MVT::i128, MVT::Other}, {ValLo, ValHi});
11705 return DAG.getNode(ISD::MERGE_VALUES, dl, {MVT::i128, MVT::Other},
11706 {Val, LoadedVal.getValue(2)});
11707 }
11708 case ISD::ATOMIC_STORE: {
11709 // Lower quadword atomic store to int_ppc_atomic_store_i128 which will be
11710 // lowered to ppc instructions by pattern matching instruction selector.
11711 SDVTList Tys = DAG.getVTList(MVT::Other);
11713 N->getOperand(0),
11714 DAG.getConstant(Intrinsic::ppc_atomic_store_i128, dl, MVT::i32)};
11715 SDValue Val = N->getOperand(1);
11716 SDValue ValLo = DAG.getNode(ISD::TRUNCATE, dl, MVT::i64, Val);
11717 SDValue ValHi = DAG.getNode(ISD::SRL, dl, MVT::i128, Val,
11718 DAG.getConstant(64, dl, MVT::i32));
11719 ValHi = DAG.getNode(ISD::TRUNCATE, dl, MVT::i64, ValHi);
11720 Ops.push_back(ValLo);
11721 Ops.push_back(ValHi);
11722 Ops.push_back(N->getOperand(2));
11723 return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, dl, Tys, Ops, MemVT,
11724 N->getMemOperand());
11725 }
11726 default:
11727 llvm_unreachable("Unexpected atomic opcode");
11728 }
11729}
11730
11732 SelectionDAG &DAG,
11733 const PPCSubtarget &Subtarget) {
11734 assert(Mask <= fcAllFlags && "Invalid fp_class flags!");
11735
11736 enum DataClassMask {
11737 DC_NAN = 1 << 6,
11738 DC_NEG_INF = 1 << 4,
11739 DC_POS_INF = 1 << 5,
11740 DC_NEG_ZERO = 1 << 2,
11741 DC_POS_ZERO = 1 << 3,
11742 DC_NEG_SUBNORM = 1,
11743 DC_POS_SUBNORM = 1 << 1,
11744 };
11745
11746 EVT VT = Op.getValueType();
11747
11748 unsigned TestOp = VT == MVT::f128 ? PPC::XSTSTDCQP
11749 : VT == MVT::f64 ? PPC::XSTSTDCDP
11750 : PPC::XSTSTDCSP;
11751
11752 if (Mask == fcAllFlags)
11753 return DAG.getBoolConstant(true, Dl, MVT::i1, VT);
11754 if (Mask == 0)
11755 return DAG.getBoolConstant(false, Dl, MVT::i1, VT);
11756
11757 // When it's cheaper or necessary to test reverse flags.
11758 if ((Mask & fcNormal) == fcNormal || Mask == ~fcQNan || Mask == ~fcSNan) {
11759 SDValue Rev = getDataClassTest(Op, ~Mask, Dl, DAG, Subtarget);
11760 return DAG.getNOT(Dl, Rev, MVT::i1);
11761 }
11762
11763 // Power doesn't support testing whether a value is 'normal'. Test the rest
11764 // first, and test if it's 'not not-normal' with expected sign.
11765 if (Mask & fcNormal) {
11766 SDValue Rev(DAG.getMachineNode(
11767 TestOp, Dl, MVT::i32,
11768 DAG.getTargetConstant(DC_NAN | DC_NEG_INF | DC_POS_INF |
11769 DC_NEG_ZERO | DC_POS_ZERO |
11770 DC_NEG_SUBNORM | DC_POS_SUBNORM,
11771 Dl, MVT::i32),
11772 Op),
11773 0);
11774 // Sign are stored in CR bit 0, result are in CR bit 2.
11775 SDValue Sign(
11776 DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, Dl, MVT::i1, Rev,
11777 DAG.getTargetConstant(PPC::sub_lt, Dl, MVT::i32)),
11778 0);
11779 SDValue Normal(DAG.getNOT(
11780 Dl,
11782 TargetOpcode::EXTRACT_SUBREG, Dl, MVT::i1, Rev,
11783 DAG.getTargetConstant(PPC::sub_eq, Dl, MVT::i32)),
11784 0),
11785 MVT::i1));
11786 if (Mask & fcPosNormal)
11787 Sign = DAG.getNOT(Dl, Sign, MVT::i1);
11788 SDValue Result = DAG.getNode(ISD::AND, Dl, MVT::i1, Sign, Normal);
11789 if (Mask == fcPosNormal || Mask == fcNegNormal)
11790 return Result;
11791
11792 return DAG.getNode(
11793 ISD::OR, Dl, MVT::i1,
11794 getDataClassTest(Op, Mask & ~fcNormal, Dl, DAG, Subtarget), Result);
11795 }
11796
11797 // The instruction doesn't differentiate between signaling or quiet NaN. Test
11798 // the rest first, and test if it 'is NaN and is signaling/quiet'.
11799 if ((Mask & fcNan) == fcQNan || (Mask & fcNan) == fcSNan) {
11800 bool IsQuiet = Mask & fcQNan;
11801 SDValue NanCheck = getDataClassTest(Op, fcNan, Dl, DAG, Subtarget);
11802
11803 // Quietness is determined by the first bit in fraction field.
11804 uint64_t QuietMask = 0;
11805 SDValue HighWord;
11806 if (VT == MVT::f128) {
11807 HighWord = DAG.getNode(
11808 ISD::EXTRACT_VECTOR_ELT, Dl, MVT::i32, DAG.getBitcast(MVT::v4i32, Op),
11809 DAG.getVectorIdxConstant(Subtarget.isLittleEndian() ? 3 : 0, Dl));
11810 QuietMask = 0x8000;
11811 } else if (VT == MVT::f64) {
11812 if (Subtarget.isPPC64()) {
11813 HighWord = DAG.getNode(ISD::EXTRACT_ELEMENT, Dl, MVT::i32,
11814 DAG.getBitcast(MVT::i64, Op),
11815 DAG.getConstant(1, Dl, MVT::i32));
11816 } else {
11817 SDValue Vec = DAG.getBitcast(
11818 MVT::v4i32, DAG.getNode(ISD::SCALAR_TO_VECTOR, Dl, MVT::v2f64, Op));
11819 HighWord = DAG.getNode(
11820 ISD::EXTRACT_VECTOR_ELT, Dl, MVT::i32, Vec,
11821 DAG.getVectorIdxConstant(Subtarget.isLittleEndian() ? 1 : 0, Dl));
11822 }
11823 QuietMask = 0x80000;
11824 } else if (VT == MVT::f32) {
11825 HighWord = DAG.getBitcast(MVT::i32, Op);
11826 QuietMask = 0x400000;
11827 }
11828 SDValue NanRes = DAG.getSetCC(
11829 Dl, MVT::i1,
11830 DAG.getNode(ISD::AND, Dl, MVT::i32, HighWord,
11831 DAG.getConstant(QuietMask, Dl, MVT::i32)),
11832 DAG.getConstant(0, Dl, MVT::i32), IsQuiet ? ISD::SETNE : ISD::SETEQ);
11833 NanRes = DAG.getNode(ISD::AND, Dl, MVT::i1, NanCheck, NanRes);
11834 if (Mask == fcQNan || Mask == fcSNan)
11835 return NanRes;
11836
11837 return DAG.getNode(ISD::OR, Dl, MVT::i1,
11838 getDataClassTest(Op, Mask & ~fcNan, Dl, DAG, Subtarget),
11839 NanRes);
11840 }
11841
11842 unsigned NativeMask = 0;
11843 if ((Mask & fcNan) == fcNan)
11844 NativeMask |= DC_NAN;
11845 if (Mask & fcNegInf)
11846 NativeMask |= DC_NEG_INF;
11847 if (Mask & fcPosInf)
11848 NativeMask |= DC_POS_INF;
11849 if (Mask & fcNegZero)
11850 NativeMask |= DC_NEG_ZERO;
11851 if (Mask & fcPosZero)
11852 NativeMask |= DC_POS_ZERO;
11853 if (Mask & fcNegSubnormal)
11854 NativeMask |= DC_NEG_SUBNORM;
11855 if (Mask & fcPosSubnormal)
11856 NativeMask |= DC_POS_SUBNORM;
11857 return SDValue(
11858 DAG.getMachineNode(
11859 TargetOpcode::EXTRACT_SUBREG, Dl, MVT::i1,
11861 TestOp, Dl, MVT::i32,
11862 DAG.getTargetConstant(NativeMask, Dl, MVT::i32), Op),
11863 0),
11864 DAG.getTargetConstant(PPC::sub_eq, Dl, MVT::i32)),
11865 0);
11866}
11867
11868SDValue PPCTargetLowering::LowerIS_FPCLASS(SDValue Op,
11869 SelectionDAG &DAG) const {
11870 assert(Subtarget.hasP9Vector() && "Test data class requires Power9");
11871 SDValue LHS = Op.getOperand(0);
11872 uint64_t RHSC = Op.getConstantOperandVal(1);
11873 SDLoc Dl(Op);
11874 FPClassTest Category = static_cast<FPClassTest>(RHSC);
11875 if (LHS.getValueType() == MVT::ppcf128) {
11876 // The higher part determines the value class.
11877 LHS = DAG.getNode(ISD::EXTRACT_ELEMENT, Dl, MVT::f64, LHS,
11878 DAG.getConstant(1, Dl, MVT::i32));
11879 }
11880
11881 return getDataClassTest(LHS, Category, Dl, DAG, Subtarget);
11882}
11883
11884// Adjust the length value for a load/store with length to account for the
11885// instructions requiring a left justified length, and for non-byte element
11886// types requiring scaling by element size.
11887static SDValue AdjustLength(SDValue Val, unsigned Bits, bool Left,
11888 SelectionDAG &DAG) {
11889 SDLoc dl(Val);
11890 EVT VT = Val->getValueType(0);
11891 unsigned LeftAdj = Left ? VT.getSizeInBits() - 8 : 0;
11892 unsigned TypeAdj = llvm::countr_zero<uint32_t>(Bits / 8);
11893 SDValue SHLAmt = DAG.getConstant(LeftAdj + TypeAdj, dl, VT);
11894 return DAG.getNode(ISD::SHL, dl, VT, Val, SHLAmt);
11895}
11896
11897SDValue PPCTargetLowering::LowerVP_LOAD(SDValue Op, SelectionDAG &DAG) const {
11898 auto VPLD = cast<VPLoadSDNode>(Op);
11899 bool Future = Subtarget.isISAFuture();
11900 SDLoc dl(Op);
11901 assert(ISD::isConstantSplatVectorAllOnes(Op->getOperand(3).getNode(), true) &&
11902 "Mask predication not supported");
11903 EVT PtrVT = getPointerTy(DAG.getDataLayout());
11904 SDValue Len = DAG.getNode(ISD::ANY_EXTEND, dl, PtrVT, VPLD->getOperand(4));
11905 unsigned IID = Future ? Intrinsic::ppc_vsx_lxvrl : Intrinsic::ppc_vsx_lxvl;
11906 unsigned EltBits = Op->getValueType(0).getScalarType().getSizeInBits();
11907 Len = AdjustLength(Len, EltBits, !Future, DAG);
11908 SDValue Ops[] = {VPLD->getChain(), DAG.getConstant(IID, dl, MVT::i32),
11909 VPLD->getOperand(1), Len};
11910 SDVTList Tys = DAG.getVTList(Op->getValueType(0), MVT::Other);
11911 SDValue VPL =
11913 VPLD->getMemoryVT(), VPLD->getMemOperand());
11914 return VPL;
11915}
11916
11917SDValue PPCTargetLowering::LowerVP_STORE(SDValue Op, SelectionDAG &DAG) const {
11918 auto VPST = cast<VPStoreSDNode>(Op);
11919 assert(ISD::isConstantSplatVectorAllOnes(Op->getOperand(4).getNode(), true) &&
11920 "Mask predication not supported");
11921 EVT PtrVT = getPointerTy(DAG.getDataLayout());
11922 SDLoc dl(Op);
11923 SDValue Len = DAG.getNode(ISD::ANY_EXTEND, dl, PtrVT, VPST->getOperand(5));
11924 unsigned EltBits =
11925 Op->getOperand(1).getValueType().getScalarType().getSizeInBits();
11926 bool Future = Subtarget.isISAFuture();
11927 unsigned IID = Future ? Intrinsic::ppc_vsx_stxvrl : Intrinsic::ppc_vsx_stxvl;
11928 Len = AdjustLength(Len, EltBits, !Future, DAG);
11929 SDValue Ops[] = {
11930 VPST->getChain(), DAG.getConstant(IID, dl, MVT::i32),
11931 DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, VPST->getOperand(1)),
11932 VPST->getOperand(2), Len};
11933 SDVTList Tys = DAG.getVTList(MVT::Other);
11934 SDValue VPS =
11936 VPST->getMemoryVT(), VPST->getMemOperand());
11937 return VPS;
11938}
11939
11940SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op,
11941 SelectionDAG &DAG) const {
11942 SDLoc dl(Op);
11943
11944 MachineFunction &MF = DAG.getMachineFunction();
11945 SDValue Op0 = Op.getOperand(0);
11946 EVT ValVT = Op0.getValueType();
11947 unsigned EltSize = Op.getValueType().getScalarSizeInBits();
11948 if (isa<ConstantSDNode>(Op0) && EltSize <= 32) {
11949 int64_t IntVal = Op.getConstantOperandVal(0);
11950 if (IntVal >= -16 && IntVal <= 15)
11951 return getCanonicalConstSplat(IntVal, EltSize / 8, Op.getValueType(), DAG,
11952 dl);
11953 }
11954
11955 ReuseLoadInfo RLI;
11956 if (Subtarget.hasLFIWAX() && Subtarget.hasVSX() &&
11957 Op.getValueType() == MVT::v4i32 && Op0.getOpcode() == ISD::LOAD &&
11958 Op0.getValueType() == MVT::i32 && Op0.hasOneUse() &&
11959 canReuseLoadAddress(Op0, MVT::i32, RLI, DAG, ISD::NON_EXTLOAD)) {
11960
11961 MachineMemOperand *MMO =
11963 RLI.Alignment, RLI.AAInfo, RLI.Ranges);
11964 SDValue Ops[] = {RLI.Chain, RLI.Ptr, DAG.getValueType(Op.getValueType())};
11966 PPCISD::LD_SPLAT, dl, DAG.getVTList(MVT::v4i32, MVT::Other), Ops,
11967 MVT::i32, MMO);
11968 if (RLI.ResChain)
11969 DAG.makeEquivalentMemoryOrdering(RLI.ResChain, Bits.getValue(1));
11970 return Bits.getValue(0);
11971 }
11972
11973 // Create a stack slot that is 16-byte aligned.
11974 MachineFrameInfo &MFI = MF.getFrameInfo();
11975 int FrameIdx = MFI.CreateStackObject(16, Align(16), false);
11976 EVT PtrVT = getPointerTy(DAG.getDataLayout());
11977 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
11978
11979 SDValue Val = Op0;
11980 // P10 hardware store forwarding requires that a single store contains all
11981 // the data for the load. P10 is able to merge a pair of adjacent stores. Try
11982 // to avoid load hit store on P10 when running binaries compiled for older
11983 // processors by generating two mergeable scalar stores to forward with the
11984 // vector load.
11985 if (!DisableP10StoreForward && Subtarget.isPPC64() &&
11986 !Subtarget.isLittleEndian() && ValVT.isInteger() &&
11987 ValVT.getSizeInBits() <= 64) {
11988 Val = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, Val);
11989 EVT ShiftAmountTy = getShiftAmountTy(MVT::i64, DAG.getDataLayout());
11990 SDValue ShiftBy = DAG.getConstant(
11991 64 - Op.getValueType().getScalarSizeInBits(), dl, ShiftAmountTy);
11992 Val = DAG.getNode(ISD::SHL, dl, MVT::i64, Val, ShiftBy);
11993 SDValue Plus8 =
11994 DAG.getNode(ISD::ADD, dl, PtrVT, FIdx, DAG.getConstant(8, dl, PtrVT));
11995 SDValue Store2 =
11996 DAG.getStore(DAG.getEntryNode(), dl, Val, Plus8, MachinePointerInfo());
11997 SDValue Store = DAG.getStore(Store2, dl, Val, FIdx, MachinePointerInfo());
11998 return DAG.getLoad(Op.getValueType(), dl, Store, FIdx,
11999 MachinePointerInfo());
12000 }
12001
12002 // Store the input value into Value#0 of the stack slot.
12003 SDValue Store =
12004 DAG.getStore(DAG.getEntryNode(), dl, Val, FIdx, MachinePointerInfo());
12005 // Load it out.
12006 return DAG.getLoad(Op.getValueType(), dl, Store, FIdx, MachinePointerInfo());
12007}
12008
12009SDValue PPCTargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
12010 SelectionDAG &DAG) const {
12011 assert(Op.getOpcode() == ISD::INSERT_VECTOR_ELT &&
12012 "Should only be called for ISD::INSERT_VECTOR_ELT");
12013
12014 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(2));
12015
12016 EVT VT = Op.getValueType();
12017 SDLoc dl(Op);
12018 SDValue V1 = Op.getOperand(0);
12019 SDValue V2 = Op.getOperand(1);
12020
12021 if (VT == MVT::v2f64 && C)
12022 return Op;
12023
12024 if (Subtarget.hasP9Vector()) {
12025 // A f32 load feeding into a v4f32 insert_vector_elt is handled in this way
12026 // because on P10, it allows this specific insert_vector_elt load pattern to
12027 // utilize the refactored load and store infrastructure in order to exploit
12028 // prefixed loads.
12029 // On targets with inexpensive direct moves (Power9 and up), a
12030 // (insert_vector_elt v4f32:$vec, (f32 load)) is always better as an integer
12031 // load since a single precision load will involve conversion to double
12032 // precision on the load followed by another conversion to single precision.
12033 if ((VT == MVT::v4f32) && (V2.getValueType() == MVT::f32) &&
12034 (isa<LoadSDNode>(V2))) {
12035 SDValue BitcastVector = DAG.getBitcast(MVT::v4i32, V1);
12036 SDValue BitcastLoad = DAG.getBitcast(MVT::i32, V2);
12037 SDValue InsVecElt =
12038 DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v4i32, BitcastVector,
12039 BitcastLoad, Op.getOperand(2));
12040 return DAG.getBitcast(MVT::v4f32, InsVecElt);
12041 }
12042 }
12043
12044 if (Subtarget.isISA3_1()) {
12045 if ((VT == MVT::v2i64 || VT == MVT::v2f64) && !Subtarget.isPPC64())
12046 return SDValue();
12047 // On P10, we have legal lowering for constant and variable indices for
12048 // all vectors.
12049 if (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32 ||
12050 VT == MVT::v2i64 || VT == MVT::v4f32 || VT == MVT::v2f64)
12051 return Op;
12052 }
12053
12054 // Before P10, we have legal lowering for constant indices but not for
12055 // variable ones.
12056 if (!C)
12057 return SDValue();
12058
12059 // We can use MTVSRZ + VECINSERT for v8i16 and v16i8 types.
12060 if (VT == MVT::v8i16 || VT == MVT::v16i8) {
12061 SDValue Mtvsrz = DAG.getNode(PPCISD::MTVSRZ, dl, VT, V2);
12062 unsigned BytesInEachElement = VT.getVectorElementType().getSizeInBits() / 8;
12063 unsigned InsertAtElement = C->getZExtValue();
12064 unsigned InsertAtByte = InsertAtElement * BytesInEachElement;
12065 if (Subtarget.isLittleEndian()) {
12066 InsertAtByte = (16 - BytesInEachElement) - InsertAtByte;
12067 }
12068 return DAG.getNode(PPCISD::VECINSERT, dl, VT, V1, Mtvsrz,
12069 DAG.getConstant(InsertAtByte, dl, MVT::i32));
12070 }
12071 return Op;
12072}
12073
12074SDValue PPCTargetLowering::LowerDMFVectorLoad(SDValue Op,
12075 SelectionDAG &DAG) const {
12076 SDLoc dl(Op);
12077 LoadSDNode *LN = cast<LoadSDNode>(Op.getNode());
12078 SDValue LoadChain = LN->getChain();
12079 SDValue BasePtr = LN->getBasePtr();
12080 EVT VT = Op.getValueType();
12081 bool IsV1024i1 = VT == MVT::v1024i1;
12082 bool IsV2048i1 = VT == MVT::v2048i1;
12083
12084 // The types v1024i1 and v2048i1 are used for Dense Math dmr registers and
12085 // Dense Math dmr pair registers, respectively.
12086 assert((IsV1024i1 || IsV2048i1) && "Unsupported type.");
12087 (void)IsV2048i1;
12088 assert((Subtarget.hasMMA() && Subtarget.isISAFuture()) &&
12089 "Dense Math support required.");
12090 assert(Subtarget.pairedVectorMemops() && "Vector pair support required.");
12091
12093 SmallVector<SDValue, 8> LoadChains;
12094
12095 SDValue IntrinID = DAG.getConstant(Intrinsic::ppc_vsx_lxvp, dl, MVT::i32);
12096 SDValue LoadOps[] = {LoadChain, IntrinID, BasePtr};
12097 MachineMemOperand *MMO = LN->getMemOperand();
12098 unsigned NumVecs = VT.getSizeInBits() / 256;
12099 for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
12100 MachineMemOperand *NewMMO =
12101 DAG.getMachineFunction().getMachineMemOperand(MMO, Idx * 32, 32);
12102 if (Idx > 0) {
12103 BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
12104 DAG.getConstant(32, dl, BasePtr.getValueType()));
12105 LoadOps[2] = BasePtr;
12106 }
12108 DAG.getVTList(MVT::v256i1, MVT::Other),
12109 LoadOps, MVT::v256i1, NewMMO);
12110 LoadChains.push_back(Ld.getValue(1));
12111 Loads.push_back(Ld);
12112 }
12113
12114 if (Subtarget.isLittleEndian()) {
12115 std::reverse(Loads.begin(), Loads.end());
12116 std::reverse(LoadChains.begin(), LoadChains.end());
12117 }
12118
12119 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains);
12120 SDValue Value = DMFInsert1024(Loads, dl, DAG);
12121
12122 if (IsV1024i1) {
12123 return DAG.getMergeValues({Value, TF}, dl);
12124 }
12125
12126 // Handle Loads for V2048i1 which represents a dmr pair.
12127 SmallVector<SDValue, 4> MoreLoads{Loads[4], Loads[5], Loads[6], Loads[7]};
12128 SDValue Dmr1Value = DMFInsert1024(MoreLoads, dl, DAG);
12129
12130 SDValue Dmr0Sub = DAG.getTargetConstant(PPC::sub_dmr0, dl, MVT::i32);
12131 SDValue Dmr1Sub = DAG.getTargetConstant(PPC::sub_dmr1, dl, MVT::i32);
12132
12133 SDValue DmrPRC = DAG.getTargetConstant(PPC::DMRpRCRegClassID, dl, MVT::i32);
12134 const SDValue DmrPOps[] = {DmrPRC, Value, Dmr0Sub, Dmr1Value, Dmr1Sub};
12135
12136 SDValue DmrPValue = SDValue(
12137 DAG.getMachineNode(PPC::REG_SEQUENCE, dl, MVT::v2048i1, DmrPOps), 0);
12138
12139 return DAG.getMergeValues({DmrPValue, TF}, dl);
12140}
12141
12142SDValue PPCTargetLowering::DMFInsert1024(const SmallVectorImpl<SDValue> &Pairs,
12143 const SDLoc &dl,
12144 SelectionDAG &DAG) const {
12145 SDValue Lo =
12146 DAG.getNode(PPCISD::INST512, dl, MVT::v512i1, Pairs[0], Pairs[1]);
12147 SDValue LoSub = DAG.getTargetConstant(PPC::sub_wacc_lo, dl, MVT::i32);
12148 SDValue Hi =
12149 DAG.getNode(PPCISD::INST512HI, dl, MVT::v512i1, Pairs[2], Pairs[3]);
12150 SDValue HiSub = DAG.getTargetConstant(PPC::sub_wacc_hi, dl, MVT::i32);
12151 SDValue RC = DAG.getTargetConstant(PPC::DMRRCRegClassID, dl, MVT::i32);
12152
12153 return SDValue(DAG.getMachineNode(PPC::REG_SEQUENCE, dl, MVT::v1024i1,
12154 {RC, Lo, LoSub, Hi, HiSub}),
12155 0);
12156}
12157
12158SDValue PPCTargetLowering::LowerVectorLoad(SDValue Op,
12159 SelectionDAG &DAG) const {
12160 SDLoc dl(Op);
12161 LoadSDNode *LN = cast<LoadSDNode>(Op.getNode());
12162 SDValue LoadChain = LN->getChain();
12163 SDValue BasePtr = LN->getBasePtr();
12164 EVT VT = Op.getValueType();
12165
12166 if (VT == MVT::v1024i1 || VT == MVT::v2048i1)
12167 return LowerDMFVectorLoad(Op, DAG);
12168
12169 if (VT != MVT::v256i1 && VT != MVT::v512i1)
12170 return Op;
12171
12172 // Type v256i1 is used for pairs and v512i1 is used for accumulators.
12173 assert((VT != MVT::v512i1 || Subtarget.hasMMA()) &&
12174 "Type unsupported without MMA");
12175 assert((VT != MVT::v256i1 || Subtarget.pairedVectorMemops()) &&
12176 "Type unsupported without paired vector support");
12177
12178 // For v256i1 on ISA Future, let the load go through to instruction selection
12179 // where it will be matched to lxvp/plxvp by the instruction patterns.
12180 if (VT == MVT::v256i1 && Subtarget.isISAFuture())
12181 return Op;
12182
12183 // For other cases, create 2 or 4 v16i8 loads to load the pair or accumulator
12184 // value in 2 or 4 vsx registers.
12185 Align Alignment = LN->getAlign();
12187 SmallVector<SDValue, 4> LoadChains;
12188 unsigned NumVecs = VT.getSizeInBits() / 128;
12189 for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
12190 SDValue Load =
12191 DAG.getLoad(MVT::v16i8, dl, LoadChain, BasePtr,
12192 LN->getPointerInfo().getWithOffset(Idx * 16),
12193 commonAlignment(Alignment, Idx * 16),
12194 LN->getMemOperand()->getFlags(), LN->getAAInfo());
12195 BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
12196 DAG.getConstant(16, dl, BasePtr.getValueType()));
12197 Loads.push_back(Load);
12198 LoadChains.push_back(Load.getValue(1));
12199 }
12200 if (Subtarget.isLittleEndian()) {
12201 std::reverse(Loads.begin(), Loads.end());
12202 std::reverse(LoadChains.begin(), LoadChains.end());
12203 }
12204 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains);
12205 SDValue Value =
12206 DAG.getNode(VT == MVT::v512i1 ? PPCISD::ACC_BUILD : PPCISD::PAIR_BUILD,
12207 dl, VT, Loads);
12208 SDValue RetOps[] = {Value, TF};
12209 return DAG.getMergeValues(RetOps, dl);
12210}
12211
12212SDValue PPCTargetLowering::LowerDMFVectorStore(SDValue Op,
12213 SelectionDAG &DAG) const {
12214
12215 SDLoc dl(Op);
12216 StoreSDNode *SN = cast<StoreSDNode>(Op.getNode());
12217 SDValue StoreChain = SN->getChain();
12218 SDValue BasePtr = SN->getBasePtr();
12221 EVT VT = SN->getValue().getValueType();
12222 bool IsV1024i1 = VT == MVT::v1024i1;
12223 bool IsV2048i1 = VT == MVT::v2048i1;
12224
12225 // The types v1024i1 and v2048i1 are used for Dense Math dmr registers and
12226 // Dense Math dmr pair registers, respectively.
12227 assert((IsV1024i1 || IsV2048i1) && "Unsupported type.");
12228 (void)IsV2048i1;
12229 assert((Subtarget.hasMMA() && Subtarget.isISAFuture()) &&
12230 "Dense Math support required.");
12231 assert(Subtarget.pairedVectorMemops() && "Vector pair support required.");
12232
12233 EVT ReturnTypes[] = {MVT::v256i1, MVT::v256i1};
12234 if (IsV1024i1) {
12236 TargetOpcode::EXTRACT_SUBREG, dl, MVT::v512i1,
12237 Op.getOperand(1),
12238 DAG.getTargetConstant(PPC::sub_wacc_lo, dl, MVT::i32)),
12239 0);
12241 TargetOpcode::EXTRACT_SUBREG, dl, MVT::v512i1,
12242 Op.getOperand(1),
12243 DAG.getTargetConstant(PPC::sub_wacc_hi, dl, MVT::i32)),
12244 0);
12245 MachineSDNode *ExtNode =
12246 DAG.getMachineNode(PPC::DMXXEXTFDMR512, dl, ReturnTypes, Lo);
12247 Values.push_back(SDValue(ExtNode, 0));
12248 Values.push_back(SDValue(ExtNode, 1));
12249 ExtNode = DAG.getMachineNode(PPC::DMXXEXTFDMR512_HI, dl, ReturnTypes, Hi);
12250 Values.push_back(SDValue(ExtNode, 0));
12251 Values.push_back(SDValue(ExtNode, 1));
12252 } else {
12253 // This corresponds to v2048i1 which represents a dmr pair.
12254 SDValue Dmr0(
12255 DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::v1024i1,
12256 Op.getOperand(1),
12257 DAG.getTargetConstant(PPC::sub_dmr0, dl, MVT::i32)),
12258 0);
12259
12260 SDValue Dmr1(
12261 DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::v1024i1,
12262 Op.getOperand(1),
12263 DAG.getTargetConstant(PPC::sub_dmr1, dl, MVT::i32)),
12264 0);
12265
12266 SDValue Dmr0Lo(DAG.getMachineNode(
12267 TargetOpcode::EXTRACT_SUBREG, dl, MVT::v512i1, Dmr0,
12268 DAG.getTargetConstant(PPC::sub_wacc_lo, dl, MVT::i32)),
12269 0);
12270
12271 SDValue Dmr0Hi(DAG.getMachineNode(
12272 TargetOpcode::EXTRACT_SUBREG, dl, MVT::v512i1, Dmr0,
12273 DAG.getTargetConstant(PPC::sub_wacc_hi, dl, MVT::i32)),
12274 0);
12275
12276 SDValue Dmr1Lo(DAG.getMachineNode(
12277 TargetOpcode::EXTRACT_SUBREG, dl, MVT::v512i1, Dmr1,
12278 DAG.getTargetConstant(PPC::sub_wacc_lo, dl, MVT::i32)),
12279 0);
12280
12281 SDValue Dmr1Hi(DAG.getMachineNode(
12282 TargetOpcode::EXTRACT_SUBREG, dl, MVT::v512i1, Dmr1,
12283 DAG.getTargetConstant(PPC::sub_wacc_hi, dl, MVT::i32)),
12284 0);
12285
12286 MachineSDNode *ExtNode =
12287 DAG.getMachineNode(PPC::DMXXEXTFDMR512, dl, ReturnTypes, Dmr0Lo);
12288 Values.push_back(SDValue(ExtNode, 0));
12289 Values.push_back(SDValue(ExtNode, 1));
12290 ExtNode =
12291 DAG.getMachineNode(PPC::DMXXEXTFDMR512_HI, dl, ReturnTypes, Dmr0Hi);
12292 Values.push_back(SDValue(ExtNode, 0));
12293 Values.push_back(SDValue(ExtNode, 1));
12294 ExtNode = DAG.getMachineNode(PPC::DMXXEXTFDMR512, dl, ReturnTypes, Dmr1Lo);
12295 Values.push_back(SDValue(ExtNode, 0));
12296 Values.push_back(SDValue(ExtNode, 1));
12297 ExtNode =
12298 DAG.getMachineNode(PPC::DMXXEXTFDMR512_HI, dl, ReturnTypes, Dmr1Hi);
12299 Values.push_back(SDValue(ExtNode, 0));
12300 Values.push_back(SDValue(ExtNode, 1));
12301 }
12302
12303 if (Subtarget.isLittleEndian())
12304 std::reverse(Values.begin(), Values.end());
12305
12306 SDVTList Tys = DAG.getVTList(MVT::Other);
12308 StoreChain, DAG.getConstant(Intrinsic::ppc_vsx_stxvp, dl, MVT::i32),
12309 Values[0], BasePtr};
12310 MachineMemOperand *MMO = SN->getMemOperand();
12311 unsigned NumVecs = VT.getSizeInBits() / 256;
12312 for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
12313 MachineMemOperand *NewMMO =
12314 DAG.getMachineFunction().getMachineMemOperand(MMO, Idx * 32, 32);
12315 if (Idx > 0) {
12316 BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
12317 DAG.getConstant(32, dl, BasePtr.getValueType()));
12318 Ops[3] = BasePtr;
12319 }
12320 Ops[2] = Values[Idx];
12322 MVT::v256i1, NewMMO);
12323 Stores.push_back(St);
12324 }
12325
12326 SDValue TF = DAG.getTokenFactor(dl, Stores);
12327 return TF;
12328}
12329
// Lower stores of the MMA/paired-vector types: v256i1 (register pairs),
// v512i1 (accumulators) and the larger dense-math-register types. The wide
// value is split into stores of the underlying v16i8 registers.
SDValue PPCTargetLowering::LowerVectorStore(SDValue Op,
                                            SelectionDAG &DAG) const {
  SDLoc dl(Op);
  StoreSDNode *SN = cast<StoreSDNode>(Op.getNode());
  SDValue StoreChain = SN->getChain();
  SDValue BasePtr = SN->getBasePtr();
  SDValue Value = SN->getValue();
  // Holds the second register pair of an accumulator on ISA Future; it is
  // reassigned below and only meaningful on that path.
  SDValue Value2 = SN->getValue();
  EVT StoreVT = Value.getValueType();

  // DMR-sized values have a dedicated lowering.
  if (StoreVT == MVT::v1024i1 || StoreVT == MVT::v2048i1)
    return LowerDMFVectorStore(Op, DAG);

  // Anything that is not a pair/accumulator type is left untouched.
  if (StoreVT != MVT::v256i1 && StoreVT != MVT::v512i1)
    return Op;

  // Type v256i1 is used for pairs and v512i1 is used for accumulators.
  assert((StoreVT != MVT::v512i1 || Subtarget.hasMMA()) &&
         "Type unsupported without MMA");
  assert((StoreVT != MVT::v256i1 || Subtarget.pairedVectorMemops()) &&
         "Type unsupported without paired vector support");

  // For v256i1 on ISA Future, let the store go through to instruction selection
  // where it will be matched to stxvp/pstxvp by the instruction patterns.
  // NOTE(review): the second half of this condition appears to have been
  // dropped from this extract — confirm against upstream.
  if (StoreVT == MVT::v256i1 && Subtarget.isISAFuture() &&
    return Op;

  // For other cases, create 2 or 4 v16i8 stores to store the pair or
  // accumulator underlying registers individually.
  Align Alignment = SN->getAlign();
  // NOTE(review): the declaration of the `Stores` vector used below appears
  // to have been dropped from this extract — confirm against upstream.
  unsigned NumVecs = 2;
  if (StoreVT == MVT::v512i1) {
    if (Subtarget.isISAFuture()) {
      // On ISA Future, split the accumulator into two register pairs first.
      EVT ReturnTypes[] = {MVT::v256i1, MVT::v256i1};
      MachineSDNode *ExtNode = DAG.getMachineNode(
          PPC::DMXXEXTFDMR512, dl, ReturnTypes, Op.getOperand(1));

      Value = SDValue(ExtNode, 0);
      Value2 = SDValue(ExtNode, 1);
    } else
      // Pre-Future: move the accumulator contents to its underlying VSRs.
      Value = DAG.getNode(PPCISD::XXMFACC, dl, MVT::v512i1, Value);
    NumVecs = 4;
  }
  for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
    // Stores are emitted in memory order; on little endian the register
    // order is reversed relative to memory order.
    unsigned VecNum = Subtarget.isLittleEndian() ? NumVecs - 1 - Idx : Idx;
    SDValue Elt;
    if (Subtarget.isISAFuture()) {
      // Index within the selected pair (Value/Value2), not the whole
      // accumulator.
      VecNum = Subtarget.isLittleEndian() ? 1 - (Idx % 2) : (Idx % 2);
      Elt = DAG.getNode(PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8,
                        Idx > 1 ? Value2 : Value,
                        DAG.getConstant(VecNum, dl, getPointerTy(DAG.getDataLayout())));
    } else
      Elt = DAG.getNode(PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8, Value,
                        DAG.getConstant(VecNum, dl, getPointerTy(DAG.getDataLayout())));

    // Each extracted register goes 16 bytes further into the object.
    SDValue Store =
        DAG.getStore(StoreChain, dl, Elt, BasePtr,
                     SN->getPointerInfo().getWithOffset(Idx * 16),
                     commonAlignment(Alignment, Idx * 16),
                     SN->getMemOperand()->getFlags(), SN->getAAInfo());
    BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
                          DAG.getConstant(16, dl, BasePtr.getValueType()));
    Stores.push_back(Store);
  }
  // Tie all the component stores together into a single chain result.
  SDValue TF = DAG.getTokenFactor(dl, Stores);
  return TF;
}
12399
12400SDValue PPCTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const {
12401 SDLoc dl(Op);
12402 if (Op.getValueType() == MVT::v4i32) {
12403 SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
12404
12405 SDValue Zero = getCanonicalConstSplat(0, 1, MVT::v4i32, DAG, dl);
12406 // +16 as shift amt.
12407 SDValue Neg16 = getCanonicalConstSplat(-16, 4, MVT::v4i32, DAG, dl);
12408 SDValue RHSSwap = // = vrlw RHS, 16
12409 BuildIntrinsicOp(Intrinsic::ppc_altivec_vrlw, RHS, Neg16, DAG, dl);
12410
12411 // Shrinkify inputs to v8i16.
12412 LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, LHS);
12413 RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHS);
12414 RHSSwap = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHSSwap);
12415
12416 // Low parts multiplied together, generating 32-bit results (we ignore the
12417 // top parts).
12418 SDValue LoProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmulouh,
12419 LHS, RHS, DAG, dl, MVT::v4i32);
12420
12421 SDValue HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmsumuhm,
12422 LHS, RHSSwap, Zero, DAG, dl, MVT::v4i32);
12423 // Shift the high parts up 16 bits.
12424 HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, HiProd,
12425 Neg16, DAG, dl);
12426 return DAG.getNode(ISD::ADD, dl, MVT::v4i32, LoProd, HiProd);
12427 } else if (Op.getValueType() == MVT::v16i8) {
12428 SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
12429 bool isLittleEndian = Subtarget.isLittleEndian();
12430
12431 // Multiply the even 8-bit parts, producing 16-bit sums.
12432 SDValue EvenParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuleub,
12433 LHS, RHS, DAG, dl, MVT::v8i16);
12434 EvenParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, EvenParts);
12435
12436 // Multiply the odd 8-bit parts, producing 16-bit sums.
12437 SDValue OddParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuloub,
12438 LHS, RHS, DAG, dl, MVT::v8i16);
12439 OddParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OddParts);
12440
12441 // Merge the results together. Because vmuleub and vmuloub are
12442 // instructions with a big-endian bias, we must reverse the
12443 // element numbering and reverse the meaning of "odd" and "even"
12444 // when generating little endian code.
12445 int Ops[16];
12446 for (unsigned i = 0; i != 8; ++i) {
12447 if (isLittleEndian) {
12448 Ops[i*2 ] = 2*i;
12449 Ops[i*2+1] = 2*i+16;
12450 } else {
12451 Ops[i*2 ] = 2*i+1;
12452 Ops[i*2+1] = 2*i+1+16;
12453 }
12454 }
12455 if (isLittleEndian)
12456 return DAG.getVectorShuffle(MVT::v16i8, dl, OddParts, EvenParts, Ops);
12457 else
12458 return DAG.getVectorShuffle(MVT::v16i8, dl, EvenParts, OddParts, Ops);
12459 } else {
12460 llvm_unreachable("Unknown mul to lower!");
12461 }
12462}
12463
12464SDValue PPCTargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const {
12465 bool IsStrict = Op->isStrictFPOpcode();
12466 if (Op.getOperand(IsStrict ? 1 : 0).getValueType() == MVT::f128 &&
12467 !Subtarget.hasP9Vector())
12468 return SDValue();
12469
12470 return Op;
12471}
12472
// Custom lowering for fpext v2f32 to v2f64: feed the extend from a VSX
// half-vector load or a v4f32 value so it can be emitted as a single
// FP_EXTEND_HALF of the selected doubleword.
SDValue PPCTargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const {

  assert(Op.getOpcode() == ISD::FP_EXTEND &&
         "Should only be called for ISD::FP_EXTEND");

  // FIXME: handle extends from half precision float vectors on P9.
  // We only want to custom lower an extend from v2f32 to v2f64.
  if (Op.getValueType() != MVT::v2f64 ||
      Op.getOperand(0).getValueType() != MVT::v2f32)
    return SDValue();

  SDLoc dl(Op);
  SDValue Op0 = Op.getOperand(0);

  switch (Op0.getOpcode()) {
  default:
    return SDValue();
  // NOTE(review): a case label (presumably `case ISD::EXTRACT_SUBVECTOR: {`)
  // and part of the following assert appear to have been dropped from this
  // extract — confirm against upstream.
    assert(Op0.getNumOperands() == 2 &&
           "Node should have 2 operands with second one being a constant!");

    if (Op0.getOperand(0).getValueType() != MVT::v4f32)
      return SDValue();

    // Custom lower is only done for high or low doubleword.
    int Idx = Op0.getConstantOperandVal(1);
    if (Idx % 2 != 0)
      return SDValue();

    // Since input is v4f32, at this point Idx is either 0 or 2.
    // Shift to get the doubleword position we want.
    int DWord = Idx >> 1;

    // High and low word positions are different on little endian.
    if (Subtarget.isLittleEndian())
      DWord ^= 0x1;

    // FP_EXTEND_HALF extends the doubleword selected by its second operand.
    return DAG.getNode(PPCISD::FP_EXTEND_HALF, dl, MVT::v2f64,
                       Op0.getOperand(0), DAG.getConstant(DWord, dl, MVT::i32));
  }
  case ISD::FADD:
  case ISD::FMUL:
  case ISD::FSUB: {
    SDValue NewLoad[2];
    for (unsigned i = 0, ie = Op0.getNumOperands(); i != ie; ++i) {
      // Ensure both input are loads.
      SDValue LdOp = Op0.getOperand(i);
      if (LdOp.getOpcode() != ISD::LOAD)
        return SDValue();
      // Generate new load node: a 64-bit VSX load of the two f32 lanes.
      LoadSDNode *LD = cast<LoadSDNode>(LdOp);
      SDValue LoadOps[] = {LD->getChain(), LD->getBasePtr()};
      NewLoad[i] = DAG.getMemIntrinsicNode(
          PPCISD::LD_VSX_LH, dl, DAG.getVTList(MVT::v4f32, MVT::Other), LoadOps,
          LD->getMemoryVT(), LD->getMemOperand());
    }
    // Redo the arithmetic on the reloaded v4f32 values (flags preserved),
    // then extend doubleword 0 of the result.
    SDValue NewOp =
        DAG.getNode(Op0.getOpcode(), SDLoc(Op0), MVT::v4f32, NewLoad[0],
                    NewLoad[1], Op0.getNode()->getFlags());
    return DAG.getNode(PPCISD::FP_EXTEND_HALF, dl, MVT::v2f64, NewOp,
                       DAG.getConstant(0, dl, MVT::i32));
  }
  case ISD::LOAD: {
    // Replace the v2f32 load with a half-vector VSX load and extend
    // doubleword 0.
    LoadSDNode *LD = cast<LoadSDNode>(Op0);
    SDValue LoadOps[] = {LD->getChain(), LD->getBasePtr()};
    SDValue NewLd = DAG.getMemIntrinsicNode(
        PPCISD::LD_VSX_LH, dl, DAG.getVTList(MVT::v4f32, MVT::Other), LoadOps,
        LD->getMemoryVT(), LD->getMemOperand());
    return DAG.getNode(PPCISD::FP_EXTEND_HALF, dl, MVT::v2f64, NewLd,
                       DAG.getConstant(0, dl, MVT::i32));
  }
  }
  llvm_unreachable("ERROR:Should return for all cases within swtich.");
}
12549
                                            SelectionDAG &DAG,
                                            const PPCSubtarget &STI) {
  // Convert a boolean carry *value* into the CA-flag result form expected
  // as the glue operand of PPCISD::ADDE/SUBE.
  // NOTE(review): the opening signature line (presumably `static SDValue
  // ConvertCarryValueToCarryFlag(EVT SumType, SDValue Value, ...)`) appears
  // to have been dropped from this extract — confirm against upstream.
  SDLoc DL(Value);
  // Normalize the incoming carry to a 0/1 value of SumType.
  if (STI.useCRBits())
    Value = DAG.getNode(ISD::SELECT, DL, SumType, Value,
                        DAG.getConstant(1, DL, SumType),
                        DAG.getConstant(0, DL, SumType));
  else
    Value = DAG.getZExtOrTrunc(Value, DL, SumType);
  // Value + (-1) carries out exactly when Value is non-zero, so the second
  // result of this ADDC is the CA flag corresponding to Value.
  SDValue Sum = DAG.getNode(PPCISD::ADDC, DL, DAG.getVTList(SumType, MVT::i32),
                            Value, DAG.getAllOnesConstant(DL, SumType));
  return Sum.getValue(1);
}
12564
                                             EVT CarryType, SelectionDAG &DAG,
                                             const PPCSubtarget &STI) {
  // Convert a CA-flag result back into a boolean carry value of CarryType.
  // NOTE(review): the opening signature line (presumably `static SDValue
  // ConvertCarryFlagToCarryValue(EVT SumType, SDValue Flag, ...)`) appears
  // to have been dropped from this extract — confirm against upstream.
  SDLoc DL(Flag);
  SDValue Zero = DAG.getConstant(0, DL, SumType);
  // 0 + 0 + CA materializes the carry flag as a 0/1 value in SumType.
  SDValue Carry = DAG.getNode(
      PPCISD::ADDE, DL, DAG.getVTList(SumType, MVT::i32), Zero, Zero, Flag);
  if (STI.useCRBits())
    return DAG.getSetCC(DL, CarryType, Carry, Zero, ISD::SETNE);
  return DAG.getZExtOrTrunc(Carry, DL, CarryType);
}
12576
12577SDValue PPCTargetLowering::LowerADDSUBO(SDValue Op, SelectionDAG &DAG) const {
12578
12579 SDLoc DL(Op);
12580 SDNode *N = Op.getNode();
12581 EVT VT = N->getValueType(0);
12582 EVT CarryType = N->getValueType(1);
12583 unsigned Opc = N->getOpcode();
12584 bool IsAdd = Opc == ISD::UADDO;
12585 Opc = IsAdd ? PPCISD::ADDC : PPCISD::SUBC;
12586 SDValue Sum = DAG.getNode(Opc, DL, DAG.getVTList(VT, MVT::i32),
12587 N->getOperand(0), N->getOperand(1));
12588 SDValue Carry = ConvertCarryFlagToCarryValue(VT, Sum.getValue(1), CarryType,
12589 DAG, Subtarget);
12590 if (!IsAdd)
12591 Carry = DAG.getNode(ISD::XOR, DL, CarryType, Carry,
12592 DAG.getConstant(1UL, DL, CarryType));
12593 return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Sum, Carry);
12594}
12595
12596SDValue PPCTargetLowering::LowerADDSUBO_CARRY(SDValue Op,
12597 SelectionDAG &DAG) const {
12598 SDLoc DL(Op);
12599 SDNode *N = Op.getNode();
12600 unsigned Opc = N->getOpcode();
12601 EVT VT = N->getValueType(0);
12602 EVT CarryType = N->getValueType(1);
12603 SDValue CarryOp = N->getOperand(2);
12604 bool IsAdd = Opc == ISD::UADDO_CARRY;
12605 Opc = IsAdd ? PPCISD::ADDE : PPCISD::SUBE;
12606 if (!IsAdd)
12607 CarryOp = DAG.getNode(ISD::XOR, DL, CarryOp.getValueType(), CarryOp,
12608 DAG.getConstant(1UL, DL, CarryOp.getValueType()));
12609 CarryOp = ConvertCarryValueToCarryFlag(VT, CarryOp, DAG, Subtarget);
12610 SDValue Sum = DAG.getNode(Opc, DL, DAG.getVTList(VT, MVT::i32),
12611 Op.getOperand(0), Op.getOperand(1), CarryOp);
12612 CarryOp = ConvertCarryFlagToCarryValue(VT, Sum.getValue(1), CarryType, DAG,
12613 Subtarget);
12614 if (!IsAdd)
12615 CarryOp = DAG.getNode(ISD::XOR, DL, CarryOp.getValueType(), CarryOp,
12616 DAG.getConstant(1UL, DL, CarryOp.getValueType()));
12617 return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Sum, CarryOp);
12618}
12619
12620SDValue PPCTargetLowering::LowerSSUBO(SDValue Op, SelectionDAG &DAG) const {
12621
12622 SDLoc dl(Op);
12623 SDValue LHS = Op.getOperand(0);
12624 SDValue RHS = Op.getOperand(1);
12625 EVT VT = Op.getNode()->getValueType(0);
12626
12627 SDValue Sub = DAG.getNode(ISD::SUB, dl, VT, LHS, RHS);
12628
12629 SDValue Xor1 = DAG.getNode(ISD::XOR, dl, VT, RHS, LHS);
12630 SDValue Xor2 = DAG.getNode(ISD::XOR, dl, VT, Sub, LHS);
12631
12632 SDValue And = DAG.getNode(ISD::AND, dl, VT, Xor1, Xor2);
12633
12634 SDValue Overflow =
12635 DAG.getNode(ISD::SRL, dl, VT, And,
12636 DAG.getConstant(VT.getSizeInBits() - 1, dl, MVT::i32));
12637
12638 SDValue OverflowTrunc =
12639 DAG.getNode(ISD::TRUNCATE, dl, Op.getNode()->getValueType(1), Overflow);
12640
12641 return DAG.getMergeValues({Sub, OverflowTrunc}, dl);
12642}
12643
12644/// Implements signed add with overflow detection using the rule:
12645/// (x eqv y) & (sum xor x), where the overflow bit is extracted from the sign
12646SDValue PPCTargetLowering::LowerSADDO(SDValue Op, SelectionDAG &DAG) const {
12647
12648 SDLoc dl(Op);
12649 SDValue LHS = Op.getOperand(0);
12650 SDValue RHS = Op.getOperand(1);
12651 EVT VT = Op.getNode()->getValueType(0);
12652
12653 SDValue Sum = DAG.getNode(ISD::ADD, dl, VT, LHS, RHS);
12654
12655 // Compute ~(x xor y)
12656 SDValue XorXY = DAG.getNode(ISD::XOR, dl, VT, LHS, RHS);
12657 SDValue EqvXY = DAG.getNOT(dl, XorXY, VT);
12658 // Compute (s xor x)
12659 SDValue SumXorX = DAG.getNode(ISD::XOR, dl, VT, Sum, LHS);
12660
12661 // overflow = (x eqv y) & (s xor x)
12662 SDValue OverflowInSign = DAG.getNode(ISD::AND, dl, VT, EqvXY, SumXorX);
12663
12664 // Shift sign bit down to LSB
12665 SDValue Overflow =
12666 DAG.getNode(ISD::SRL, dl, VT, OverflowInSign,
12667 DAG.getConstant(VT.getSizeInBits() - 1, dl, MVT::i32));
12668 // Truncate to the overflow type (i1)
12669 SDValue OverflowTrunc =
12670 DAG.getNode(ISD::TRUNCATE, dl, Op.getNode()->getValueType(1), Overflow);
12671
12672 return DAG.getMergeValues({Sum, OverflowTrunc}, dl);
12673}
12674
12675// Lower unsigned 3-way compare producing -1/0/1.
12676SDValue PPCTargetLowering::LowerUCMP(SDValue Op, SelectionDAG &DAG) const {
12677 SDLoc DL(Op);
12678 SDValue A = DAG.getFreeze(Op.getOperand(0));
12679 SDValue B = DAG.getFreeze(Op.getOperand(1));
12680 EVT OpVT = A.getValueType();
12681 EVT ResVT = Op.getValueType();
12682
12683 // On PPC64, i32 carries are affected by the upper 32 bits of the registers.
12684 // We must zero-extend to i64 to ensure the carry reflects the 32-bit unsigned
12685 // comparison.
12686 if (Subtarget.isPPC64() && OpVT == MVT::i32) {
12687 A = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, A);
12688 B = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, B);
12689 OpVT = MVT::i64;
12690 }
12691
12692 // First compute diff = A - B.
12693 SDValue Diff = DAG.getNode(ISD::SUB, DL, OpVT, A, B);
12694
12695 // Generate B - A using SUBC to capture carry.
12696 SDVTList VTs = DAG.getVTList(OpVT, MVT::i32);
12697 SDValue SubC = DAG.getNode(PPCISD::SUBC, DL, VTs, B, A);
12698 SDValue CA0 = SubC.getValue(1);
12699
12700 // t2 = A - B + CA0 using SUBE.
12701 SDValue SubE1 = DAG.getNode(PPCISD::SUBE, DL, VTs, A, B, CA0);
12702 SDValue CA1 = SubE1.getValue(1);
12703
12704 // res = diff - t2 + CA1 using SUBE (produces desired -1/0/1).
12705 SDValue ResPair = DAG.getNode(PPCISD::SUBE, DL, VTs, Diff, SubE1, CA1);
12706
12707 // Extract the first result and truncate to result type if needed.
12708 return DAG.getSExtOrTrunc(ResPair.getValue(0), DL, ResVT);
12709}
12710
/// LowerOperation - Provide custom lowering hooks for some operations.
///
// NOTE(review): the function signature line (presumably `SDValue
// PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {`)
// and several case labels appear to have been dropped from this extract —
// the orphaned `return` statements below are flagged individually. Confirm
// against upstream before relying on the exact set of opcodes handled.
  switch (Op.getOpcode()) {
  default:
    llvm_unreachable("Wasn't expecting to be able to lower this!");
  case ISD::FPOW: return lowerPow(Op, DAG);
  case ISD::FSIN: return lowerSin(Op, DAG);
  case ISD::FCOS: return lowerCos(Op, DAG);
  case ISD::FLOG: return lowerLog(Op, DAG);
  case ISD::FLOG10: return lowerLog10(Op, DAG);
  case ISD::FEXP: return lowerExp(Op, DAG);
  case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
  case ISD::BlockAddress: return LowerBlockAddress(Op, DAG);
  case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG);
  case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
  case ISD::JumpTable: return LowerJumpTable(Op, DAG);
  case ISD::STRICT_FSETCC:
  // NOTE(review): a companion case label (presumably ISD::STRICT_FSETCCS)
  // appears to be missing here in this extract.
  case ISD::SETCC: return LowerSETCC(Op, DAG);
  case ISD::INIT_TRAMPOLINE: return LowerINIT_TRAMPOLINE(Op, DAG);
  case ISD::ADJUST_TRAMPOLINE: return LowerADJUST_TRAMPOLINE(Op, DAG);
  case ISD::SSUBO:
    return LowerSSUBO(Op, DAG);
  case ISD::SADDO:
    return LowerSADDO(Op, DAG);

  case ISD::INLINEASM:
  case ISD::INLINEASM_BR: return LowerINLINEASM(Op, DAG);
  // Variable argument lowering.
  case ISD::VASTART: return LowerVASTART(Op, DAG);
  case ISD::VAARG: return LowerVAARG(Op, DAG);
  case ISD::VACOPY: return LowerVACOPY(Op, DAG);

  case ISD::STACKRESTORE: return LowerSTACKRESTORE(Op, DAG);
  case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG);
  // NOTE(review): case label (presumably ISD::GET_DYNAMIC_AREA_OFFSET)
  // missing from this extract.
    return LowerGET_DYNAMIC_AREA_OFFSET(Op, DAG);

  // Exception handling lowering.
  case ISD::EH_DWARF_CFA: return LowerEH_DWARF_CFA(Op, DAG);
  case ISD::EH_SJLJ_SETJMP: return lowerEH_SJLJ_SETJMP(Op, DAG);
  case ISD::EH_SJLJ_LONGJMP: return lowerEH_SJLJ_LONGJMP(Op, DAG);

  case ISD::LOAD: return LowerLOAD(Op, DAG);
  case ISD::STORE: return LowerSTORE(Op, DAG);
  case ISD::TRUNCATE: return LowerTRUNCATE(Op, DAG);
  case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
  // NOTE(review): the STRICT_* conversion case labels appear to be missing
  // from this extract around here.
  case ISD::FP_TO_UINT:
  case ISD::FP_TO_SINT: return LowerFP_TO_INT(Op, DAG, SDLoc(Op));
  case ISD::UINT_TO_FP:
  case ISD::SINT_TO_FP: return LowerINT_TO_FP(Op, DAG);
  case ISD::GET_ROUNDING: return LowerGET_ROUNDING(Op, DAG);
  case ISD::SET_ROUNDING:
    return LowerSET_ROUNDING(Op, DAG);

  // Lower 64-bit shifts.
  case ISD::SHL_PARTS: return LowerSHL_PARTS(Op, DAG);
  case ISD::SRL_PARTS: return LowerSRL_PARTS(Op, DAG);
  case ISD::SRA_PARTS: return LowerSRA_PARTS(Op, DAG);

  case ISD::FSHL: return LowerFunnelShift(Op, DAG);
  case ISD::FSHR: return LowerFunnelShift(Op, DAG);

  // Vector-related lowering.
  case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG);
  case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG);
  case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
  case ISD::SCALAR_TO_VECTOR: return LowerSCALAR_TO_VECTOR(Op, DAG);
  case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
  case ISD::MUL: return LowerMUL(Op, DAG);
  case ISD::FP_EXTEND: return LowerFP_EXTEND(Op, DAG);
  case ISD::FP_ROUND:
    return LowerFP_ROUND(Op, DAG);
  case ISD::ROTL: return LowerROTL(Op, DAG);

  // For counter-based loop handling.
  // NOTE(review): case label (presumably ISD::INTRINSIC_W_CHAIN) missing
  // from this extract.
    return LowerINTRINSIC_W_CHAIN(Op, DAG);

  case ISD::BITCAST: return LowerBITCAST(Op, DAG);

  // Frame & Return address.
  case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG);
  case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);

  // NOTE(review): case label (presumably ISD::INTRINSIC_VOID) missing from
  // this extract.
    return LowerINTRINSIC_VOID(Op, DAG);
  case ISD::BSWAP:
    return LowerBSWAP(Op, DAG);
  // NOTE(review): case label (presumably ISD::ATOMIC_CMP_SWAP) missing from
  // this extract.
    return LowerATOMIC_CMP_SWAP(Op, DAG);
  case ISD::ATOMIC_STORE:
    return LowerATOMIC_LOAD_STORE(Op, DAG);
  case ISD::IS_FPCLASS:
    return LowerIS_FPCLASS(Op, DAG);
  case ISD::UADDO:
  case ISD::USUBO:
    return LowerADDSUBO(Op, DAG);
  case ISD::UADDO_CARRY:
  case ISD::USUBO_CARRY:
    return LowerADDSUBO_CARRY(Op, DAG);
  case ISD::UCMP:
    return LowerUCMP(Op, DAG);
  case ISD::STRICT_LRINT:
  case ISD::STRICT_LLRINT:
  case ISD::STRICT_LROUND:
  // NOTE(review): further case labels (presumably the remaining STRICT_*
  // round variants) appear to be missing here in this extract.
    // Strict rounding nodes are only legal as-is when FP exceptions are
    // known not to matter; otherwise request expansion.
    if (Op->getFlags().hasNoFPExcept())
      return Op;
    return SDValue();
  case ISD::VP_LOAD:
    return LowerVP_LOAD(Op, DAG);
  case ISD::VP_STORE:
    return LowerVP_STORE(Op, DAG);
  }
}
12834
                                           SelectionDAG &DAG) const {
  // Custom type legalization: replace results of nodes whose types are
  // illegal with legal equivalents pushed into Results.
  // NOTE(review): the first lines of this definition (presumably `void
  // PPCTargetLowering::ReplaceNodeResults(SDNode *N,
  // SmallVectorImpl<SDValue> &Results,`) and a few case labels below appear
  // to have been dropped from this extract — confirm against upstream.
  SDLoc dl(N);
  switch (N->getOpcode()) {
  default:
    llvm_unreachable("Do not know how to custom type legalize this operation!");
  case ISD::ATOMIC_LOAD: {
    SDValue Res = LowerATOMIC_LOAD_STORE(SDValue(N, 0), DAG);
    Results.push_back(Res);
    Results.push_back(Res.getValue(1));
    break;
  }
  case ISD::READCYCLECOUNTER: {
    // READ_TIME_BASE yields two i32 halves that are paired into an i64.
    SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
    SDValue RTB = DAG.getNode(PPCISD::READ_TIME_BASE, dl, VTs, N->getOperand(0));

    Results.push_back(
        DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, RTB, RTB.getValue(1)));
    Results.push_back(RTB.getValue(2));
    break;
  }
  // NOTE(review): case label (presumably `case ISD::INTRINSIC_W_CHAIN: {`)
  // missing from this extract.
    if (N->getConstantOperandVal(1) != Intrinsic::loop_decrement)
      break;

    assert(N->getValueType(0) == MVT::i1 &&
           "Unexpected result type for CTR decrement intrinsic");
    EVT SVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
                                 N->getValueType(0));
    SDVTList VTs = DAG.getVTList(SVT, MVT::Other);
    SDValue NewInt = DAG.getNode(N->getOpcode(), dl, VTs, N->getOperand(0),
                                 N->getOperand(1));

    // Re-emit with the legal setcc type and truncate back to i1.
    Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, NewInt));
    Results.push_back(NewInt.getValue(1));
    break;
  }
  // NOTE(review): case label (presumably `case ISD::INTRINSIC_WO_CHAIN: {`)
  // missing from this extract.
    switch (N->getConstantOperandVal(0)) {
    case Intrinsic::ppc_pack_longdouble:
      Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::ppcf128,
                                    N->getOperand(2), N->getOperand(1)));
      break;
    case Intrinsic::ppc_maxfe:
    case Intrinsic::ppc_minfe:
    case Intrinsic::ppc_fnmsub:
    case Intrinsic::ppc_convert_f128_to_ppcf128:
      Results.push_back(LowerINTRINSIC_WO_CHAIN(SDValue(N, 0), DAG));
      break;
    }
    break;
  }
  case ISD::VAARG: {
    // Only custom-legalized for 32-bit SVR4.
    if (!Subtarget.isSVR4ABI() || Subtarget.isPPC64())
      return;

    EVT VT = N->getValueType(0);

    if (VT == MVT::i64) {
      SDValue NewNode = LowerVAARG(SDValue(N, 1), DAG);

      Results.push_back(NewNode);
      Results.push_back(NewNode.getValue(1));
    }
    return;
  }
  // NOTE(review): the STRICT_FP_TO_SINT/UINT case labels appear to be
  // missing from this extract here.
  case ISD::FP_TO_SINT:
  case ISD::FP_TO_UINT: {
    // LowerFP_TO_INT() can only handle f32 and f64.
    if (N->getOperand(N->isStrictFPOpcode() ? 1 : 0).getValueType() ==
        MVT::ppcf128)
      return;
    SDValue LoweredValue = LowerFP_TO_INT(SDValue(N, 0), DAG, dl);
    Results.push_back(LoweredValue);
    if (N->isStrictFPOpcode())
      Results.push_back(LoweredValue.getValue(1));
    return;
  }
  case ISD::TRUNCATE: {
    if (!N->getValueType(0).isVector())
      return;
    SDValue Lowered = LowerTRUNCATEVector(SDValue(N, 0), DAG);
    if (Lowered)
      Results.push_back(Lowered);
    return;
  }
  case ISD::SCALAR_TO_VECTOR: {
    SDValue Lowered = LowerSCALAR_TO_VECTOR(SDValue(N, 0), DAG);
    if (Lowered)
      Results.push_back(Lowered);
    return;
  }
  case ISD::FSHL:
  case ISD::FSHR:
    // Don't handle funnel shifts here.
    return;
  case ISD::BITCAST:
    // Don't handle bitcast here.
    return;
  case ISD::FP_EXTEND:
    SDValue Lowered = LowerFP_EXTEND(SDValue(N, 0), DAG);
    if (Lowered)
      Results.push_back(Lowered);
    return;
  }
}
12944
12945//===----------------------------------------------------------------------===//
12946// Other Lowering Code
12947//===----------------------------------------------------------------------===//
12948
  // Emit a call to a zero-operand PPC intrinsic (used below for sync/lwsync
  // memory fences).
  // NOTE(review): the enclosing signature line appears to have been dropped
  // from this extract — confirm against upstream.
  return Builder.CreateIntrinsic(Id, {});
}
12952
                                         Value *Addr,
                                         AtomicOrdering Ord) const {
  // Emit a load-reserve (larx-family) intrinsic for the given atomic width.
  // NOTE(review): the opening signature line (presumably `Value *
  // PPCTargetLowering::emitLoadLinked(IRBuilderBase &Builder, Type *ValueTy,`)
  // appears to have been dropped from this extract — confirm upstream.
  unsigned SZ = ValueTy->getPrimitiveSizeInBits();

  assert((SZ == 8 || SZ == 16 || SZ == 32 || SZ == 64) &&
         "Only 8/16/32/64-bit atomic loads supported");
  // Select the larx variant matching the access width.
  Intrinsic::ID IntID;
  switch (SZ) {
  default:
    llvm_unreachable("Unexpected PrimitiveSize");
  case 8:
    IntID = Intrinsic::ppc_lbarx;
    // Sub-word variants require the partword-atomics feature.
    assert(Subtarget.hasPartwordAtomics() && "No support partword atomics.");
    break;
  case 16:
    IntID = Intrinsic::ppc_lharx;
    assert(Subtarget.hasPartwordAtomics() && "No support partword atomics.");
    break;
  case 32:
    IntID = Intrinsic::ppc_lwarx;
    break;
  case 64:
    IntID = Intrinsic::ppc_ldarx;
    break;
  }
  Value *Call =
      Builder.CreateIntrinsic(IntID, Addr, /*FMFSource=*/nullptr, "larx");

  // Narrow the intrinsic's integer result back to the requested type.
  return Builder.CreateTruncOrBitCast(Call, ValueTy);
}
12984
// Perform a store-conditional operation to Addr. Return the status of the
// store. This should be 0 if the store succeeded, non-zero otherwise.
// NOTE(review): the opening signature line (presumably `Value *
// PPCTargetLowering::emitStoreConditional(IRBuilderBase &Builder,`) appears
// to have been dropped from this extract — confirm against upstream.
                                            Value *Val, Value *Addr,
                                            AtomicOrdering Ord) const {
  Type *Ty = Val->getType();
  unsigned SZ = Ty->getPrimitiveSizeInBits();

  assert((SZ == 8 || SZ == 16 || SZ == 32 || SZ == 64) &&
         "Only 8/16/32/64-bit atomic loads supported");
  // Select the stcx variant matching the access width.
  Intrinsic::ID IntID;
  switch (SZ) {
  default:
    llvm_unreachable("Unexpected PrimitiveSize");
  case 8:
    IntID = Intrinsic::ppc_stbcx;
    // Sub-word variants require the partword-atomics feature.
    assert(Subtarget.hasPartwordAtomics() && "No support partword atomics.");
    break;
  case 16:
    IntID = Intrinsic::ppc_sthcx;
    assert(Subtarget.hasPartwordAtomics() && "No support partword atomics.");
    break;
  case 32:
    IntID = Intrinsic::ppc_stwcx;
    break;
  case 64:
    IntID = Intrinsic::ppc_stdcx;
    break;
  }

  // The sub-word stcx intrinsics take an i32 value operand.
  if (SZ == 8 || SZ == 16)
    Val = Builder.CreateZExt(Val, Builder.getInt32Ty());

  Value *Call = Builder.CreateIntrinsic(IntID, {Addr, Val},
                                        /*FMFSource=*/nullptr, "stcx");
  // Flip bit 0 so success maps to 0 per the contract above (the intrinsic
  // presumably returns 1 on success — confirm against intrinsic docs).
  return Builder.CreateXor(Call, Builder.getInt32(1));
}
13022
// The mappings for emitLeading/TrailingFence is taken from
// http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html
// NOTE(review): this extract appears to be missing the function signature
// line (presumably `Instruction *PPCTargetLowering::emitLeadingFence(
// IRBuilderBase &Builder,`) and the `if` that guards the first return
// (presumably a check for sequentially consistent ordering) — confirm
// against upstream.
                                                 Instruction *Inst,
                                                 AtomicOrdering Ord) const {
    return callIntrinsic(Builder, Intrinsic::ppc_sync);
  // Release-or-stronger (below the case handled above) needs only lwsync.
  if (isReleaseOrStronger(Ord))
    return callIntrinsic(Builder, Intrinsic::ppc_lwsync);
  // Weaker orderings need no leading fence.
  return nullptr;
}
13034
                                                  Instruction *Inst,
                                                  AtomicOrdering Ord) const {
  // Emit the fence required after an atomic instruction (acquire side).
  // NOTE(review): the opening signature line (presumably `Instruction *
  // PPCTargetLowering::emitTrailingFence(IRBuilderBase &Builder,`) appears
  // to have been dropped from this extract — confirm against upstream.
  if (Inst->hasAtomicLoad() && isAcquireOrStronger(Ord)) {
    // See http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html and
    // http://www.rdrop.com/users/paulmck/scalability/paper/N2745r.2011.03.04a.html
    // and http://www.cl.cam.ac.uk/~pes20/cppppc/ for justification.
    // Plain loads use the lighter-weight cfence; other atomics get lwsync.
    if (isa<LoadInst>(Inst))
      return Builder.CreateIntrinsic(Intrinsic::ppc_cfence, {Inst->getType()},
                                     {Inst});
    // FIXME: Can use isync for rmw operation.
    return callIntrinsic(Builder, Intrinsic::ppc_lwsync);
  }
  // No trailing fence needed otherwise.
  return nullptr;
}
13050
                                                    unsigned AtomicSize,
                                                    unsigned BinOpcode,
                                                    unsigned CmpOpcode,
                                                    unsigned CmpPred) const {
  // Expand an atomic binary-op pseudo into a larx/stcx. retry loop
  // (optionally with a compare for min/max forms).
  // NOTE(review): the opening signature line(s) (presumably
  // `MachineBasicBlock *PPCTargetLowering::EmitAtomicBinary(MachineInstr
  // &MI, MachineBasicBlock *BB,`) and a few other lines flagged below
  // appear to have been dropped from this extract — confirm upstream.
  // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
  const TargetInstrInfo *TII = Subtarget.getInstrInfo();

  // Pick the load-reserve/store-conditional pair for the access width.
  auto LoadMnemonic = PPC::LDARX;
  auto StoreMnemonic = PPC::STDCX;
  switch (AtomicSize) {
  default:
    llvm_unreachable("Unexpected size of atomic entity");
  case 1:
    LoadMnemonic = PPC::LBARX;
    StoreMnemonic = PPC::STBCX;
    assert(Subtarget.hasPartwordAtomics() && "Call this only with size >=4");
    break;
  case 2:
    LoadMnemonic = PPC::LHARX;
    StoreMnemonic = PPC::STHCX;
    assert(Subtarget.hasPartwordAtomics() && "Call this only with size >=4");
    break;
  case 4:
    LoadMnemonic = PPC::LWARX;
    StoreMnemonic = PPC::STWCX;
    break;
  case 8:
    LoadMnemonic = PPC::LDARX;
    StoreMnemonic = PPC::STDCX;
    break;
  }

  const BasicBlock *LLVM_BB = BB->getBasicBlock();
  MachineFunction *F = BB->getParent();
  // NOTE(review): a line appears to be missing here (presumably the
  // MachineFunction::iterator `It` used by the F->insert calls below).

  Register dest = MI.getOperand(0).getReg();
  Register ptrA = MI.getOperand(1).getReg();
  Register ptrB = MI.getOperand(2).getReg();
  Register incr = MI.getOperand(3).getReg();
  DebugLoc dl = MI.getDebugLoc();

  MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *loop2MBB =
      CmpOpcode ? F->CreateMachineBasicBlock(LLVM_BB) : nullptr;
  MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
  F->insert(It, loopMBB);
  if (CmpOpcode)
    F->insert(It, loop2MBB);
  F->insert(It, exitMBB);
  // Everything after MI moves into the exit block.
  exitMBB->splice(exitMBB->begin(), BB,
                  std::next(MachineBasicBlock::iterator(MI)), BB->end());
  // NOTE(review): a line appears to be missing here (presumably
  // `exitMBB->transferSuccessorsAndUpdatePHIs(BB);`).

  MachineRegisterInfo &RegInfo = F->getRegInfo();
  // For plain swaps (BinOpcode==0) the increment register is stored as-is.
  Register TmpReg = (!BinOpcode) ? incr :
    RegInfo.createVirtualRegister( AtomicSize == 8 ? &PPC::G8RCRegClass
                                                   : &PPC::GPRCRegClass);

  // thisMBB:
  //   ...
  //   fallthrough --> loopMBB
  BB->addSuccessor(loopMBB);

  // loopMBB:
  //   l[wd]arx dest, ptr
  //   add r0, dest, incr
  //   st[wd]cx. r0, ptr
  //   bne- loopMBB
  //   fallthrough --> exitMBB

  // For max/min...
  //  loopMBB:
  //   l[wd]arx dest, ptr
  //   cmpl?[wd] dest, incr
  //   bgt exitMBB
  //  loop2MBB:
  //   st[wd]cx. dest, ptr
  //   bne- loopMBB
  //   fallthrough --> exitMBB

  BB = loopMBB;
  BuildMI(BB, dl, TII->get(LoadMnemonic), dest)
    .addReg(ptrA).addReg(ptrB);
  if (BinOpcode)
    BuildMI(BB, dl, TII->get(BinOpcode), TmpReg).addReg(incr).addReg(dest);
  if (CmpOpcode) {
    Register CrReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
    // Signed comparisons of byte or halfword values must be sign-extended.
    if (CmpOpcode == PPC::CMPW && AtomicSize < 4) {
      Register ExtReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
      BuildMI(BB, dl, TII->get(AtomicSize == 1 ? PPC::EXTSB : PPC::EXTSH),
              ExtReg).addReg(dest);
      BuildMI(BB, dl, TII->get(CmpOpcode), CrReg).addReg(ExtReg).addReg(incr);
    } else
      BuildMI(BB, dl, TII->get(CmpOpcode), CrReg).addReg(dest).addReg(incr);

    // Exit early when the compare says the stored value should be kept.
    BuildMI(BB, dl, TII->get(PPC::BCC))
        .addImm(CmpPred)
        .addReg(CrReg)
        .addMBB(exitMBB);
    BB->addSuccessor(loop2MBB);
    BB->addSuccessor(exitMBB);
    BB = loop2MBB;
  }
  BuildMI(BB, dl, TII->get(StoreMnemonic))
    .addReg(TmpReg).addReg(ptrA).addReg(ptrB);
  // Retry the loop if the store-conditional lost the reservation.
  BuildMI(BB, dl, TII->get(PPC::BCC))
      // NOTE(review): the predicate immediate operand (presumably
      // `.addImm(PPC::PRED_NE)`) appears to be missing from this extract.
      .addReg(PPC::CR0)
      .addMBB(loopMBB);
  BB->addSuccessor(loopMBB);
  BB->addSuccessor(exitMBB);

  // exitMBB:
  //   ...
  BB = exitMBB;
  return BB;
}
13172
  // Returns true when MI's opcode is known to produce a sign-extended
  // result: sign-extending loads, extend/algebraic-shift instructions, or a
  // COPY whose source TII can prove is sign-extended.
  // NOTE(review): the opening signature line (presumably `static bool
  // isSignExtended(MachineInstr &MI, const PPCInstrInfo *TII) {`) appears
  // to have been dropped from this extract — confirm against upstream.
  switch(MI.getOpcode()) {
  default:
    return false;
  case PPC::COPY:
    // Delegate to the target instr info for the copied virtual register.
    return TII->isSignExtended(MI.getOperand(1).getReg(),
                               &MI.getMF()->getRegInfo());
  case PPC::LHA:
  case PPC::LHA8:
  case PPC::LHAU:
  case PPC::LHAU8:
  case PPC::LHAUX:
  case PPC::LHAUX8:
  case PPC::LHAX:
  case PPC::LHAX8:
  case PPC::LWA:
  case PPC::LWAUX:
  case PPC::LWAX:
  case PPC::LWAX_32:
  case PPC::LWA_32:
  case PPC::PLHA:
  case PPC::PLHA8:
  case PPC::PLHA8pc:
  case PPC::PLHApc:
  case PPC::PLWA:
  case PPC::PLWA8:
  case PPC::PLWA8pc:
  case PPC::PLWApc:
  case PPC::EXTSB:
  case PPC::EXTSB8:
  case PPC::EXTSB8_32_64:
  case PPC::EXTSB8_rec:
  case PPC::EXTSB_rec:
  case PPC::EXTSH:
  case PPC::EXTSH8:
  case PPC::EXTSH8_32_64:
  case PPC::EXTSH8_rec:
  case PPC::EXTSH_rec:
  case PPC::EXTSW:
  case PPC::EXTSWSLI:
  case PPC::EXTSWSLI_32_64:
  case PPC::EXTSWSLI_32_64_rec:
  case PPC::EXTSWSLI_rec:
  case PPC::EXTSW_32:
  case PPC::EXTSW_32_64:
  case PPC::EXTSW_32_64_rec:
  case PPC::EXTSW_rec:
  case PPC::SRAW:
  case PPC::SRAWI:
  case PPC::SRAWI_rec:
  case PPC::SRAW_rec:
    return true;
  }
  // Unreachable: every switch path above returns.
  return false;
}
13228
// EmitPartwordAtomicBinary: expands an 8- or 16-bit atomic read-modify-write
// pseudo into a lwarx/stwcx. loop operating on the naturally-aligned word
// that contains the byte/halfword, masking the update into the correct lane.
// BinOpcode==0 encodes ATOMIC_SWAP; a nonzero CmpOpcode/CmpPred pair encodes
// the min/max family, which exits the loop early (skipping the store) when
// the comparison says the current memory value should be kept.
13231                                     bool is8bit, // operation
13232     unsigned BinOpcode, unsigned CmpOpcode, unsigned CmpPred) const {
13233   // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
13234   const PPCInstrInfo *TII = Subtarget.getInstrInfo();
13235 
13236   // If this is a signed comparison and the value being compared is not known
13237   // to be sign extended, sign extend it here.
13238   DebugLoc dl = MI.getDebugLoc();
13239   MachineFunction *F = BB->getParent();
13240   MachineRegisterInfo &RegInfo = F->getRegInfo();
13241   Register incr = MI.getOperand(3).getReg();
13242   bool IsSignExtended =
13243       incr.isVirtual() && isSignExtended(*RegInfo.getVRegDef(incr), TII);
13244 
13245   if (CmpOpcode == PPC::CMPW && !IsSignExtended) {
13246     Register ValueReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
13247     BuildMI(*BB, MI, dl, TII->get(is8bit ? PPC::EXTSB : PPC::EXTSH), ValueReg)
13248         .addReg(MI.getOperand(3).getReg());
13249     MI.getOperand(3).setReg(ValueReg);
13250     incr = ValueReg;
13251   }
13252   // If we support part-word atomic mnemonics, just use them
13253   if (Subtarget.hasPartwordAtomics())
13254     return EmitAtomicBinary(MI, BB, is8bit ? 1 : 2, BinOpcode, CmpOpcode,
13255                             CmpPred);
13256 
13257   // In 64 bit mode we have to use 64 bits for addresses, even though the
13258   // lwarx/stwcx are 32 bits. With the 32-bit atomics we can use address
13259   // registers without caring whether they're 32 or 64, but here we're
13260   // doing actual arithmetic on the addresses.
13261   bool is64bit = Subtarget.isPPC64();
13262   bool isLittleEndian = Subtarget.isLittleEndian();
13263   unsigned ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;
13264 
13265   const BasicBlock *LLVM_BB = BB->getBasicBlock();
13267 
13268   Register dest = MI.getOperand(0).getReg();
13269   Register ptrA = MI.getOperand(1).getReg();
13270   Register ptrB = MI.getOperand(2).getReg();
13271 
13272   MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
13273   MachineBasicBlock *loop2MBB =
13274       CmpOpcode ? F->CreateMachineBasicBlock(LLVM_BB) : nullptr;
13275   MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
13276   F->insert(It, loopMBB);
13277   if (CmpOpcode)
13278     F->insert(It, loop2MBB);
13279   F->insert(It, exitMBB);
13280   exitMBB->splice(exitMBB->begin(), BB,
13281                   std::next(MachineBasicBlock::iterator(MI)), BB->end());
13283 
13284   const TargetRegisterClass *RC =
13285       is64bit ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
13286   const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
13287 
13288   Register PtrReg = RegInfo.createVirtualRegister(RC);
13289   Register Shift1Reg = RegInfo.createVirtualRegister(GPRC);
13290   Register ShiftReg =
13291       isLittleEndian ? Shift1Reg : RegInfo.createVirtualRegister(GPRC);
13292   Register Incr2Reg = RegInfo.createVirtualRegister(GPRC);
13293   Register MaskReg = RegInfo.createVirtualRegister(GPRC);
13294   Register Mask2Reg = RegInfo.createVirtualRegister(GPRC);
13295   Register Mask3Reg = RegInfo.createVirtualRegister(GPRC);
13296   Register Tmp2Reg = RegInfo.createVirtualRegister(GPRC);
13297   Register Tmp3Reg = RegInfo.createVirtualRegister(GPRC);
13298   Register Tmp4Reg = RegInfo.createVirtualRegister(GPRC);
13299   Register TmpDestReg = RegInfo.createVirtualRegister(GPRC);
13300   Register SrwDestReg = RegInfo.createVirtualRegister(GPRC);
13301   Register Ptr1Reg;
13302   Register TmpReg =
13303       (!BinOpcode) ? Incr2Reg : RegInfo.createVirtualRegister(GPRC);
13304 
13305   // thisMBB:
13306   //   ...
13307   //   fallthrough --> loopMBB
13308   BB->addSuccessor(loopMBB);
13309 
13310   // The 4-byte load must be aligned, while a char or short may be
13311   // anywhere in the word. Hence all this nasty bookkeeping code.
13312   //   add ptr1, ptrA, ptrB [copy if ptrA==0]
13313   //   rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27]
13314   //   xori shift, shift1, 24 [16]
13315   //   rlwinm ptr, ptr1, 0, 0, 29
13316   //   slw incr2, incr, shift
13317   //   li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535]
13318   //   slw mask, mask2, shift
13319   //  loopMBB:
13320   //   lwarx tmpDest, ptr
13321   //   add tmp, tmpDest, incr2
13322   //   andc tmp2, tmpDest, mask
13323   //   and tmp3, tmp, mask
13324   //   or tmp4, tmp3, tmp2
13325   //   stwcx. tmp4, ptr
13326   //   bne- loopMBB
13327   //   fallthrough --> exitMBB
13328   //   srw SrwDest, tmpDest, shift
13329   //   rlwinm SrwDest, SrwDest, 0, 24 [16], 31
13330   if (ptrA != ZeroReg) {
13331     Ptr1Reg = RegInfo.createVirtualRegister(RC);
13332     BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)
13333         .addReg(ptrA)
13334         .addReg(ptrB);
13335   } else {
13336     Ptr1Reg = ptrB;
13337   }
13338   // We need use 32-bit subregister to avoid mismatch register class in 64-bit
13339   // mode.
13340   BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg)
13341       .addReg(Ptr1Reg, {}, is64bit ? PPC::sub_32 : 0)
13342       .addImm(3)
13343       .addImm(27)
13344       .addImm(is8bit ? 28 : 27);
  // Big-endian lanes are numbered from the other end of the word, so the
  // byte/halfword shift amount must be flipped with an XORI.
13345   if (!isLittleEndian)
13346     BuildMI(BB, dl, TII->get(PPC::XORI), ShiftReg)
13347         .addReg(Shift1Reg)
13348         .addImm(is8bit ? 24 : 16);
13349   if (is64bit)
13350     BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg)
13351         .addReg(Ptr1Reg)
13352         .addImm(0)
13353         .addImm(61);
13354   else
13355     BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg)
13356         .addReg(Ptr1Reg)
13357         .addImm(0)
13358         .addImm(0)
13359         .addImm(29);
13360   BuildMI(BB, dl, TII->get(PPC::SLW), Incr2Reg).addReg(incr).addReg(ShiftReg);
13361   if (is8bit)
13362     BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255);
13363   else {
13364     BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0);
13365     BuildMI(BB, dl, TII->get(PPC::ORI), Mask2Reg)
13366         .addReg(Mask3Reg)
13367         .addImm(65535);
13368   }
13369   BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg)
13370       .addReg(Mask2Reg)
13371       .addReg(ShiftReg);
13372 
  // loopMBB: load-reserve the containing word, compute the updated lane.
13373   BB = loopMBB;
13374   BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg)
13375       .addReg(ZeroReg)
13376       .addReg(PtrReg);
13377   if (BinOpcode)
13378     BuildMI(BB, dl, TII->get(BinOpcode), TmpReg)
13379         .addReg(Incr2Reg)
13380         .addReg(TmpDestReg);
13381   BuildMI(BB, dl, TII->get(PPC::ANDC), Tmp2Reg)
13382       .addReg(TmpDestReg)
13383       .addReg(MaskReg);
13384   BuildMI(BB, dl, TII->get(PPC::AND), Tmp3Reg).addReg(TmpReg).addReg(MaskReg);
13385   if (CmpOpcode) {
13386     // For unsigned comparisons, we can directly compare the shifted values.
13387     // For signed comparisons we shift and sign extend.
13388     Register SReg = RegInfo.createVirtualRegister(GPRC);
13389     Register CrReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
13390     BuildMI(BB, dl, TII->get(PPC::AND), SReg)
13391         .addReg(TmpDestReg)
13392         .addReg(MaskReg);
13393     unsigned ValueReg = SReg;
13394     unsigned CmpReg = Incr2Reg;
13395     if (CmpOpcode == PPC::CMPW) {
13396       ValueReg = RegInfo.createVirtualRegister(GPRC);
13397       BuildMI(BB, dl, TII->get(PPC::SRW), ValueReg)
13398           .addReg(SReg)
13399           .addReg(ShiftReg);
13400       Register ValueSReg = RegInfo.createVirtualRegister(GPRC);
13401       BuildMI(BB, dl, TII->get(is8bit ? PPC::EXTSB : PPC::EXTSH), ValueSReg)
13402           .addReg(ValueReg);
13403       ValueReg = ValueSReg;
13404       CmpReg = incr;
13405     }
13406     BuildMI(BB, dl, TII->get(CmpOpcode), CrReg).addReg(ValueReg).addReg(CmpReg);
13407     BuildMI(BB, dl, TII->get(PPC::BCC))
13408         .addImm(CmpPred)
13409         .addReg(CrReg)
13410         .addMBB(exitMBB);
13411     BB->addSuccessor(loop2MBB);
13412     BB->addSuccessor(exitMBB);
13413     BB = loop2MBB;
13414   }
  // Merge the updated lane into the unmodified bytes and store-conditional;
  // retry the whole loop if the reservation was lost.
13415   BuildMI(BB, dl, TII->get(PPC::OR), Tmp4Reg).addReg(Tmp3Reg).addReg(Tmp2Reg);
13416   BuildMI(BB, dl, TII->get(PPC::STWCX))
13417       .addReg(Tmp4Reg)
13418       .addReg(ZeroReg)
13419       .addReg(PtrReg);
13420   BuildMI(BB, dl, TII->get(PPC::BCC))
13422       .addReg(PPC::CR0)
13423       .addMBB(loopMBB)
13424   BB->addSuccessor(loopMBB);
13425   BB->addSuccessor(exitMBB);
13426 
13427   // exitMBB:
13428   //   ...
13429   BB = exitMBB;
13430   // Since the shift amount is not a constant, we need to clear
13431   // the upper bits with a separate RLWINM.
  // Note: the SRW is inserted at BB->begin() after the RLWINM, so it ends up
  // ordered before it — the old value is shifted down, then zero-extended.
13432   BuildMI(*BB, BB->begin(), dl, TII->get(PPC::RLWINM), dest)
13433       .addReg(SrwDestReg)
13434       .addImm(0)
13435       .addImm(is8bit ? 24 : 16)
13436       .addImm(31);
13437   BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), SrwDestReg)
13438       .addReg(TmpDestReg)
13439       .addReg(ShiftReg);
13440   return BB;
13441 }
13442
// emitEHSjLjSetJmp: expands EH_SjLj_SetJmp{32,64} into the setjmp half of the
// builtin setjmp/longjmp lowering. thisMBB stores the TOC pointer and base
// pointer into the buffer and sets the "returned via longjmp" value (1);
// mainMBB records the return address (LR) into the buffer and produces 0;
// sinkMBB merges the two results with a PHI into the original destination.
13445                                     MachineBasicBlock *MBB) const {
13446   DebugLoc DL = MI.getDebugLoc();
13447   const TargetInstrInfo *TII = Subtarget.getInstrInfo();
13448   const PPCRegisterInfo *TRI = Subtarget.getRegisterInfo();
13449 
13450   MachineFunction *MF = MBB->getParent();
13451   MachineRegisterInfo &MRI = MF->getRegInfo();
13452 
13453   const BasicBlock *BB = MBB->getBasicBlock();
13454   MachineFunction::iterator I = ++MBB->getIterator();
13455 
13456   Register DstReg = MI.getOperand(0).getReg();
13457   const TargetRegisterClass *RC = MRI.getRegClass(DstReg);
13458   assert(TRI->isTypeLegalForClass(*RC, MVT::i32) && "Invalid destination!");
13459   Register mainDstReg = MRI.createVirtualRegister(RC);
13460   Register restoreDstReg = MRI.createVirtualRegister(RC);
13461 
13462   MVT PVT = getPointerTy(MF->getDataLayout());
13463   assert((PVT == MVT::i64 || PVT == MVT::i32) &&
13464          "Invalid Pointer Size!");
13465   // For v = setjmp(buf), we generate
13466   //
13467   // thisMBB:
13468   //  SjLjSetup mainMBB
13469   //  bl mainMBB
13470   //  v_restore = 1
13471   //  b sinkMBB
13472   //
13473   // mainMBB:
13474   //  buf[LabelOffset] = LR
13475   //  v_main = 0
13476   //
13477   // sinkMBB:
13478   //  v = phi(main, restore)
13479   //
13480 
13481   MachineBasicBlock *thisMBB = MBB;
13482   MachineBasicBlock *mainMBB = MF->CreateMachineBasicBlock(BB);
13483   MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(BB);
13484   MF->insert(I, mainMBB);
13485   MF->insert(I, sinkMBB);
13486 
13488 
13489   // Transfer the remainder of BB and its successor edges to sinkMBB.
13490   sinkMBB->splice(sinkMBB->begin(), MBB,
13491                   std::next(MachineBasicBlock::iterator(MI)), MBB->end());
13493 
13494   // Note that the structure of the jmp_buf used here is not compatible
13495   // with that used by libc, and is not designed to be. Specifically, it
13496   // stores only those 'reserved' registers that LLVM does not otherwise
13497   // understand how to spill. Also, by convention, by the time this
13498   // intrinsic is called, Clang has already stored the frame address in the
13499   // first slot of the buffer and stack address in the third. Following the
13500   // X86 target code, we'll store the jump address in the second slot. We also
13501   // need to save the TOC pointer (R2) to handle jumps between shared
13502   // libraries, and that will be stored in the fourth slot. The thread
13503   // identifier (R13) is not affected.
13504 
13505   // thisMBB:
  // Slot offsets within the buffer, in units of pointer size (slot 0 holds
  // the frame address, slot 2 the stack address — stored by the caller).
13506   const int64_t LabelOffset = 1 * PVT.getStoreSize();
13507   const int64_t TOCOffset = 3 * PVT.getStoreSize();
13508   const int64_t BPOffset = 4 * PVT.getStoreSize();
13509 
13510   // Prepare IP either in reg.
13511   const TargetRegisterClass *PtrRC = getRegClassFor(PVT);
13512   Register LabelReg = MRI.createVirtualRegister(PtrRC);
13513   Register BufReg = MI.getOperand(1).getReg();
13514 
  // Save the TOC pointer (X2) so cross-shared-library longjmps can restore it.
13515   if (Subtarget.is64BitELFABI()) {
13516     setUsesTOCBasePtr(*MBB->getParent());
13517     MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::STD))
13518               .addReg(PPC::X2)
13519               .addImm(TOCOffset)
13520               .addReg(BufReg)
13521               .cloneMemRefs(MI);
13522   }
13523 
13524   // Naked functions never have a base pointer, and so we use r1. For all
13525   // other functions, this decision must be delayed until during PEI.
13526   unsigned BaseReg;
13527   if (MF->getFunction().hasFnAttribute(Attribute::Naked))
13528     BaseReg = Subtarget.isPPC64() ? PPC::X1 : PPC::R1;
13529   else
13530     BaseReg = Subtarget.isPPC64() ? PPC::BP8 : PPC::BP;
13531 
13532   MIB = BuildMI(*thisMBB, MI, DL,
13533                 TII->get(Subtarget.isPPC64() ? PPC::STD : PPC::STW))
13534             .addReg(BaseReg)
13535             .addImm(BPOffset)
13536             .addReg(BufReg)
13537             .cloneMemRefs(MI);
13538 
13539   // Setup
  // BCLalways clobbers everything callee-visible — model that with the
  // no-preserved register mask so nothing is assumed live across it.
13540   MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::BCLalways)).addMBB(mainMBB);
13541   MIB.addRegMask(TRI->getNoPreservedMask());
13542 
13543   BuildMI(*thisMBB, MI, DL, TII->get(PPC::LI), restoreDstReg).addImm(1);
13544 
13545   MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::EH_SjLj_Setup))
13546             .addMBB(mainMBB);
13547   MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::B)).addMBB(sinkMBB);
13548 
13549   thisMBB->addSuccessor(mainMBB, BranchProbability::getZero());
13550   thisMBB->addSuccessor(sinkMBB, BranchProbability::getOne());
13551 
13552   // mainMBB:
13553   //  mainDstReg = 0
13554   MIB =
13555       BuildMI(mainMBB, DL,
13556               TII->get(Subtarget.isPPC64() ? PPC::MFLR8 : PPC::MFLR), LabelReg);
13557 
13558   // Store IP
13559   if (Subtarget.isPPC64()) {
13560     MIB = BuildMI(mainMBB, DL, TII->get(PPC::STD))
13561               .addReg(LabelReg)
13562               .addImm(LabelOffset)
13563               .addReg(BufReg);
13564   } else {
13565     MIB = BuildMI(mainMBB, DL, TII->get(PPC::STW))
13566               .addReg(LabelReg)
13567               .addImm(LabelOffset)
13568               .addReg(BufReg);
13569   }
13570   MIB.cloneMemRefs(MI);
13571 
13572   BuildMI(mainMBB, DL, TII->get(PPC::LI), mainDstReg).addImm(0);
13573   mainMBB->addSuccessor(sinkMBB);
13574 
13575   // sinkMBB:
13576   BuildMI(*sinkMBB, sinkMBB->begin(), DL,
13577           TII->get(PPC::PHI), DstReg)
13578     .addReg(mainDstReg).addMBB(mainMBB)
13579     .addReg(restoreDstReg).addMBB(thisMBB);
13580 
13581   MI.eraseFromParent();
13582   return sinkMBB;
13583 }
13584
// emitEHSjLjLongJmp: expands EH_SjLj_LongJmp{32,64}. Reloads the saved frame
// pointer, jump address, stack pointer, base pointer, and (on 64-bit SVR4)
// the TOC pointer from the buffer laid out by emitEHSjLjSetJmp, then jumps
// to the saved address via CTR. Emitted in place of MI inside MBB.
13587                                      MachineBasicBlock *MBB) const {
13588   DebugLoc DL = MI.getDebugLoc();
13589   const TargetInstrInfo *TII = Subtarget.getInstrInfo();
13590 
13591   MachineFunction *MF = MBB->getParent();
13592   MachineRegisterInfo &MRI = MF->getRegInfo();
13593 
13594   MVT PVT = getPointerTy(MF->getDataLayout());
13595   assert((PVT == MVT::i64 || PVT == MVT::i32) &&
13596          "Invalid Pointer Size!");
13597 
13598   const TargetRegisterClass *RC =
13599       (PVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
13600   Register Tmp = MRI.createVirtualRegister(RC);
13601   // Since FP is only updated here but NOT referenced, it's treated as GPR.
13602   unsigned FP = (PVT == MVT::i64) ? PPC::X31 : PPC::R31;
13603   unsigned SP = (PVT == MVT::i64) ? PPC::X1 : PPC::R1;
13604   unsigned BP =
13605       (PVT == MVT::i64)
13606           ? PPC::X30
13607           : (Subtarget.isSVR4ABI() && isPositionIndependent() ? PPC::R29
13608                                                               : PPC::R30);
13609 
13611 
  // Buffer slot offsets must mirror the layout written by emitEHSjLjSetJmp
  // (slot 0 = frame address, 1 = IP, 2 = SP, 3 = TOC, 4 = BP).
13612   const int64_t LabelOffset = 1 * PVT.getStoreSize();
13613   const int64_t SPOffset = 2 * PVT.getStoreSize();
13614   const int64_t TOCOffset = 3 * PVT.getStoreSize();
13615   const int64_t BPOffset = 4 * PVT.getStoreSize();
13616 
13617   Register BufReg = MI.getOperand(0).getReg();
13618 
13619   // Reload FP (the jumped-to function may not have had a
13620   // frame pointer, and if so, then its r31 will be restored
13621   // as necessary).
13622   if (PVT == MVT::i64) {
13623     MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), FP)
13624               .addImm(0)
13625               .addReg(BufReg);
13626   } else {
13627     MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), FP)
13628               .addImm(0)
13629               .addReg(BufReg);
13630   }
13631   MIB.cloneMemRefs(MI);
13632 
13633   // Reload IP
13634   if (PVT == MVT::i64) {
13635     MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), Tmp)
13636               .addImm(LabelOffset)
13637               .addReg(BufReg);
13638   } else {
13639     MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), Tmp)
13640               .addImm(LabelOffset)
13641               .addReg(BufReg);
13642   }
13643   MIB.cloneMemRefs(MI);
13644 
13645   // Reload SP
13646   if (PVT == MVT::i64) {
13647     MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), SP)
13648               .addImm(SPOffset)
13649               .addReg(BufReg);
13650   } else {
13651     MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), SP)
13652               .addImm(SPOffset)
13653               .addReg(BufReg);
13654   }
13655   MIB.cloneMemRefs(MI);
13656 
13657   // Reload BP
13658   if (PVT == MVT::i64) {
13659     MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), BP)
13660               .addImm(BPOffset)
13661               .addReg(BufReg);
13662   } else {
13663     MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), BP)
13664               .addImm(BPOffset)
13665               .addReg(BufReg);
13666   }
13667   MIB.cloneMemRefs(MI);
13668 
13669   // Reload TOC
13670   if (PVT == MVT::i64 && Subtarget.isSVR4ABI()) {
13671     setUsesTOCBasePtr(*MBB->getParent());
13672     MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), PPC::X2)
13673               .addImm(TOCOffset)
13674               .addReg(BufReg)
13675               .cloneMemRefs(MI);
13676   }
13677 
13678   // Jump
  // Indirect branch through CTR to the saved return address.
13679   BuildMI(*MBB, MI, DL,
13680           TII->get(PVT == MVT::i64 ? PPC::MTCTR8 : PPC::MTCTR)).addReg(Tmp);
13681   BuildMI(*MBB, MI, DL, TII->get(PVT == MVT::i64 ? PPC::BCTR8 : PPC::BCTR));
13682 
13683   MI.eraseFromParent();
13684   return MBB;
13685 }
13686
// Returns true only when the function explicitly opts into inline stack
// probing via the "probe-stack"="inline-asm" attribute; any other value of
// the attribute (or its absence) disables inline probes.
13688   // If the function specifically requests inline stack probes, emit them.
13689   if (MF.getFunction().hasFnAttribute("probe-stack"))
13690     return MF.getFunction().getFnAttribute("probe-stack").getValueAsString() ==
13691            "inline-asm";
13692   return false;
13693 }
13694
// Returns the stack probe interval in bytes: the function's
// "stack-probe-size" attribute (default 4096) rounded down to the stack
// alignment; never returns 0 (falls back to the alignment itself).
13696   const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
13697   unsigned StackAlign = TFI->getStackAlignment();
13698   assert(StackAlign >= 1 && isPowerOf2_32(StackAlign) &&
13699          "Unexpected stack alignment");
13700   // The default stack probe size is 4096 if the function has no
13701   // stack-probe-size attribute.
13702   const Function &Fn = MF.getFunction();
13703   unsigned StackProbeSize =
13704       Fn.getFnAttributeAsParsedInteger("stack-probe-size", 4096);
13705   // Round down to the stack alignment.
13706   StackProbeSize &= ~(StackAlign - 1);
  // A probe size smaller than the alignment rounds to 0; use the alignment
  // itself in that case so probing still makes forward progress.
13707   return StackProbeSize ? StackProbeSize : StackAlign;
13708 }
13709
13710 // Lower dynamic stack allocation with probing. `emitProbedAlloca` is splitted
13711 // into three phases. In the first phase, it uses pseudo instruction
13712 // PREPARE_PROBED_ALLOCA to get the future result of actual FramePointer and
13713 // FinalStackPtr. In the second phase, it generates a loop for probing blocks.
13714 // At last, it uses pseudo instruction DYNAREAOFFSET to get the future result of
13715 // MaxCallFrameSize so that it can calculate correct data area pointer.
13718                                     MachineBasicBlock *MBB) const {
13719   const bool isPPC64 = Subtarget.isPPC64();
13720   MachineFunction *MF = MBB->getParent();
13721   const TargetInstrInfo *TII = Subtarget.getInstrInfo();
13722   DebugLoc DL = MI.getDebugLoc();
13723   const unsigned ProbeSize = getStackProbeSize(*MF);
13724   const BasicBlock *ProbedBB = MBB->getBasicBlock();
13725   MachineRegisterInfo &MRI = MF->getRegInfo();
13726   // The CFG of probing stack looks as
13727   //         +-----+
13728   //         | MBB |
13729   //         +--+--+
13730   //            |
13731   //         +----v----+
13732   //    +--->+ TestMBB +---+
13733   //    |    +----+----+   |
13734   //    |         |        |
13735   //    |   +-----v----+   |
13736   //    +---+ BlockMBB |   |
13737   //        +----------+   |
13738   //                       |
13739   //          +---------+  |
13740   //          | TailMBB +<--+
13741   //          +---------+
13742   // In MBB, calculate previous frame pointer and final stack pointer.
13743   // In TestMBB, test if sp is equal to final stack pointer, if so, jump to
13744   // TailMBB. In BlockMBB, update the sp atomically and jump back to TestMBB.
13745   // TailMBB is spliced via \p MI.
13746   MachineBasicBlock *TestMBB = MF->CreateMachineBasicBlock(ProbedBB);
13747   MachineBasicBlock *TailMBB = MF->CreateMachineBasicBlock(ProbedBB);
13748   MachineBasicBlock *BlockMBB = MF->CreateMachineBasicBlock(ProbedBB);
13749 
13750   MachineFunction::iterator MBBIter = ++MBB->getIterator();
13751   MF->insert(MBBIter, TestMBB);
13752   MF->insert(MBBIter, BlockMBB);
13753   MF->insert(MBBIter, TailMBB);
13754 
13755   const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
13756   const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
13757 
13758   Register DstReg = MI.getOperand(0).getReg();
13759   Register NegSizeReg = MI.getOperand(1).getReg();
13760   Register SPReg = isPPC64 ? PPC::X1 : PPC::R1;
13761   Register FinalStackPtr = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
13762   Register FramePointer = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
13763   Register ActualNegSizeReg = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
13764 
13765   // Since value of NegSizeReg might be realigned in prologepilog, insert a
13766   // PREPARE_PROBED_ALLOCA pseudo instruction to get actual FramePointer and
13767   // NegSize.
13768   unsigned ProbeOpc;
13769   if (!MRI.hasOneNonDBGUse(NegSizeReg))
13770     ProbeOpc =
13771         isPPC64 ? PPC::PREPARE_PROBED_ALLOCA_64 : PPC::PREPARE_PROBED_ALLOCA_32;
13772   else
13773     // By introducing PREPARE_PROBED_ALLOCA_NEGSIZE_OPT, ActualNegSizeReg
13774     // and NegSizeReg will be allocated in the same phyreg to avoid
13775     // redundant copy when NegSizeReg has only one use which is current MI and
13776     // will be replaced by PREPARE_PROBED_ALLOCA then.
13777     ProbeOpc = isPPC64 ? PPC::PREPARE_PROBED_ALLOCA_NEGSIZE_SAME_REG_64
13778                        : PPC::PREPARE_PROBED_ALLOCA_NEGSIZE_SAME_REG_32;
13779   BuildMI(*MBB, {MI}, DL, TII->get(ProbeOpc), FramePointer)
13780       .addDef(ActualNegSizeReg)
13781       .addReg(NegSizeReg)
13782       .add(MI.getOperand(2))
13783       .add(MI.getOperand(3));
13784 
13785   // Calculate final stack pointer, which equals to SP + ActualNegSize.
13786   BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::ADD8 : PPC::ADD4),
13787           FinalStackPtr)
13788       .addReg(SPReg)
13789       .addReg(ActualNegSizeReg);
13790 
13791   // Materialize a scratch register for update.
  // ScratchReg holds -ProbeSize; use LIS+ORI when it does not fit in a
  // 16-bit signed immediate.
13792   int64_t NegProbeSize = -(int64_t)ProbeSize;
13793   assert(isInt<32>(NegProbeSize) && "Unhandled probe size!");
13794   Register ScratchReg = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
13795   if (!isInt<16>(NegProbeSize)) {
13796     Register TempReg = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
13797     BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::LIS8 : PPC::LIS), TempReg)
13798         .addImm(NegProbeSize >> 16);
13799     BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::ORI8 : PPC::ORI),
13800             ScratchReg)
13801         .addReg(TempReg)
13802         .addImm(NegProbeSize & 0xFFFF);
13803   } else
13804     BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::LI8 : PPC::LI), ScratchReg)
13805         .addImm(NegProbeSize);
13806 
13807   {
13808     // Probing leading residual part.
  // residual = ActualNegSize - (ActualNegSize / -ProbeSize) * -ProbeSize;
  // probe it with a single stdux/stwux so the loop below only handles
  // whole ProbeSize blocks.
13809     Register Div = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
13810     BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::DIVD : PPC::DIVW), Div)
13811         .addReg(ActualNegSizeReg)
13812         .addReg(ScratchReg);
13813     Register Mul = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
13814     BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::MULLD : PPC::MULLW), Mul)
13815         .addReg(Div)
13816         .addReg(ScratchReg);
13817     Register NegMod = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
13818     BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::SUBF8 : PPC::SUBF), NegMod)
13819         .addReg(Mul)
13820         .addReg(ActualNegSizeReg);
13821     BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::STDUX : PPC::STWUX), SPReg)
13822         .addReg(FramePointer)
13823         .addReg(SPReg)
13824         .addReg(NegMod);
13825   }
13826 
13827   {
13828     // Remaining part should be multiple of ProbeSize.
13829     Register CmpResult = MRI.createVirtualRegister(&PPC::CRRCRegClass);
13830     BuildMI(TestMBB, DL, TII->get(isPPC64 ? PPC::CMPD : PPC::CMPW), CmpResult)
13831         .addReg(SPReg)
13832         .addReg(FinalStackPtr)
13835         .addReg(CmpResult)
13836         .addMBB(TailMBB);
13837     TestMBB->addSuccessor(BlockMBB);
13838     TestMBB->addSuccessor(TailMBB);
13839   }
13840 
13841   {
13842     // Touch the block.
13843     // |P...|P...|P...
13844     BuildMI(BlockMBB, DL, TII->get(isPPC64 ? PPC::STDUX : PPC::STWUX), SPReg)
13845         .addReg(FramePointer)
13846         .addReg(SPReg)
13847         .addReg(ScratchReg);
13848     BuildMI(BlockMBB, DL, TII->get(PPC::B)).addMBB(TestMBB);
13849     BlockMBB->addSuccessor(TestMBB);
13850   }
13851 
13852   // Calculation of MaxCallFrameSize is deferred to prologepilog, use
13853   // DYNAREAOFFSET pseudo instruction to get the future result.
13854   Register MaxCallFrameSizeReg =
13855       MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
13856   BuildMI(TailMBB, DL,
13857           TII->get(isPPC64 ? PPC::DYNAREAOFFSET8 : PPC::DYNAREAOFFSET),
13858           MaxCallFrameSizeReg)
13859       .add(MI.getOperand(2))
13860       .add(MI.getOperand(3));
13861   BuildMI(TailMBB, DL, TII->get(isPPC64 ? PPC::ADD8 : PPC::ADD4), DstReg)
13862       .addReg(SPReg)
13863       .addReg(MaxCallFrameSizeReg);
13864 
13865   // Splice instructions after MI to TailMBB.
13866   TailMBB->splice(TailMBB->end(), MBB,
13867                   std::next(MachineBasicBlock::iterator(MI)), MBB->end());
13869   MBB->addSuccessor(TestMBB);
13870 
13871   // Delete the pseudo instruction.
13872   MI.eraseFromParent();
13873 
13874   ++NumDynamicAllocaProbed;
13875   return TailMBB;
13876 }
13877
// Identify the pseudo SELECT_CC_<type> opcodes. These carry an explicit
// compare-predicate operand (operand 4) and are expanded into a
// branch-and-phi diamond by EmitInstrWithCustomInserter.
13879   switch (MI.getOpcode()) {
13880   case PPC::SELECT_CC_I4:
13881   case PPC::SELECT_CC_I8:
13882   case PPC::SELECT_CC_F4:
13883   case PPC::SELECT_CC_F8:
13884   case PPC::SELECT_CC_F16:
13885   case PPC::SELECT_CC_VRRC:
13886   case PPC::SELECT_CC_VSFRC:
13887   case PPC::SELECT_CC_VSSRC:
13888   case PPC::SELECT_CC_VSRC:
13889   case PPC::SELECT_CC_SPE4:
13890   case PPC::SELECT_CC_SPE:
13891     return true;
13892   default:
13893     return false;
13894   }
13895 }
13896
13897static bool IsSelect(MachineInstr &MI) {
13898 switch (MI.getOpcode()) {
13899 case PPC::SELECT_I4:
13900 case PPC::SELECT_I8:
13901 case PPC::SELECT_F4:
13902 case PPC::SELECT_F8:
13903 case PPC::SELECT_F16:
13904 case PPC::SELECT_SPE:
13905 case PPC::SELECT_SPE4:
13906 case PPC::SELECT_VRRC:
13907 case PPC::SELECT_VSFRC:
13908 case PPC::SELECT_VSSRC:
13909 case PPC::SELECT_VSRC:
13910 return true;
13911 default:
13912 return false;
13913 }
13914}
13915
13918 MachineBasicBlock *BB) const {
13919 if (MI.getOpcode() == TargetOpcode::STACKMAP ||
13920 MI.getOpcode() == TargetOpcode::PATCHPOINT) {
13921 if (Subtarget.is64BitELFABI() &&
13922 MI.getOpcode() == TargetOpcode::PATCHPOINT &&
13923 !Subtarget.isUsingPCRelativeCalls()) {
13924 // Call lowering should have added an r2 operand to indicate a dependence
13925 // on the TOC base pointer value. It can't however, because there is no
13926 // way to mark the dependence as implicit there, and so the stackmap code
13927 // will confuse it with a regular operand. Instead, add the dependence
13928 // here.
13929 MI.addOperand(MachineOperand::CreateReg(PPC::X2, false, true));
13930 }
13931
13932 return emitPatchPoint(MI, BB);
13933 }
13934
13935 if (MI.getOpcode() == PPC::EH_SjLj_SetJmp32 ||
13936 MI.getOpcode() == PPC::EH_SjLj_SetJmp64) {
13937 return emitEHSjLjSetJmp(MI, BB);
13938 } else if (MI.getOpcode() == PPC::EH_SjLj_LongJmp32 ||
13939 MI.getOpcode() == PPC::EH_SjLj_LongJmp64) {
13940 return emitEHSjLjLongJmp(MI, BB);
13941 }
13942
13943 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
13944
13945 // To "insert" these instructions we actually have to insert their
13946 // control-flow patterns.
13947 const BasicBlock *LLVM_BB = BB->getBasicBlock();
13949
13950 MachineFunction *F = BB->getParent();
13951 MachineRegisterInfo &MRI = F->getRegInfo();
13952
13953 if (Subtarget.hasISEL() &&
13954 (MI.getOpcode() == PPC::SELECT_CC_I4 ||
13955 MI.getOpcode() == PPC::SELECT_CC_I8 ||
13956 MI.getOpcode() == PPC::SELECT_I4 || MI.getOpcode() == PPC::SELECT_I8)) {
13958 if (MI.getOpcode() == PPC::SELECT_CC_I4 ||
13959 MI.getOpcode() == PPC::SELECT_CC_I8)
13960 Cond.push_back(MI.getOperand(4));
13961 else
13963 Cond.push_back(MI.getOperand(1));
13964
13965 DebugLoc dl = MI.getDebugLoc();
13966 TII->insertSelect(*BB, MI, dl, MI.getOperand(0).getReg(), Cond,
13967 MI.getOperand(2).getReg(), MI.getOperand(3).getReg());
13968 } else if (IsSelectCC(MI) || IsSelect(MI)) {
13969 // The incoming instruction knows the destination vreg to set, the
13970 // condition code register to branch on, the true/false values to
13971 // select between, and a branch opcode to use.
13972
13973 // thisMBB:
13974 // ...
13975 // TrueVal = ...
13976 // cmpTY ccX, r1, r2
13977 // bCC sinkMBB
13978 // fallthrough --> copy0MBB
13979 MachineBasicBlock *thisMBB = BB;
13980 MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
13981 MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
13982 DebugLoc dl = MI.getDebugLoc();
13983 F->insert(It, copy0MBB);
13984 F->insert(It, sinkMBB);
13985
13986 if (isPhysRegUsedAfter(PPC::CARRY, MI.getIterator())) {
13987 copy0MBB->addLiveIn(PPC::CARRY);
13988 sinkMBB->addLiveIn(PPC::CARRY);
13989 }
13990
13991 // Set the call frame size on entry to the new basic blocks.
13992 // See https://reviews.llvm.org/D156113.
13993 unsigned CallFrameSize = TII->getCallFrameSizeAt(MI);
13994 copy0MBB->setCallFrameSize(CallFrameSize);
13995 sinkMBB->setCallFrameSize(CallFrameSize);
13996
13997 // Transfer the remainder of BB and its successor edges to sinkMBB.
13998 sinkMBB->splice(sinkMBB->begin(), BB,
13999 std::next(MachineBasicBlock::iterator(MI)), BB->end());
14001
14002 // Next, add the true and fallthrough blocks as its successors.
14003 BB->addSuccessor(copy0MBB);
14004 BB->addSuccessor(sinkMBB);
14005
14006 if (IsSelect(MI)) {
14007 BuildMI(BB, dl, TII->get(PPC::BC))
14008 .addReg(MI.getOperand(1).getReg())
14009 .addMBB(sinkMBB);
14010 } else {
14011 unsigned SelectPred = MI.getOperand(4).getImm();
14012 BuildMI(BB, dl, TII->get(PPC::BCC))
14013 .addImm(SelectPred)
14014 .addReg(MI.getOperand(1).getReg())
14015 .addMBB(sinkMBB);
14016 }
14017
14018 // copy0MBB:
14019 // %FalseValue = ...
14020 // # fallthrough to sinkMBB
14021 BB = copy0MBB;
14022
14023 // Update machine-CFG edges
14024 BB->addSuccessor(sinkMBB);
14025
14026 // sinkMBB:
14027 // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
14028 // ...
14029 BB = sinkMBB;
14030 BuildMI(*BB, BB->begin(), dl, TII->get(PPC::PHI), MI.getOperand(0).getReg())
14031 .addReg(MI.getOperand(3).getReg())
14032 .addMBB(copy0MBB)
14033 .addReg(MI.getOperand(2).getReg())
14034 .addMBB(thisMBB);
14035 } else if (MI.getOpcode() == PPC::ReadTB) {
14036 // To read the 64-bit time-base register on a 32-bit target, we read the
14037 // two halves. Should the counter have wrapped while it was being read, we
14038 // need to try again.
14039 // ...
14040 // readLoop:
14041 // mfspr Rx,TBU # load from TBU
14042 // mfspr Ry,TB # load from TB
14043 // mfspr Rz,TBU # load from TBU
14044 // cmpw crX,Rx,Rz # check if 'old'='new'
14045 // bne readLoop # branch if they're not equal
14046 // ...
14047
14048 MachineBasicBlock *readMBB = F->CreateMachineBasicBlock(LLVM_BB);
14049 MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
14050 DebugLoc dl = MI.getDebugLoc();
14051 F->insert(It, readMBB);
14052 F->insert(It, sinkMBB);
14053
14054 // Transfer the remainder of BB and its successor edges to sinkMBB.
14055 sinkMBB->splice(sinkMBB->begin(), BB,
14056 std::next(MachineBasicBlock::iterator(MI)), BB->end());
14058
14059 BB->addSuccessor(readMBB);
14060 BB = readMBB;
14061
14062 MachineRegisterInfo &RegInfo = F->getRegInfo();
14063 Register ReadAgainReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
14064 Register LoReg = MI.getOperand(0).getReg();
14065 Register HiReg = MI.getOperand(1).getReg();
14066
14067 BuildMI(BB, dl, TII->get(PPC::MFSPR), HiReg).addImm(269);
14068 BuildMI(BB, dl, TII->get(PPC::MFSPR), LoReg).addImm(268);
14069 BuildMI(BB, dl, TII->get(PPC::MFSPR), ReadAgainReg).addImm(269);
14070
14071 Register CmpReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
14072
14073 BuildMI(BB, dl, TII->get(PPC::CMPW), CmpReg)
14074 .addReg(HiReg)
14075 .addReg(ReadAgainReg);
14076 BuildMI(BB, dl, TII->get(PPC::BCC))
14078 .addReg(CmpReg)
14079 .addMBB(readMBB);
14080
14081 BB->addSuccessor(readMBB);
14082 BB->addSuccessor(sinkMBB);
14083 } else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I8)
14084 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::ADD4);
14085 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I16)
14086 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::ADD4);
14087 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I32)
14088 BB = EmitAtomicBinary(MI, BB, 4, PPC::ADD4);
14089 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I64)
14090 BB = EmitAtomicBinary(MI, BB, 8, PPC::ADD8);
14091
14092 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I8)
14093 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::AND);
14094 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I16)
14095 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::AND);
14096 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I32)
14097 BB = EmitAtomicBinary(MI, BB, 4, PPC::AND);
14098 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I64)
14099 BB = EmitAtomicBinary(MI, BB, 8, PPC::AND8);
14100
14101 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I8)
14102 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::OR);
14103 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I16)
14104 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::OR);
14105 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I32)
14106 BB = EmitAtomicBinary(MI, BB, 4, PPC::OR);
14107 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I64)
14108 BB = EmitAtomicBinary(MI, BB, 8, PPC::OR8);
14109
14110 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I8)
14111 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::XOR);
14112 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I16)
14113 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::XOR);
14114 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I32)
14115 BB = EmitAtomicBinary(MI, BB, 4, PPC::XOR);
14116 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I64)
14117 BB = EmitAtomicBinary(MI, BB, 8, PPC::XOR8);
14118
14119 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I8)
14120 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::NAND);
14121 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I16)
14122 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::NAND);
14123 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I32)
14124 BB = EmitAtomicBinary(MI, BB, 4, PPC::NAND);
14125 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I64)
14126 BB = EmitAtomicBinary(MI, BB, 8, PPC::NAND8);
14127
14128 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I8)
14129 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::SUBF);
14130 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I16)
14131 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::SUBF);
14132 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I32)
14133 BB = EmitAtomicBinary(MI, BB, 4, PPC::SUBF);
14134 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I64)
14135 BB = EmitAtomicBinary(MI, BB, 8, PPC::SUBF8);
14136
14137 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I8)
14138 BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPW, PPC::PRED_LT);
14139 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I16)
14140 BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPW, PPC::PRED_LT);
14141 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I32)
14142 BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPW, PPC::PRED_LT);
14143 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I64)
14144 BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPD, PPC::PRED_LT);
14145
14146 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I8)
14147 BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPW, PPC::PRED_GT);
14148 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I16)
14149 BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPW, PPC::PRED_GT);
14150 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I32)
14151 BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPW, PPC::PRED_GT);
14152 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I64)
14153 BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPD, PPC::PRED_GT);
14154
14155 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I8)
14156 BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPLW, PPC::PRED_LT);
14157 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I16)
14158 BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPLW, PPC::PRED_LT);
14159 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I32)
14160 BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPLW, PPC::PRED_LT);
14161 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I64)
14162 BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPLD, PPC::PRED_LT);
14163
14164 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I8)
14165 BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPLW, PPC::PRED_GT);
14166 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I16)
14167 BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPLW, PPC::PRED_GT);
14168 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I32)
14169 BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPLW, PPC::PRED_GT);
14170 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I64)
14171 BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPLD, PPC::PRED_GT);
14172
14173 else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I8)
14174 BB = EmitPartwordAtomicBinary(MI, BB, true, 0);
14175 else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I16)
14176 BB = EmitPartwordAtomicBinary(MI, BB, false, 0);
14177 else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I32)
14178 BB = EmitAtomicBinary(MI, BB, 4, 0);
14179 else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I64)
14180 BB = EmitAtomicBinary(MI, BB, 8, 0);
14181 else if (MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I32 ||
14182 MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I64 ||
14183 (Subtarget.hasPartwordAtomics() &&
14184 MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8) ||
14185 (Subtarget.hasPartwordAtomics() &&
14186 MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I16)) {
14187 bool is64bit = MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I64;
14188
14189 auto LoadMnemonic = PPC::LDARX;
14190 auto StoreMnemonic = PPC::STDCX;
14191 switch (MI.getOpcode()) {
14192 default:
14193 llvm_unreachable("Compare and swap of unknown size");
14194 case PPC::ATOMIC_CMP_SWAP_I8:
14195 LoadMnemonic = PPC::LBARX;
14196 StoreMnemonic = PPC::STBCX;
14197 assert(Subtarget.hasPartwordAtomics() && "No support partword atomics.");
14198 break;
14199 case PPC::ATOMIC_CMP_SWAP_I16:
14200 LoadMnemonic = PPC::LHARX;
14201 StoreMnemonic = PPC::STHCX;
14202 assert(Subtarget.hasPartwordAtomics() && "No support partword atomics.");
14203 break;
14204 case PPC::ATOMIC_CMP_SWAP_I32:
14205 LoadMnemonic = PPC::LWARX;
14206 StoreMnemonic = PPC::STWCX;
14207 break;
14208 case PPC::ATOMIC_CMP_SWAP_I64:
14209 LoadMnemonic = PPC::LDARX;
14210 StoreMnemonic = PPC::STDCX;
14211 break;
14212 }
14213 MachineRegisterInfo &RegInfo = F->getRegInfo();
14214 Register dest = MI.getOperand(0).getReg();
14215 Register ptrA = MI.getOperand(1).getReg();
14216 Register ptrB = MI.getOperand(2).getReg();
14217 Register CrReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
14218 Register oldval = MI.getOperand(3).getReg();
14219 Register newval = MI.getOperand(4).getReg();
14220 DebugLoc dl = MI.getDebugLoc();
14221
14222 MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB);
14223 MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB);
14224 MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
14225 F->insert(It, loop1MBB);
14226 F->insert(It, loop2MBB);
14227 F->insert(It, exitMBB);
14228 exitMBB->splice(exitMBB->begin(), BB,
14229 std::next(MachineBasicBlock::iterator(MI)), BB->end());
14231
14232 // thisMBB:
14233 // ...
14234 // fallthrough --> loopMBB
14235 BB->addSuccessor(loop1MBB);
14236
14237 // loop1MBB:
14238 // l[bhwd]arx dest, ptr
14239 // cmp[wd] dest, oldval
14240 // bne- exitBB
14241 // loop2MBB:
14242 // st[bhwd]cx. newval, ptr
14243 // bne- loopMBB
14244 // b exitBB
14245 // exitBB:
14246 BB = loop1MBB;
14247 BuildMI(BB, dl, TII->get(LoadMnemonic), dest).addReg(ptrA).addReg(ptrB);
14248 BuildMI(BB, dl, TII->get(is64bit ? PPC::CMPD : PPC::CMPW), CrReg)
14249 .addReg(dest)
14250 .addReg(oldval);
14251 BuildMI(BB, dl, TII->get(PPC::BCC))
14253 .addReg(CrReg)
14254 .addMBB(exitMBB);
14255 BB->addSuccessor(loop2MBB);
14256 BB->addSuccessor(exitMBB);
14257
14258 BB = loop2MBB;
14259 BuildMI(BB, dl, TII->get(StoreMnemonic))
14260 .addReg(newval)
14261 .addReg(ptrA)
14262 .addReg(ptrB);
14263 BuildMI(BB, dl, TII->get(PPC::BCC))
14265 .addReg(PPC::CR0)
14266 .addMBB(loop1MBB);
14267 BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
14268 BB->addSuccessor(loop1MBB);
14269 BB->addSuccessor(exitMBB);
14270
14271 // exitMBB:
14272 // ...
14273 BB = exitMBB;
14274 } else if (MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8 ||
14275 MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I16) {
14276 // We must use 64-bit registers for addresses when targeting 64-bit,
14277 // since we're actually doing arithmetic on them. Other registers
14278 // can be 32-bit.
14279 bool is64bit = Subtarget.isPPC64();
14280 bool isLittleEndian = Subtarget.isLittleEndian();
14281 bool is8bit = MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8;
14282
14283 Register dest = MI.getOperand(0).getReg();
14284 Register ptrA = MI.getOperand(1).getReg();
14285 Register ptrB = MI.getOperand(2).getReg();
14286 Register oldval = MI.getOperand(3).getReg();
14287 Register newval = MI.getOperand(4).getReg();
14288 DebugLoc dl = MI.getDebugLoc();
14289
14290 MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB);
14291 MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB);
14292 MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
14293 F->insert(It, loop1MBB);
14294 F->insert(It, loop2MBB);
14295 F->insert(It, exitMBB);
14296 exitMBB->splice(exitMBB->begin(), BB,
14297 std::next(MachineBasicBlock::iterator(MI)), BB->end());
14299
14300 MachineRegisterInfo &RegInfo = F->getRegInfo();
14301 const TargetRegisterClass *RC =
14302 is64bit ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
14303 const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
14304
14305 Register PtrReg = RegInfo.createVirtualRegister(RC);
14306 Register Shift1Reg = RegInfo.createVirtualRegister(GPRC);
14307 Register ShiftReg =
14308 isLittleEndian ? Shift1Reg : RegInfo.createVirtualRegister(GPRC);
14309 Register NewVal2Reg = RegInfo.createVirtualRegister(GPRC);
14310 Register NewVal3Reg = RegInfo.createVirtualRegister(GPRC);
14311 Register OldVal2Reg = RegInfo.createVirtualRegister(GPRC);
14312 Register OldVal3Reg = RegInfo.createVirtualRegister(GPRC);
14313 Register MaskReg = RegInfo.createVirtualRegister(GPRC);
14314 Register Mask2Reg = RegInfo.createVirtualRegister(GPRC);
14315 Register Mask3Reg = RegInfo.createVirtualRegister(GPRC);
14316 Register Tmp2Reg = RegInfo.createVirtualRegister(GPRC);
14317 Register Tmp4Reg = RegInfo.createVirtualRegister(GPRC);
14318 Register TmpDestReg = RegInfo.createVirtualRegister(GPRC);
14319 Register Ptr1Reg;
14320 Register TmpReg = RegInfo.createVirtualRegister(GPRC);
14321 Register ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;
14322 Register CrReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
14323 // thisMBB:
14324 // ...
14325 // fallthrough --> loopMBB
14326 BB->addSuccessor(loop1MBB);
14327
14328 // The 4-byte load must be aligned, while a char or short may be
14329 // anywhere in the word. Hence all this nasty bookkeeping code.
14330 // add ptr1, ptrA, ptrB [copy if ptrA==0]
14331 // rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27]
14332 // xori shift, shift1, 24 [16]
14333 // rlwinm ptr, ptr1, 0, 0, 29
14334 // slw newval2, newval, shift
14335 // slw oldval2, oldval,shift
14336 // li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535]
14337 // slw mask, mask2, shift
14338 // and newval3, newval2, mask
14339 // and oldval3, oldval2, mask
14340 // loop1MBB:
14341 // lwarx tmpDest, ptr
14342 // and tmp, tmpDest, mask
14343 // cmpw tmp, oldval3
14344 // bne- exitBB
14345 // loop2MBB:
14346 // andc tmp2, tmpDest, mask
14347 // or tmp4, tmp2, newval3
14348 // stwcx. tmp4, ptr
14349 // bne- loop1MBB
14350 // b exitBB
14351 // exitBB:
14352 // srw dest, tmpDest, shift
14353 if (ptrA != ZeroReg) {
14354 Ptr1Reg = RegInfo.createVirtualRegister(RC);
14355 BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)
14356 .addReg(ptrA)
14357 .addReg(ptrB);
14358 } else {
14359 Ptr1Reg = ptrB;
14360 }
14361
14362 // We need use 32-bit subregister to avoid mismatch register class in 64-bit
14363 // mode.
14364 BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg)
14365 .addReg(Ptr1Reg, {}, is64bit ? PPC::sub_32 : 0)
14366 .addImm(3)
14367 .addImm(27)
14368 .addImm(is8bit ? 28 : 27);
14369 if (!isLittleEndian)
14370 BuildMI(BB, dl, TII->get(PPC::XORI), ShiftReg)
14371 .addReg(Shift1Reg)
14372 .addImm(is8bit ? 24 : 16);
14373 if (is64bit)
14374 BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg)
14375 .addReg(Ptr1Reg)
14376 .addImm(0)
14377 .addImm(61);
14378 else
14379 BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg)
14380 .addReg(Ptr1Reg)
14381 .addImm(0)
14382 .addImm(0)
14383 .addImm(29);
14384 BuildMI(BB, dl, TII->get(PPC::SLW), NewVal2Reg)
14385 .addReg(newval)
14386 .addReg(ShiftReg);
14387 BuildMI(BB, dl, TII->get(PPC::SLW), OldVal2Reg)
14388 .addReg(oldval)
14389 .addReg(ShiftReg);
14390 if (is8bit)
14391 BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255);
14392 else {
14393 BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0);
14394 BuildMI(BB, dl, TII->get(PPC::ORI), Mask2Reg)
14395 .addReg(Mask3Reg)
14396 .addImm(65535);
14397 }
14398 BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg)
14399 .addReg(Mask2Reg)
14400 .addReg(ShiftReg);
14401 BuildMI(BB, dl, TII->get(PPC::AND), NewVal3Reg)
14402 .addReg(NewVal2Reg)
14403 .addReg(MaskReg);
14404 BuildMI(BB, dl, TII->get(PPC::AND), OldVal3Reg)
14405 .addReg(OldVal2Reg)
14406 .addReg(MaskReg);
14407
14408 BB = loop1MBB;
14409 BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg)
14410 .addReg(ZeroReg)
14411 .addReg(PtrReg);
14412 BuildMI(BB, dl, TII->get(PPC::AND), TmpReg)
14413 .addReg(TmpDestReg)
14414 .addReg(MaskReg);
14415 BuildMI(BB, dl, TII->get(PPC::CMPW), CrReg)
14416 .addReg(TmpReg)
14417 .addReg(OldVal3Reg);
14418 BuildMI(BB, dl, TII->get(PPC::BCC))
14420 .addReg(CrReg)
14421 .addMBB(exitMBB);
14422 BB->addSuccessor(loop2MBB);
14423 BB->addSuccessor(exitMBB);
14424
14425 BB = loop2MBB;
14426 BuildMI(BB, dl, TII->get(PPC::ANDC), Tmp2Reg)
14427 .addReg(TmpDestReg)
14428 .addReg(MaskReg);
14429 BuildMI(BB, dl, TII->get(PPC::OR), Tmp4Reg)
14430 .addReg(Tmp2Reg)
14431 .addReg(NewVal3Reg);
14432 BuildMI(BB, dl, TII->get(PPC::STWCX))
14433 .addReg(Tmp4Reg)
14434 .addReg(ZeroReg)
14435 .addReg(PtrReg);
14436 BuildMI(BB, dl, TII->get(PPC::BCC))
14438 .addReg(PPC::CR0)
14439 .addMBB(loop1MBB);
14440 BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
14441 BB->addSuccessor(loop1MBB);
14442 BB->addSuccessor(exitMBB);
14443
14444 // exitMBB:
14445 // ...
14446 BB = exitMBB;
14447 BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), dest)
14448 .addReg(TmpReg)
14449 .addReg(ShiftReg);
14450 } else if (MI.getOpcode() == PPC::FADDrtz) {
14451 // This pseudo performs an FADD with rounding mode temporarily forced
14452 // to round-to-zero. We emit this via custom inserter since the FPSCR
14453 // is not modeled at the SelectionDAG level.
14454 Register Dest = MI.getOperand(0).getReg();
14455 Register Src1 = MI.getOperand(1).getReg();
14456 Register Src2 = MI.getOperand(2).getReg();
14457 DebugLoc dl = MI.getDebugLoc();
14458
14459 MachineRegisterInfo &RegInfo = F->getRegInfo();
14460 Register MFFSReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass);
14461
14462 // Save FPSCR value.
14463 BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), MFFSReg);
14464
14465 // Set rounding mode to round-to-zero.
14466 BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB1))
14467 .addImm(31)
14469
14470 BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB0))
14471 .addImm(30)
14473
14474 // Perform addition.
14475 auto MIB = BuildMI(*BB, MI, dl, TII->get(PPC::FADD), Dest)
14476 .addReg(Src1)
14477 .addReg(Src2);
14478 if (MI.getFlag(MachineInstr::NoFPExcept))
14480
14481 // Restore FPSCR value.
14482 BuildMI(*BB, MI, dl, TII->get(PPC::MTFSFb)).addImm(1).addReg(MFFSReg);
14483 } else if (MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT ||
14484 MI.getOpcode() == PPC::ANDI_rec_1_GT_BIT ||
14485 MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT8 ||
14486 MI.getOpcode() == PPC::ANDI_rec_1_GT_BIT8) {
14487 unsigned Opcode = (MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT8 ||
14488 MI.getOpcode() == PPC::ANDI_rec_1_GT_BIT8)
14489 ? PPC::ANDI8_rec
14490 : PPC::ANDI_rec;
14491 bool IsEQ = (MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT ||
14492 MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT8);
14493
14494 MachineRegisterInfo &RegInfo = F->getRegInfo();
14495 Register Dest = RegInfo.createVirtualRegister(
14496 Opcode == PPC::ANDI_rec ? &PPC::GPRCRegClass : &PPC::G8RCRegClass);
14497
14498 DebugLoc Dl = MI.getDebugLoc();
14499 BuildMI(*BB, MI, Dl, TII->get(Opcode), Dest)
14500 .addReg(MI.getOperand(1).getReg())
14501 .addImm(1);
14502 BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::COPY),
14503 MI.getOperand(0).getReg())
14504 .addReg(IsEQ ? PPC::CR0EQ : PPC::CR0GT);
14505 } else if (MI.getOpcode() == PPC::TCHECK_RET) {
14506 DebugLoc Dl = MI.getDebugLoc();
14507 MachineRegisterInfo &RegInfo = F->getRegInfo();
14508 Register CRReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
14509 BuildMI(*BB, MI, Dl, TII->get(PPC::TCHECK), CRReg);
14510 BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::COPY),
14511 MI.getOperand(0).getReg())
14512 .addReg(CRReg);
14513 } else if (MI.getOpcode() == PPC::TBEGIN_RET) {
14514 DebugLoc Dl = MI.getDebugLoc();
14515 unsigned Imm = MI.getOperand(1).getImm();
14516 BuildMI(*BB, MI, Dl, TII->get(PPC::TBEGIN)).addImm(Imm);
14517 BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::COPY),
14518 MI.getOperand(0).getReg())
14519 .addReg(PPC::CR0EQ);
14520 } else if (MI.getOpcode() == PPC::SETRNDi) {
14521 DebugLoc dl = MI.getDebugLoc();
14522 Register OldFPSCRReg = MI.getOperand(0).getReg();
14523
14524 // Save FPSCR value.
14525 if (MRI.use_empty(OldFPSCRReg))
14526 BuildMI(*BB, MI, dl, TII->get(TargetOpcode::IMPLICIT_DEF), OldFPSCRReg);
14527 else
14528 BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), OldFPSCRReg);
14529
14530 // The floating point rounding mode is in the bits 62:63 of FPCSR, and has
14531 // the following settings:
14532 // 00 Round to nearest
14533 // 01 Round to 0
14534 // 10 Round to +inf
14535 // 11 Round to -inf
14536
14537 // When the operand is immediate, using the two least significant bits of
14538 // the immediate to set the bits 62:63 of FPSCR.
14539 unsigned Mode = MI.getOperand(1).getImm();
14540 BuildMI(*BB, MI, dl, TII->get((Mode & 1) ? PPC::MTFSB1 : PPC::MTFSB0))
14541 .addImm(31)
14543
14544 BuildMI(*BB, MI, dl, TII->get((Mode & 2) ? PPC::MTFSB1 : PPC::MTFSB0))
14545 .addImm(30)
14547 } else if (MI.getOpcode() == PPC::SETRND) {
14548 DebugLoc dl = MI.getDebugLoc();
14549
14550 // Copy register from F8RCRegClass::SrcReg to G8RCRegClass::DestReg
14551 // or copy register from G8RCRegClass::SrcReg to F8RCRegClass::DestReg.
14552 // If the target doesn't have DirectMove, we should use stack to do the
14553 // conversion, because the target doesn't have the instructions like mtvsrd
14554 // or mfvsrd to do this conversion directly.
14555 auto copyRegFromG8RCOrF8RC = [&] (unsigned DestReg, unsigned SrcReg) {
14556 if (Subtarget.hasDirectMove()) {
14557 BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), DestReg)
14558 .addReg(SrcReg);
14559 } else {
14560 // Use stack to do the register copy.
14561 unsigned StoreOp = PPC::STD, LoadOp = PPC::LFD;
14562 MachineRegisterInfo &RegInfo = F->getRegInfo();
14563 const TargetRegisterClass *RC = RegInfo.getRegClass(SrcReg);
14564 if (RC == &PPC::F8RCRegClass) {
14565 // Copy register from F8RCRegClass to G8RCRegclass.
14566 assert((RegInfo.getRegClass(DestReg) == &PPC::G8RCRegClass) &&
14567 "Unsupported RegClass.");
14568
14569 StoreOp = PPC::STFD;
14570 LoadOp = PPC::LD;
14571 } else {
14572 // Copy register from G8RCRegClass to F8RCRegclass.
14573 assert((RegInfo.getRegClass(SrcReg) == &PPC::G8RCRegClass) &&
14574 (RegInfo.getRegClass(DestReg) == &PPC::F8RCRegClass) &&
14575 "Unsupported RegClass.");
14576 }
14577
14578 MachineFrameInfo &MFI = F->getFrameInfo();
14579 int FrameIdx = MFI.CreateStackObject(8, Align(8), false);
14580
14581 MachineMemOperand *MMOStore = F->getMachineMemOperand(
14582 MachinePointerInfo::getFixedStack(*F, FrameIdx, 0),
14584 MFI.getObjectAlign(FrameIdx));
14585
14586 // Store the SrcReg into the stack.
14587 BuildMI(*BB, MI, dl, TII->get(StoreOp))
14588 .addReg(SrcReg)
14589 .addImm(0)
14590 .addFrameIndex(FrameIdx)
14591 .addMemOperand(MMOStore);
14592
14593 MachineMemOperand *MMOLoad = F->getMachineMemOperand(
14594 MachinePointerInfo::getFixedStack(*F, FrameIdx, 0),
14596 MFI.getObjectAlign(FrameIdx));
14597
14598 // Load from the stack where SrcReg is stored, and save to DestReg,
14599 // so we have done the RegClass conversion from RegClass::SrcReg to
14600 // RegClass::DestReg.
14601 BuildMI(*BB, MI, dl, TII->get(LoadOp), DestReg)
14602 .addImm(0)
14603 .addFrameIndex(FrameIdx)
14604 .addMemOperand(MMOLoad);
14605 }
14606 };
14607
14608 Register OldFPSCRReg = MI.getOperand(0).getReg();
14609
14610 // Save FPSCR value.
14611 BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), OldFPSCRReg);
14612
14613 // When the operand is gprc register, use two least significant bits of the
14614 // register and mtfsf instruction to set the bits 62:63 of FPSCR.
14615 //
14616 // copy OldFPSCRTmpReg, OldFPSCRReg
14617 // (INSERT_SUBREG ExtSrcReg, (IMPLICIT_DEF ImDefReg), SrcOp, 1)
14618 // rldimi NewFPSCRTmpReg, ExtSrcReg, OldFPSCRReg, 0, 62
14619 // copy NewFPSCRReg, NewFPSCRTmpReg
14620 // mtfsf 255, NewFPSCRReg
14621 MachineOperand SrcOp = MI.getOperand(1);
14622 MachineRegisterInfo &RegInfo = F->getRegInfo();
14623 Register OldFPSCRTmpReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
14624
14625 copyRegFromG8RCOrF8RC(OldFPSCRTmpReg, OldFPSCRReg);
14626
14627 Register ImDefReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
14628 Register ExtSrcReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
14629
14630 // The first operand of INSERT_SUBREG should be a register which has
14631 // subregisters, we only care about its RegClass, so we should use an
14632 // IMPLICIT_DEF register.
14633 BuildMI(*BB, MI, dl, TII->get(TargetOpcode::IMPLICIT_DEF), ImDefReg);
14634 BuildMI(*BB, MI, dl, TII->get(PPC::INSERT_SUBREG), ExtSrcReg)
14635 .addReg(ImDefReg)
14636 .add(SrcOp)
14637 .addImm(1);
14638
14639 Register NewFPSCRTmpReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
14640 BuildMI(*BB, MI, dl, TII->get(PPC::RLDIMI), NewFPSCRTmpReg)
14641 .addReg(OldFPSCRTmpReg)
14642 .addReg(ExtSrcReg)
14643 .addImm(0)
14644 .addImm(62);
14645
14646 Register NewFPSCRReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass);
14647 copyRegFromG8RCOrF8RC(NewFPSCRReg, NewFPSCRTmpReg);
14648
14649 // The mask 255 means that put the 32:63 bits of NewFPSCRReg to the 32:63
14650 // bits of FPSCR.
14651 BuildMI(*BB, MI, dl, TII->get(PPC::MTFSF))
14652 .addImm(255)
14653 .addReg(NewFPSCRReg)
14654 .addImm(0)
14655 .addImm(0);
14656 } else if (MI.getOpcode() == PPC::SETFLM) {
14657 DebugLoc Dl = MI.getDebugLoc();
14658
14659 // Result of setflm is previous FPSCR content, so we need to save it first.
14660 Register OldFPSCRReg = MI.getOperand(0).getReg();
14661 if (MRI.use_empty(OldFPSCRReg))
14662 BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::IMPLICIT_DEF), OldFPSCRReg);
14663 else
14664 BuildMI(*BB, MI, Dl, TII->get(PPC::MFFS), OldFPSCRReg);
14665
14666 // Put bits in 32:63 to FPSCR.
14667 Register NewFPSCRReg = MI.getOperand(1).getReg();
14668 BuildMI(*BB, MI, Dl, TII->get(PPC::MTFSF))
14669 .addImm(255)
14670 .addReg(NewFPSCRReg)
14671 .addImm(0)
14672 .addImm(0);
14673 } else if (MI.getOpcode() == PPC::PROBED_ALLOCA_32 ||
14674 MI.getOpcode() == PPC::PROBED_ALLOCA_64) {
14675 return emitProbedAlloca(MI, BB);
14676 } else if (MI.getOpcode() == PPC::SPLIT_QUADWORD) {
14677 DebugLoc DL = MI.getDebugLoc();
14678 Register Src = MI.getOperand(2).getReg();
14679 Register Lo = MI.getOperand(0).getReg();
14680 Register Hi = MI.getOperand(1).getReg();
14681 BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY))
14682 .addDef(Lo)
14683 .addUse(Src, {}, PPC::sub_gp8_x1);
14684 BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY))
14685 .addDef(Hi)
14686 .addUse(Src, {}, PPC::sub_gp8_x0);
14687 } else if (MI.getOpcode() == PPC::LQX_PSEUDO ||
14688 MI.getOpcode() == PPC::STQX_PSEUDO) {
14689 DebugLoc DL = MI.getDebugLoc();
14690 // Ptr is used as the ptr_rc_no_r0 part
14691 // of LQ/STQ's memory operand and adding result of RA and RB,
14692 // so it has to be g8rc_and_g8rc_nox0.
14693 Register Ptr =
14694 F->getRegInfo().createVirtualRegister(&PPC::G8RC_and_G8RC_NOX0RegClass);
14695 Register Val = MI.getOperand(0).getReg();
14696 Register RA = MI.getOperand(1).getReg();
14697 Register RB = MI.getOperand(2).getReg();
14698 BuildMI(*BB, MI, DL, TII->get(PPC::ADD8), Ptr).addReg(RA).addReg(RB);
14699 BuildMI(*BB, MI, DL,
14700 MI.getOpcode() == PPC::LQX_PSEUDO ? TII->get(PPC::LQ)
14701 : TII->get(PPC::STQ))
14702 .addReg(Val, getDefRegState(MI.getOpcode() == PPC::LQX_PSEUDO))
14703 .addImm(0)
14704 .addReg(Ptr);
14705 } else if (MI.getOpcode() == PPC::LWAT_PSEUDO ||
14706 MI.getOpcode() == PPC::LDAT_PSEUDO) {
14707 DebugLoc DL = MI.getDebugLoc();
14708 Register DstReg = MI.getOperand(0).getReg();
14709 Register PtrReg = MI.getOperand(1).getReg();
14710 Register ValReg = MI.getOperand(2).getReg();
14711 unsigned FC = MI.getOperand(3).getImm();
14712 bool IsLwat = MI.getOpcode() == PPC::LWAT_PSEUDO;
14713 Register Val64 = MRI.createVirtualRegister(&PPC::G8RCRegClass);
14714 if (IsLwat)
14715 BuildMI(*BB, MI, DL, TII->get(TargetOpcode::SUBREG_TO_REG), Val64)
14716 .addReg(ValReg)
14717 .addImm(PPC::sub_32);
14718 else
14719 Val64 = ValReg;
14720
14721 Register G8rPair = MRI.createVirtualRegister(&PPC::G8pRCRegClass);
14722 Register UndefG8r = MRI.createVirtualRegister(&PPC::G8RCRegClass);
14723 BuildMI(*BB, MI, DL, TII->get(TargetOpcode::IMPLICIT_DEF), UndefG8r);
14724 BuildMI(*BB, MI, DL, TII->get(PPC::REG_SEQUENCE), G8rPair)
14725 .addReg(UndefG8r)
14726 .addImm(PPC::sub_gp8_x0)
14727 .addReg(Val64)
14728 .addImm(PPC::sub_gp8_x1);
14729
14730 Register PairResult = MRI.createVirtualRegister(&PPC::G8pRCRegClass);
14731 BuildMI(*BB, MI, DL, TII->get(IsLwat ? PPC::LWAT : PPC::LDAT), PairResult)
14732 .addReg(G8rPair)
14733 .addReg(PtrReg)
14734 .addImm(FC);
14735 Register Result64 = MRI.createVirtualRegister(&PPC::G8RCRegClass);
14736 BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY), Result64)
14737 .addReg(PairResult, {}, PPC::sub_gp8_x0);
14738 if (IsLwat)
14739 BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY), DstReg)
14740 .addReg(Result64, {}, PPC::sub_32);
14741 else
14742 BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY), DstReg)
14743 .addReg(Result64);
14744 } else if (MI.getOpcode() == PPC::LWAT_COND_PSEUDO ||
14745 MI.getOpcode() == PPC::LDAT_COND_PSEUDO) {
14746 DebugLoc DL = MI.getDebugLoc();
14747 Register DstReg = MI.getOperand(0).getReg();
14748 Register PtrReg = MI.getOperand(1).getReg();
14749 unsigned FC = MI.getOperand(2).getImm();
14750 bool IsLwat_Cond = MI.getOpcode() == PPC::LWAT_COND_PSEUDO;
14751
14752 Register Pair = MRI.createVirtualRegister(&PPC::G8pRCRegClass);
14753 BuildMI(*BB, MI, DL, TII->get(TargetOpcode::IMPLICIT_DEF), Pair);
14754
14755 Register PairResult = MRI.createVirtualRegister(&PPC::G8pRCRegClass);
14756 BuildMI(*BB, MI, DL, TII->get(IsLwat_Cond ? PPC::LWAT : PPC::LDAT),
14757 PairResult)
14758 .addReg(Pair)
14759 .addReg(PtrReg)
14760 .addImm(FC);
14761 Register Result64 = MRI.createVirtualRegister(&PPC::G8RCRegClass);
14762 BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY), Result64)
14763 .addReg(PairResult, {}, PPC::sub_gp8_x0);
14764 if (IsLwat_Cond)
14765 BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY), DstReg)
14766 .addReg(Result64, {}, PPC::sub_32);
14767 else
14768 BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY), DstReg)
14769 .addReg(Result64);
14770 } else {
14771 llvm_unreachable("Unexpected instr type to insert");
14772 }
14773
14774 MI.eraseFromParent(); // The pseudo instruction is gone now.
14775 return BB;
14776}
14777
14778//===----------------------------------------------------------------------===//
14779// Target Optimization Hooks
14780//===----------------------------------------------------------------------===//
14781
14782static int getEstimateRefinementSteps(EVT VT, const PPCSubtarget &Subtarget) {
14783 // For the estimates, convergence is quadratic, so we essentially double the
14784 // number of digits correct after every iteration. For both FRE and FRSQRTE,
14785 // the minimum architected relative accuracy is 2^-5. When hasRecipPrec(),
14786 // this is 2^-14. IEEE float has 23 digits and double has 52 digits.
14787 int RefinementSteps = Subtarget.hasRecipPrec() ? 1 : 3;
14788 if (VT.getScalarType() == MVT::f64)
14789 RefinementSteps++;
14790 return RefinementSteps;
14791}
14792
14793SDValue PPCTargetLowering::getSqrtInputTest(SDValue Op, SelectionDAG &DAG,
14794 const DenormalMode &Mode,
14795 SDNodeFlags Flags) const {
14796 // We only have VSX Vector Test for software Square Root.
14797 EVT VT = Op.getValueType();
14798 if (!isTypeLegal(MVT::i1) ||
14799 (VT != MVT::f64 &&
14800 ((VT != MVT::v2f64 && VT != MVT::v4f32) || !Subtarget.hasVSX())))
14801 return TargetLowering::getSqrtInputTest(Op, DAG, Mode, Flags);
14802
14803 SDLoc DL(Op);
14804 // The output register of FTSQRT is CR field.
14805 SDValue FTSQRT = DAG.getNode(PPCISD::FTSQRT, DL, MVT::i32, Op, Flags);
14806 // ftsqrt BF,FRB
14807 // Let e_b be the unbiased exponent of the double-precision
14808 // floating-point operand in register FRB.
14809 // fe_flag is set to 1 if either of the following conditions occurs.
14810 // - The double-precision floating-point operand in register FRB is a zero,
14811 // a NaN, or an infinity, or a negative value.
14812 // - e_b is less than or equal to -970.
14813 // Otherwise fe_flag is set to 0.
14814 // Both VSX and non-VSX versions would set EQ bit in the CR if the number is
14815 // not eligible for iteration. (zero/negative/infinity/nan or unbiased
14816 // exponent is less than -970)
14817 SDValue SRIdxVal = DAG.getTargetConstant(PPC::sub_eq, DL, MVT::i32);
14818 return SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, MVT::i1,
14819 FTSQRT, SRIdxVal),
14820 0);
14821}
14822
14823SDValue
14824PPCTargetLowering::getSqrtResultForDenormInput(SDValue Op,
14825 SelectionDAG &DAG) const {
14826 // We only have VSX Vector Square Root.
14827 EVT VT = Op.getValueType();
14828 if (VT != MVT::f64 &&
14829 ((VT != MVT::v2f64 && VT != MVT::v4f32) || !Subtarget.hasVSX()))
14831
14832 return DAG.getNode(PPCISD::FSQRT, SDLoc(Op), VT, Op);
14833}
14834
14835SDValue PPCTargetLowering::getSqrtEstimate(SDValue Operand, SelectionDAG &DAG,
14836 int Enabled, int &RefinementSteps,
14837 bool &UseOneConstNR,
14838 bool Reciprocal) const {
14839 EVT VT = Operand.getValueType();
14840 if ((VT == MVT::f32 && Subtarget.hasFRSQRTES()) ||
14841 (VT == MVT::f64 && Subtarget.hasFRSQRTE()) ||
14842 (VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
14843 (VT == MVT::v2f64 && Subtarget.hasVSX())) {
14844 if (RefinementSteps == ReciprocalEstimate::Unspecified)
14845 RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
14846
14847 // The Newton-Raphson computation with a single constant does not provide
14848 // enough accuracy on some CPUs.
14849 UseOneConstNR = !Subtarget.needsTwoConstNR();
14850 return DAG.getNode(PPCISD::FRSQRTE, SDLoc(Operand), VT, Operand);
14851 }
14852 return SDValue();
14853}
14854
14855SDValue PPCTargetLowering::getRecipEstimate(SDValue Operand, SelectionDAG &DAG,
14856 int Enabled,
14857 int &RefinementSteps) const {
14858 EVT VT = Operand.getValueType();
14859 if ((VT == MVT::f32 && Subtarget.hasFRES()) ||
14860 (VT == MVT::f64 && Subtarget.hasFRE()) ||
14861 (VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
14862 (VT == MVT::v2f64 && Subtarget.hasVSX())) {
14863 if (RefinementSteps == ReciprocalEstimate::Unspecified)
14864 RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
14865 return DAG.getNode(PPCISD::FRE, SDLoc(Operand), VT, Operand);
14866 }
14867 return SDValue();
14868}
14869
14871 // Note: This functionality is used only when arcp is enabled, and
14872 // on cores with reciprocal estimates (which are used when arcp is
14873 // enabled for division), this functionality is redundant with the default
14874 // combiner logic (once the division -> reciprocal/multiply transformation
14875 // has taken place). As a result, this matters more for older cores than for
14876 // newer ones.
14877
14878 // Combine multiple FDIVs with the same divisor into multiple FMULs by the
14879 // reciprocal if there are two or more FDIVs (for embedded cores with only
14880 // one FP pipeline) for three or more FDIVs (for generic OOO cores).
14881 switch (Subtarget.getCPUDirective()) {
14882 default:
14883 return 3;
14884 case PPC::DIR_440:
14885 case PPC::DIR_A2:
14886 case PPC::DIR_E500:
14887 case PPC::DIR_E500mc:
14888 case PPC::DIR_E5500:
14889 return 2;
14890 }
14891}
14892
14893// isConsecutiveLSLoc needs to work even if all adds have not yet been
14894// collapsed, and so we need to look through chains of them.
14896 int64_t& Offset, SelectionDAG &DAG) {
14897 if (DAG.isBaseWithConstantOffset(Loc)) {
14898 Base = Loc.getOperand(0);
14899 Offset += cast<ConstantSDNode>(Loc.getOperand(1))->getSExtValue();
14900
14901 // The base might itself be a base plus an offset, and if so, accumulate
14902 // that as well.
14903 getBaseWithConstantOffset(Loc.getOperand(0), Base, Offset, DAG);
14904 }
14905}
14906
                               unsigned Bytes, int Dist,
                               SelectionDAG &DAG) {
  // The access being tested must be exactly Bytes wide.
  if (VT.getSizeInBits() / 8 != Bytes)
    return false;

  SDValue BaseLoc = Base->getBasePtr();
  if (Loc.getOpcode() == ISD::FrameIndex) {
    // Frame-index addressing: compare the two stack objects directly. Both
    // objects must have the same size (equal to Bytes), and their frame
    // offsets must differ by exactly Dist * Bytes.
    if (BaseLoc.getOpcode() != ISD::FrameIndex)
      return false;
    int FI = cast<FrameIndexSDNode>(Loc)->getIndex();
    int BFI = cast<FrameIndexSDNode>(BaseLoc)->getIndex();
    int FS = MFI.getObjectSize(FI);
    int BFS = MFI.getObjectSize(BFI);
    if (FS != BFS || FS != (int)Bytes) return false;
    return MFI.getObjectOffset(FI) == (MFI.getObjectOffset(BFI) + Dist*Bytes);
  }

  // Otherwise, try to prove (base + constant-offset) equality after
  // collapsing any chains of constant-offset adds on both addresses.
  SDValue Base1 = Loc, Base2 = BaseLoc;
  int64_t Offset1 = 0, Offset2 = 0;
  getBaseWithConstantOffset(Loc, Base1, Offset1, DAG);
  getBaseWithConstantOffset(BaseLoc, Base2, Offset2, DAG);
  if (Base1 == Base2 && Offset1 == (Offset2 + Dist * Bytes))
    return true;

  // Finally, fall back to comparing the addresses as
  // global-address + constant-offset pairs.
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  const GlobalValue *GV1 = nullptr;
  const GlobalValue *GV2 = nullptr;
  Offset1 = 0;
  Offset2 = 0;
  bool isGA1 = TLI.isGAPlusOffset(Loc.getNode(), GV1, Offset1);
  bool isGA2 = TLI.isGAPlusOffset(BaseLoc.getNode(), GV2, Offset2);
  if (isGA1 && isGA2 && GV1 == GV2)
    return Offset1 == (Offset2 + Dist*Bytes);
  return false;
}
14944
// Like SelectionDAG::isConsecutiveLoad, but also works for stores, and does
// not enforce equality of the chain operands. Handles plain loads/stores as
// well as the AltiVec/VSX load and store intrinsics, for which the memory
// type is implied by the intrinsic ID rather than carried on the node.
                            unsigned Bytes, int Dist,
                            SelectionDAG &DAG) {
    // Plain load/store: the memory VT and base pointer come straight off the
    // LSBaseSDNode.
    EVT VT = LS->getMemoryVT();
    SDValue Loc = LS->getBasePtr();
    return isConsecutiveLSLoc(Loc, VT, Base, Bytes, Dist, DAG);
  }

  // Load intrinsics: operand 1 is the intrinsic ID, operand 2 the address.
  if (N->getOpcode() == ISD::INTRINSIC_W_CHAIN) {
    EVT VT;
    switch (N->getConstantOperandVal(1)) {
    default: return false;
    case Intrinsic::ppc_altivec_lvx:
    case Intrinsic::ppc_altivec_lvxl:
    case Intrinsic::ppc_vsx_lxvw4x:
    case Intrinsic::ppc_vsx_lxvw4x_be:
      VT = MVT::v4i32;
      break;
    case Intrinsic::ppc_vsx_lxvd2x:
    case Intrinsic::ppc_vsx_lxvd2x_be:
      VT = MVT::v2f64;
      break;
    case Intrinsic::ppc_altivec_lvebx:
      VT = MVT::i8;
      break;
    case Intrinsic::ppc_altivec_lvehx:
      VT = MVT::i16;
      break;
    case Intrinsic::ppc_altivec_lvewx:
      VT = MVT::i32;
      break;
    }

    return isConsecutiveLSLoc(N->getOperand(2), VT, Base, Bytes, Dist, DAG);
  }

  // Store intrinsics: operand 1 is the intrinsic ID, operand 2 the value,
  // operand 3 the address.
  if (N->getOpcode() == ISD::INTRINSIC_VOID) {
    EVT VT;
    switch (N->getConstantOperandVal(1)) {
    default: return false;
    case Intrinsic::ppc_altivec_stvx:
    case Intrinsic::ppc_altivec_stvxl:
    case Intrinsic::ppc_vsx_stxvw4x:
      VT = MVT::v4i32;
      break;
    case Intrinsic::ppc_vsx_stxvd2x:
      VT = MVT::v2f64;
      break;
    case Intrinsic::ppc_vsx_stxvw4x_be:
      VT = MVT::v4i32;
      break;
    case Intrinsic::ppc_vsx_stxvd2x_be:
      VT = MVT::v2f64;
      break;
    case Intrinsic::ppc_altivec_stvebx:
      VT = MVT::i8;
      break;
    case Intrinsic::ppc_altivec_stvehx:
      VT = MVT::i16;
      break;
    case Intrinsic::ppc_altivec_stvewx:
      VT = MVT::i32;
      break;
    }

    return isConsecutiveLSLoc(N->getOperand(3), VT, Base, Bytes, Dist, DAG);
  }

  return false;
}
15018
// Return true if there is a nearby consecutive load to the one provided
// (regardless of alignment). We search up and down the chain, looking through
// token factors and other loads (but nothing else). As a result, a true result
// indicates that it is safe to create a new consecutive load adjacent to the
// load provided.
  SDValue Chain = LD->getChain();
  EVT VT = LD->getMemoryVT();

  SmallPtrSet<SDNode *, 16> LoadRoots;
  SmallVector<SDNode *, 8> Queue(1, Chain.getNode());

  // First, search up the chain, branching to follow all token-factor operands.
  // If we find a consecutive load, then we're done, otherwise, record all
  // nodes just above the top-level loads and token factors.
  while (!Queue.empty()) {
    SDNode *ChainNext = Queue.pop_back_val();
    if (!Visited.insert(ChainNext).second)
      continue;

    if (MemSDNode *ChainLD = dyn_cast<MemSDNode>(ChainNext)) {
      // A memory node on the chain: check adjacency, then keep walking up
      // through its own chain operand.
      if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG))
        return true;

      if (!Visited.count(ChainLD->getChain().getNode()))
        Queue.push_back(ChainLD->getChain().getNode());
    } else if (ChainNext->getOpcode() == ISD::TokenFactor) {
      // Fan out through every operand of the token factor.
      for (const SDUse &O : ChainNext->ops())
        if (!Visited.count(O.getNode()))
          Queue.push_back(O.getNode());
    } else
      // Anything else terminates the upward walk; remember it as a root for
      // the downward phase.
      LoadRoots.insert(ChainNext);
  }

  // Second, search down the chain, starting from the top-level nodes recorded
  // in the first phase. These top-level nodes are the nodes just above all
  // loads and token factors. Starting with their uses, recursively look through
  // all loads (just the chain uses) and token factors to find a consecutive
  // load.
  Visited.clear();
  Queue.clear();

  for (SDNode *I : LoadRoots) {
    Queue.push_back(I);

    while (!Queue.empty()) {
      SDNode *LoadRoot = Queue.pop_back_val();
      if (!Visited.insert(LoadRoot).second)
        continue;

      if (MemSDNode *ChainLD = dyn_cast<MemSDNode>(LoadRoot))
        if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG))
          return true;

      // Follow only users that are chained memory nodes (chained directly on
      // LoadRoot) or token factors.
      for (SDNode *U : LoadRoot->users())
        if (((isa<MemSDNode>(U) &&
              cast<MemSDNode>(U)->getChain().getNode() == LoadRoot) ||
             U->getOpcode() == ISD::TokenFactor) &&
            !Visited.count(U))
          Queue.push_back(U);
    }
  }

  return false;
}
15085
15086/// This function is called when we have proved that a SETCC node can be replaced
15087/// by subtraction (and other supporting instructions) so that the result of
15088/// comparison is kept in a GPR instead of CR. This function is purely for
15089/// codegen purposes and has some flags to guide the codegen process.
15090static SDValue generateEquivalentSub(SDNode *N, int Size, bool Complement,
15091 bool Swap, SDLoc &DL, SelectionDAG &DAG) {
15092 assert(N->getOpcode() == ISD::SETCC && "ISD::SETCC Expected.");
15093
15094 // Zero extend the operands to the largest legal integer. Originally, they
15095 // must be of a strictly smaller size.
15096 auto Op0 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(0),
15097 DAG.getConstant(Size, DL, MVT::i32));
15098 auto Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(1),
15099 DAG.getConstant(Size, DL, MVT::i32));
15100
15101 // Swap if needed. Depends on the condition code.
15102 if (Swap)
15103 std::swap(Op0, Op1);
15104
15105 // Subtract extended integers.
15106 auto SubNode = DAG.getNode(ISD::SUB, DL, MVT::i64, Op0, Op1);
15107
15108 // Move the sign bit to the least significant position and zero out the rest.
15109 // Now the least significant bit carries the result of original comparison.
15110 auto Shifted = DAG.getNode(ISD::SRL, DL, MVT::i64, SubNode,
15111 DAG.getConstant(Size - 1, DL, MVT::i32));
15112 auto Final = Shifted;
15113
15114 // Complement the result if needed. Based on the condition code.
15115 if (Complement)
15116 Final = DAG.getNode(ISD::XOR, DL, MVT::i64, Shifted,
15117 DAG.getConstant(1, DL, MVT::i64));
15118
15119 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, Final);
15120}
15121
SDValue PPCTargetLowering::ConvertSETCCToSubtract(SDNode *N,
                                                  DAGCombinerInfo &DCI) const {
  assert(N->getOpcode() == ISD::SETCC && "ISD::SETCC Expected.");

  SelectionDAG &DAG = DCI.DAG;
  SDLoc DL(N);

  // Size of integers being compared has a critical role in the following
  // analysis, so we prefer to do this when all types are legal.
  if (!DCI.isAfterLegalizeDAG())
    return SDValue();

  // If all users of SETCC extend its value to a legal integer type
  // then we replace SETCC with a subtraction.
  for (const SDNode *U : N->users())
    if (U->getOpcode() != ISD::ZERO_EXTEND)
      return SDValue();

  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
  auto OpSize = N->getOperand(0).getValueSizeInBits();


  // Only the unsigned orderings below can be rewritten, and only when the
  // compared operands are strictly narrower than Size bits; the bool flags
  // select the complement/swap combination the helper needs for each CC.
  if (OpSize < Size) {
    switch (CC) {
    default: break;
    case ISD::SETULT:
      return generateEquivalentSub(N, Size, false, false, DL, DAG);
    case ISD::SETULE:
      return generateEquivalentSub(N, Size, true, true, DL, DAG);
    case ISD::SETUGT:
      return generateEquivalentSub(N, Size, false, true, DL, DAG);
    case ISD::SETUGE:
      return generateEquivalentSub(N, Size, true, false, DL, DAG);
    }
  }

  return SDValue();
}
15161
// Keep boolean computations in CR bits: when a truncation to i1 (or an
// i1-producing setcc/select_cc) is fed only by bitwise ops, selects, and
// extensions of i1 values, rebuild the whole cluster directly on MVT::i1 so
// nothing is needlessly moved into GPRs.
SDValue PPCTargetLowering::DAGCombineTruncBoolExt(SDNode *N,
                                                  DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  SDLoc dl(N);

  assert(Subtarget.useCRBits() && "Expecting to be tracking CR bits");
  // If we're tracking CR bits, we need to be careful that we don't have:
  //   trunc(binary-ops(zext(x), zext(y)))
  // or
  //   trunc(binary-ops(binary-ops(zext(x), zext(y)), ...)
  // such that we're unnecessarily moving things into GPRs when it would be
  // better to keep them in CR bits.

  // Note that trunc here can be an actual i1 trunc, or can be the effective
  // truncation that comes from a setcc or select_cc.
  if (N->getOpcode() == ISD::TRUNCATE &&
      N->getValueType(0) != MVT::i1)
    return SDValue();

  if (N->getOperand(0).getValueType() != MVT::i32 &&
      N->getOperand(0).getValueType() != MVT::i64)
    return SDValue();

  if (N->getOpcode() == ISD::SETCC ||
      N->getOpcode() == ISD::SELECT_CC) {
    // If we're looking at a comparison, then we need to make sure that the
    // high bits (all except for the first) don't matter the result.
    ISD::CondCode CC =
      cast<CondCodeSDNode>(N->getOperand(
        N->getOpcode() == ISD::SETCC ? 2 : 4))->get();
    unsigned OpBits = N->getOperand(0).getValueSizeInBits();

    if (ISD::isSignedIntSetCC(CC)) {
      // Signed compare: both operands must be fully sign-extended.
      if (DAG.ComputeNumSignBits(N->getOperand(0)) != OpBits ||
          DAG.ComputeNumSignBits(N->getOperand(1)) != OpBits)
        return SDValue();
    } else if (ISD::isUnsignedIntSetCC(CC)) {
      // Unsigned compare: all bits above bit 0 must be known zero; if not,
      // a SETCC may still be convertible to a subtraction-based form.
      if (!DAG.MaskedValueIsZero(N->getOperand(0),
                                 APInt::getHighBitsSet(OpBits, OpBits-1)) ||
          !DAG.MaskedValueIsZero(N->getOperand(1),
                                 APInt::getHighBitsSet(OpBits, OpBits-1)))
        return (N->getOpcode() == ISD::SETCC ? ConvertSETCCToSubtract(N, DCI)
                                             : SDValue());
    } else {
      // This is neither a signed nor an unsigned comparison, just make sure
      // that the high bits are equal.
      KnownBits Op1Known = DAG.computeKnownBits(N->getOperand(0));
      KnownBits Op2Known = DAG.computeKnownBits(N->getOperand(1));

      // We don't really care about what is known about the first bit (if
      // anything), so pretend that it is known zero for both to ensure they can
      // be compared as constants.
      Op1Known.Zero.setBit(0); Op1Known.One.clearBit(0);
      Op2Known.Zero.setBit(0); Op2Known.One.clearBit(0);

      if (!Op1Known.isConstant() || !Op2Known.isConstant() ||
          Op1Known.getConstant() != Op2Known.getConstant())
        return SDValue();
    }
  }

  // We now know that the higher-order bits are irrelevant, we just need to
  // make sure that all of the intermediate operations are bit operations, and
  // all inputs are extensions.
  if (N->getOperand(0).getOpcode() != ISD::AND &&
      N->getOperand(0).getOpcode() != ISD::OR &&
      N->getOperand(0).getOpcode() != ISD::XOR &&
      N->getOperand(0).getOpcode() != ISD::SELECT &&
      N->getOperand(0).getOpcode() != ISD::SELECT_CC &&
      N->getOperand(0).getOpcode() != ISD::TRUNCATE &&
      N->getOperand(0).getOpcode() != ISD::SIGN_EXTEND &&
      N->getOperand(0).getOpcode() != ISD::ZERO_EXTEND &&
      N->getOperand(0).getOpcode() != ISD::ANY_EXTEND)
    return SDValue();

  if ((N->getOpcode() == ISD::SETCC || N->getOpcode() == ISD::SELECT_CC) &&
      N->getOperand(1).getOpcode() != ISD::AND &&
      N->getOperand(1).getOpcode() != ISD::OR &&
      N->getOperand(1).getOpcode() != ISD::XOR &&
      N->getOperand(1).getOpcode() != ISD::SELECT &&
      N->getOperand(1).getOpcode() != ISD::SELECT_CC &&
      N->getOperand(1).getOpcode() != ISD::TRUNCATE &&
      N->getOperand(1).getOpcode() != ISD::SIGN_EXTEND &&
      N->getOperand(1).getOpcode() != ISD::ZERO_EXTEND &&
      N->getOperand(1).getOpcode() != ISD::ANY_EXTEND)
    return SDValue();

  SmallVector<SDValue, 8> BinOps, PromOps;
  SmallPtrSet<SDNode *, 16> Visited;

  // Seed the worklist from N's operand(s): extensions of i1 (and constants)
  // are leaves ("inputs"), everything else is assumed to be a binary op to
  // explore. A TRUNCATE has only one relevant operand.
  for (unsigned i = 0; i < 2; ++i) {
    if (((N->getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
          N->getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
          N->getOperand(i).getOpcode() == ISD::ANY_EXTEND) &&
          N->getOperand(i).getOperand(0).getValueType() == MVT::i1) ||
        isa<ConstantSDNode>(N->getOperand(i)))
      Inputs.push_back(N->getOperand(i));
    else
      BinOps.push_back(N->getOperand(i));

    if (N->getOpcode() == ISD::TRUNCATE)
      break;
  }

  // Visit all inputs, collect all binary operations (and, or, xor and
  // select) that are all fed by extensions.
  while (!BinOps.empty()) {
    SDValue BinOp = BinOps.pop_back_val();

    if (!Visited.insert(BinOp.getNode()).second)
      continue;

    PromOps.push_back(BinOp);

    for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) {
      // The condition of the select is not promoted.
      if (BinOp.getOpcode() == ISD::SELECT && i == 0)
        continue;
      if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3)
        continue;

      if (((BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
            BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
            BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) &&
            BinOp.getOperand(i).getOperand(0).getValueType() == MVT::i1) ||
          isa<ConstantSDNode>(BinOp.getOperand(i))) {
        Inputs.push_back(BinOp.getOperand(i));
      } else if (BinOp.getOperand(i).getOpcode() == ISD::AND ||
                 BinOp.getOperand(i).getOpcode() == ISD::OR ||
                 BinOp.getOperand(i).getOpcode() == ISD::XOR ||
                 BinOp.getOperand(i).getOpcode() == ISD::SELECT ||
                 BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC ||
                 BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE ||
                 BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
                 BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
                 BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) {
        BinOps.push_back(BinOp.getOperand(i));
      } else {
        // We have an input that is not an extension or another binary
        // operation; we'll abort this transformation.
        return SDValue();
      }
    }
  }

  // Make sure that this is a self-contained cluster of operations (which
  // is not quite the same thing as saying that everything has only one
  // use).
  for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
    if (isa<ConstantSDNode>(Inputs[i]))
      continue;

    for (const SDNode *User : Inputs[i].getNode()->users()) {
      if (User != N && !Visited.count(User))
        return SDValue();

      // Make sure that we're not going to promote the non-output-value
      // operand(s) or SELECT or SELECT_CC.
      // FIXME: Although we could sometimes handle this, and it does occur in
      // practice that one of the condition inputs to the select is also one of
      // the outputs, we currently can't deal with this.
      if (User->getOpcode() == ISD::SELECT) {
        if (User->getOperand(0) == Inputs[i])
          return SDValue();
      } else if (User->getOpcode() == ISD::SELECT_CC) {
        if (User->getOperand(0) == Inputs[i] ||
            User->getOperand(1) == Inputs[i])
          return SDValue();
      }
    }
  }

  for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) {
    for (const SDNode *User : PromOps[i].getNode()->users()) {
      if (User != N && !Visited.count(User))
        return SDValue();

      // Make sure that we're not going to promote the non-output-value
      // operand(s) or SELECT or SELECT_CC.
      // FIXME: Although we could sometimes handle this, and it does occur in
      // practice that one of the condition inputs to the select is also one of
      // the outputs, we currently can't deal with this.
      if (User->getOpcode() == ISD::SELECT) {
        if (User->getOperand(0) == PromOps[i])
          return SDValue();
      } else if (User->getOpcode() == ISD::SELECT_CC) {
        if (User->getOperand(0) == PromOps[i] ||
            User->getOperand(1) == PromOps[i])
          return SDValue();
      }
    }
  }

  // Replace all inputs with the extension operand.
  for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
    // Constants may have users outside the cluster of to-be-promoted nodes,
    // and so we need to replace those as we do the promotions.
    if (isa<ConstantSDNode>(Inputs[i]))
      continue;
    else
      DAG.ReplaceAllUsesOfValueWith(Inputs[i], Inputs[i].getOperand(0));
  }

  // Hold the promoted ops via HandleSDNode so that RAUW/CSE during the loop
  // below cannot delete them out from under us.
  std::list<HandleSDNode> PromOpHandles;
  for (auto &PromOp : PromOps)
    PromOpHandles.emplace_back(PromOp);

  // Replace all operations (these are all the same, but have a different
  // (i1) return type). DAG.getNode will validate that the types of
  // a binary operator match, so go through the list in reverse so that
  // we've likely promoted both operands first. Any intermediate truncations or
  // extensions disappear.
  while (!PromOpHandles.empty()) {
    SDValue PromOp = PromOpHandles.back().getValue();
    PromOpHandles.pop_back();

    if (PromOp.getOpcode() == ISD::TRUNCATE ||
        PromOp.getOpcode() == ISD::SIGN_EXTEND ||
        PromOp.getOpcode() == ISD::ZERO_EXTEND ||
        PromOp.getOpcode() == ISD::ANY_EXTEND) {
      if (!isa<ConstantSDNode>(PromOp.getOperand(0)) &&
          PromOp.getOperand(0).getValueType() != MVT::i1) {
        // The operand is not yet ready (see comment below).
        PromOpHandles.emplace_front(PromOp);
        continue;
      }

      SDValue RepValue = PromOp.getOperand(0);
      if (isa<ConstantSDNode>(RepValue))
        RepValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, RepValue);

      DAG.ReplaceAllUsesOfValueWith(PromOp, RepValue);
      continue;
    }

    // C is the index of the first promotable operand: past the condition of
    // a SELECT, or past the two compared values of a SELECT_CC.
    unsigned C;
    switch (PromOp.getOpcode()) {
    default: C = 0; break;
    case ISD::SELECT: C = 1; break;
    case ISD::SELECT_CC: C = 2; break;
    }

    if ((!isa<ConstantSDNode>(PromOp.getOperand(C)) &&
         PromOp.getOperand(C).getValueType() != MVT::i1) ||
        (!isa<ConstantSDNode>(PromOp.getOperand(C+1)) &&
         PromOp.getOperand(C+1).getValueType() != MVT::i1)) {
      // The to-be-promoted operands of this node have not yet been
      // promoted (this should be rare because we're going through the
      // list backward, but if one of the operands has several users in
      // this cluster of to-be-promoted nodes, it is possible).
      PromOpHandles.emplace_front(PromOp);
      continue;
    }


    // If there are any constant inputs, make sure they're replaced now.
    for (unsigned i = 0; i < 2; ++i)
      if (isa<ConstantSDNode>(Ops[C+i]))
        Ops[C+i] = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, Ops[C+i]);

    DAG.ReplaceAllUsesOfValueWith(PromOp,
      DAG.getNode(PromOp.getOpcode(), dl, MVT::i1, Ops));
  }

  // Now we're left with the initial truncation itself.
  if (N->getOpcode() == ISD::TRUNCATE)
    return N->getOperand(0);

  // Otherwise, this is a comparison. The operands to be compared have just
  // changed type (to i1), but everything else is the same.
  return SDValue(N, 0);
}
15436
SDValue PPCTargetLowering::DAGCombineExtBoolTrunc(SDNode *N,
                                                  DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  SDLoc dl(N);

  // If we're tracking CR bits, we need to be careful that we don't have:
  //   zext(binary-ops(trunc(x), trunc(y)))
  // or
  //   zext(binary-ops(binary-ops(trunc(x), trunc(y)), ...)
  // such that we're unnecessarily moving things into CR bits that can more
  // efficiently stay in GPRs. Note that if we're not certain that the high
  // bits are set as required by the final extension, we still may need to do
  // some masking to get the proper behavior.

  // This same functionality is important on PPC64 when dealing with
  // 32-to-64-bit extensions; these occur often when 32-bit values are used as
  // the return values of functions. Because it is so similar, it is handled
  // here as well.

  if (N->getValueType(0) != MVT::i32 &&
      N->getValueType(0) != MVT::i64)
    return SDValue();

  if (!((N->getOperand(0).getValueType() == MVT::i1 && Subtarget.useCRBits()) ||
        (N->getOperand(0).getValueType() == MVT::i32 && Subtarget.isPPC64())))
    return SDValue();

  if (N->getOperand(0).getOpcode() != ISD::AND &&
      N->getOperand(0).getOpcode() != ISD::OR &&
      N->getOperand(0).getOpcode() != ISD::XOR &&
      N->getOperand(0).getOpcode() != ISD::SELECT &&
      N->getOperand(0).getOpcode() != ISD::SELECT_CC)
    return SDValue();

  SmallVector<SDValue, 8> BinOps(1, N->getOperand(0)), PromOps;
  SmallPtrSet<SDNode *, 16> Visited;

  // Visit all inputs, collect all binary operations (and, or, xor and
  // select) that are all fed by truncations.
  while (!BinOps.empty()) {
    SDValue BinOp = BinOps.pop_back_val();

    if (!Visited.insert(BinOp.getNode()).second)
      continue;

    PromOps.push_back(BinOp);

    for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) {
      // The condition of the select is not promoted.
      if (BinOp.getOpcode() == ISD::SELECT && i == 0)
        continue;
      if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3)
        continue;

      if (BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE ||
          isa<ConstantSDNode>(BinOp.getOperand(i))) {
        Inputs.push_back(BinOp.getOperand(i));
      } else if (BinOp.getOperand(i).getOpcode() == ISD::AND ||
                 BinOp.getOperand(i).getOpcode() == ISD::OR ||
                 BinOp.getOperand(i).getOpcode() == ISD::XOR ||
                 BinOp.getOperand(i).getOpcode() == ISD::SELECT ||
                 BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC) {
        BinOps.push_back(BinOp.getOperand(i));
      } else {
        // We have an input that is not a truncation or another binary
        // operation; we'll abort this transformation.
        return SDValue();
      }
    }
  }

  // The operands of a select that must be truncated when the select is
  // promoted because the operand is actually part of the to-be-promoted set.
  DenseMap<SDNode *, EVT> SelectTruncOp[2];

  // Make sure that this is a self-contained cluster of operations (which
  // is not quite the same thing as saying that everything has only one
  // use).
  for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
    if (isa<ConstantSDNode>(Inputs[i]))
      continue;

    for (SDNode *User : Inputs[i].getNode()->users()) {
      if (User != N && !Visited.count(User))
        return SDValue();

      // If we're going to promote the non-output-value operand(s) or SELECT or
      // SELECT_CC, record them for truncation.
      if (User->getOpcode() == ISD::SELECT) {
        if (User->getOperand(0) == Inputs[i])
          SelectTruncOp[0].insert(std::make_pair(User,
                                  User->getOperand(0).getValueType()));
      } else if (User->getOpcode() == ISD::SELECT_CC) {
        if (User->getOperand(0) == Inputs[i])
          SelectTruncOp[0].insert(std::make_pair(User,
                                  User->getOperand(0).getValueType()));
        if (User->getOperand(1) == Inputs[i])
          SelectTruncOp[1].insert(std::make_pair(User,
                                  User->getOperand(1).getValueType()));
      }
    }
  }

  for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) {
    for (SDNode *User : PromOps[i].getNode()->users()) {
      if (User != N && !Visited.count(User))
        return SDValue();

      // If we're going to promote the non-output-value operand(s) or SELECT or
      // SELECT_CC, record them for truncation.
      if (User->getOpcode() == ISD::SELECT) {
        if (User->getOperand(0) == PromOps[i])
          SelectTruncOp[0].insert(std::make_pair(User,
                                  User->getOperand(0).getValueType()));
      } else if (User->getOpcode() == ISD::SELECT_CC) {
        if (User->getOperand(0) == PromOps[i])
          SelectTruncOp[0].insert(std::make_pair(User,
                                  User->getOperand(0).getValueType()));
        if (User->getOperand(1) == PromOps[i])
          SelectTruncOp[1].insert(std::make_pair(User,
                                  User->getOperand(1).getValueType()));
      }
    }
  }

  unsigned PromBits = N->getOperand(0).getValueSizeInBits();
  bool ReallyNeedsExt = false;
  if (N->getOpcode() != ISD::ANY_EXTEND) {
    // If all of the inputs are not already sign/zero extended, then
    // we'll still need to do that at the end.
    for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
      if (isa<ConstantSDNode>(Inputs[i]))
        continue;

      unsigned OpBits =
        Inputs[i].getOperand(0).getValueSizeInBits();
      assert(PromBits < OpBits && "Truncation not to a smaller bit count?");

      if ((N->getOpcode() == ISD::ZERO_EXTEND &&
           !DAG.MaskedValueIsZero(Inputs[i].getOperand(0),
                                  APInt::getHighBitsSet(OpBits,
                                                        OpBits-PromBits))) ||
          (N->getOpcode() == ISD::SIGN_EXTEND &&
           DAG.ComputeNumSignBits(Inputs[i].getOperand(0)) <
             (OpBits-(PromBits-1)))) {
        ReallyNeedsExt = true;
        break;
      }
    }
  }

  // Convert PromOps to handles before doing any RAUW operations, as these
  // may CSE with existing nodes, deleting the originals.
  std::list<HandleSDNode> PromOpHandles;
  for (auto &PromOp : PromOps)
    PromOpHandles.emplace_back(PromOp);

  // Replace all inputs, either with the truncation operand, or a
  // truncation or extension to the final output type.
  for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
    // Constant inputs need to be replaced with the to-be-promoted nodes that
    // use them because they might have users outside of the cluster of
    // promoted nodes.
    if (isa<ConstantSDNode>(Inputs[i]))
      continue;

    SDValue InSrc = Inputs[i].getOperand(0);
    if (Inputs[i].getValueType() == N->getValueType(0))
      DAG.ReplaceAllUsesOfValueWith(Inputs[i], InSrc);
    else if (N->getOpcode() == ISD::SIGN_EXTEND)
      DAG.ReplaceAllUsesOfValueWith(Inputs[i],
        DAG.getSExtOrTrunc(InSrc, dl, N->getValueType(0)));
    else if (N->getOpcode() == ISD::ZERO_EXTEND)
      DAG.ReplaceAllUsesOfValueWith(Inputs[i],
        DAG.getZExtOrTrunc(InSrc, dl, N->getValueType(0)));
    else
      DAG.ReplaceAllUsesOfValueWith(Inputs[i],
        DAG.getAnyExtOrTrunc(InSrc, dl, N->getValueType(0)));
  }

  // Replace all operations (these are all the same, but have a different
  // (promoted) return type). DAG.getNode will validate that the types of
  // a binary operator match, so go through the list in reverse so that
  // we've likely promoted both operands first.
  while (!PromOpHandles.empty()) {
    SDValue PromOp = PromOpHandles.back().getValue();
    PromOpHandles.pop_back();

    // C is the index of the first promotable operand: past the condition of
    // a SELECT, or past the two compared values of a SELECT_CC.
    unsigned C;
    switch (PromOp.getOpcode()) {
    default: C = 0; break;
    case ISD::SELECT: C = 1; break;
    case ISD::SELECT_CC: C = 2; break;
    }

    if ((!isa<ConstantSDNode>(PromOp.getOperand(C)) &&
         PromOp.getOperand(C).getValueType() != N->getValueType(0)) ||
        (!isa<ConstantSDNode>(PromOp.getOperand(C+1)) &&
         PromOp.getOperand(C+1).getValueType() != N->getValueType(0))) {
      // The to-be-promoted operands of this node have not yet been
      // promoted (this should be rare because we're going through the
      // list backward, but if one of the operands has several users in
      // this cluster of to-be-promoted nodes, it is possible).
      PromOpHandles.emplace_front(PromOp);
      continue;
    }

    // For SELECT and SELECT_CC nodes, we do a similar check for any
    // to-be-promoted comparison inputs.
    if (PromOp.getOpcode() == ISD::SELECT ||
        PromOp.getOpcode() == ISD::SELECT_CC) {
      if ((SelectTruncOp[0].count(PromOp.getNode()) &&
           PromOp.getOperand(0).getValueType() != N->getValueType(0)) ||
          (SelectTruncOp[1].count(PromOp.getNode()) &&
           PromOp.getOperand(1).getValueType() != N->getValueType(0))) {
        PromOpHandles.emplace_front(PromOp);
        continue;
      }
    }


    // If this node has constant inputs, then they'll need to be promoted here.
    for (unsigned i = 0; i < 2; ++i) {
      if (!isa<ConstantSDNode>(Ops[C+i]))
        continue;
      if (Ops[C+i].getValueType() == N->getValueType(0))
        continue;

      if (N->getOpcode() == ISD::SIGN_EXTEND)
        Ops[C+i] = DAG.getSExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
      else if (N->getOpcode() == ISD::ZERO_EXTEND)
        Ops[C+i] = DAG.getZExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
      else
        Ops[C+i] = DAG.getAnyExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
    }

    // If we've promoted the comparison inputs of a SELECT or SELECT_CC,
    // truncate them again to the original value type.
    if (PromOp.getOpcode() == ISD::SELECT ||
        PromOp.getOpcode() == ISD::SELECT_CC) {
      auto SI0 = SelectTruncOp[0].find(PromOp.getNode());
      if (SI0 != SelectTruncOp[0].end())
        Ops[0] = DAG.getNode(ISD::TRUNCATE, dl, SI0->second, Ops[0]);
      auto SI1 = SelectTruncOp[1].find(PromOp.getNode());
      if (SI1 != SelectTruncOp[1].end())
        Ops[1] = DAG.getNode(ISD::TRUNCATE, dl, SI1->second, Ops[1]);
    }

    DAG.ReplaceAllUsesOfValueWith(PromOp,
      DAG.getNode(PromOp.getOpcode(), dl, N->getValueType(0), Ops));
  }

  // Now we're left with the initial extension itself.
  if (!ReallyNeedsExt)
    return N->getOperand(0);

  // To zero extend, just mask off everything except for the first bit (in the
  // i1 case).
  if (N->getOpcode() == ISD::ZERO_EXTEND)
    return DAG.getNode(ISD::AND, dl, N->getValueType(0), N->getOperand(0),
                                           N->getValueSizeInBits(0), PromBits),
                                       dl, N->getValueType(0)));

  // Sign extend by shifting the (promoted) value all the way up and then
  // arithmetically back down again.
  assert(N->getOpcode() == ISD::SIGN_EXTEND &&
         "Invalid extension type");
  EVT ShiftAmountTy = getShiftAmountTy(N->getValueType(0), DAG.getDataLayout());
  SDValue ShiftCst =
      DAG.getConstant(N->getValueSizeInBits(0) - PromBits, dl, ShiftAmountTy);
  return DAG.getNode(
      ISD::SRA, dl, N->getValueType(0),
      DAG.getNode(ISD::SHL, dl, N->getValueType(0), N->getOperand(0), ShiftCst),
      ShiftCst);
}
15713
// Check whether both i128 operands of a comparison can be re-expressed as
// v16i8 values (constants, or loads convertible to v16i8 loads) so the
// compare can be lowered to vcmpequb.

  auto isValidForConvert = [](SDValue &Operand) {
    // The operand must have a single use; otherwise rewriting it would
    // affect other users.
    if (!Operand.hasOneUse())
      return false;

    if (Operand.getValueType() != MVT::i128)
      return false;

    // An i128 constant can simply be bitcast to v16i8.
    if (Operand.getOpcode() == ISD::Constant)
      return true;

    // Anything else must be a plain load.
    auto *LoadNode = dyn_cast<LoadSDNode>(Operand);
    if (!LoadNode)
      return false;

    // If memory operation is volatile, do not perform any
    // optimization or transformation. Volatile operations must be preserved
    // as written to ensure correct program behavior, so we return an empty
    // SDValue to indicate no action.

    if (LoadNode->isVolatile())
      return false;

    // Only combine loads if both use the unindexed addressing mode.
    // PowerPC AltiVec/VMX does not support vector loads or stores with
    // pre/post-increment addressing. Indexed modes may imply implicit
    // pointer updates, which are not compatible with AltiVec vector
    // instructions.
    if (LoadNode->getAddressingMode() != ISD::UNINDEXED)
      return false;

    // Only combine loads if both are non-extending loads
    // (ISD::NON_EXTLOAD). Extending loads (such as ISD::ZEXTLOAD or
    // ISD::SEXTLOAD) perform zero or sign extension, which may change the
    // loaded value's semantics and are not compatible with vector loads.
    if (LoadNode->getExtensionType() != ISD::NON_EXTLOAD)
      return false;

    return true;
  };

  return (isValidForConvert(LHS) && isValidForConvert(RHS));
}
15759
15761 const SDLoc &DL) {
15762
15763 assert(N->getOpcode() == ISD::SETCC && "Should be called with a SETCC node");
15764
15765 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
15766 assert((CC == ISD::SETNE || CC == ISD::SETEQ) &&
15767 "CC mus be ISD::SETNE or ISD::SETEQ");
15768
15769 auto getV16i8Load = [&](const SDValue &Operand) {
15770 if (Operand.getOpcode() == ISD::Constant)
15771 return DAG.getBitcast(MVT::v16i8, Operand);
15772
15773 assert(Operand.getOpcode() == ISD::LOAD && "Must be LoadSDNode here.");
15774
15775 auto *LoadNode = cast<LoadSDNode>(Operand);
15776 SDValue NewLoad =
15777 DAG.getLoad(MVT::v16i8, DL, LoadNode->getChain(),
15778 LoadNode->getBasePtr(), LoadNode->getMemOperand());
15779 DAG.ReplaceAllUsesOfValueWith(Operand.getValue(1), NewLoad.getValue(1));
15780 return NewLoad;
15781 };
15782
15783 // Following code transforms the DAG
15784 // t0: ch,glue = EntryToken
15785 // t2: i64,ch = CopyFromReg t0, Register:i64 %0
15786 // t3: i128,ch = load<(load (s128) from %ir.a, align 1)> t0, t2,
15787 // undef:i64
15788 // t4: i64,ch = CopyFromReg t0, Register:i64 %1
15789 // t5: i128,ch =
15790 // load<(load (s128) from %ir.b, align 1)> t0, t4, undef:i64 t6: i1 =
15791 // setcc t3, t5, setne:ch
15792 //
15793 // ---->
15794 //
15795 // t0: ch,glue = EntryToken
15796 // t2: i64,ch = CopyFromReg t0, Register:i64 %0
15797 // t3: v16i8,ch = load<(load (s128) from %ir.a, align 1)> t0, t2,
15798 // undef:i64
15799 // t4: i64,ch = CopyFromReg t0, Register:i64 %1
15800 // t5: v16i8,ch =
15801 // load<(load (s128) from %ir.b, align 1)> t0, t4, undef:i64
15802 // t6: i32 =
15803 // llvm.ppc.altivec.vcmpequb.p TargetConstant:i32<10505>,
15804 // Constant:i32<2>, t3, t5
15805 // t7: i1 = setcc t6, Constant:i32<0>, seteq:ch
15806
15807 // Or transforms the DAG
15808 // t5: i128,ch = load<(load (s128) from %ir.X, align 1)> t0, t2, undef:i64
15809 // t8: i1 =
15810 // setcc Constant:i128<237684487579686500932345921536>, t5, setne:ch
15811 //
15812 // --->
15813 //
15814 // t5: v16i8,ch = load<(load (s128) from %ir.X, align 1)> t0, t2, undef:i64
15815 // t6: v16i8 = bitcast Constant:i128<237684487579686500932345921536>
15816 // t7: i32 =
15817 // llvm.ppc.altivec.vcmpequb.p Constant:i32<10962>, Constant:i32<2>, t5, t2
15818
15819 SDValue LHSVec = getV16i8Load(N->getOperand(0));
15820 SDValue RHSVec = getV16i8Load(N->getOperand(1));
15821
15822 SDValue IntrID =
15823 DAG.getConstant(Intrinsic::ppc_altivec_vcmpequb_p, DL, MVT::i32);
15824 SDValue CRSel = DAG.getConstant(2, DL, MVT::i32); // which CR6 predicate field
15825 SDValue PredResult = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, MVT::i32,
15826 IntrID, CRSel, LHSVec, RHSVec);
15827 // ppc_altivec_vcmpequb_p returns 1 when two vectors are the same,
15828 // so we need to invert the CC opcode.
15829 return DAG.getSetCC(DL, N->getValueType(0), PredResult,
15830 DAG.getConstant(0, DL, MVT::i32),
15831 CC == ISD::SETNE ? ISD::SETEQ : ISD::SETNE);
15832}
15833
15834// Detect whether there is a pattern like (setcc (and X, 1), 0, eq).
15835// If it is , return true; otherwise return false.
15837 assert(N->getOpcode() == ISD::SETCC && "Should be SETCC SDNode here.");
15838
15839 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
15840 if (CC != ISD::SETEQ)
15841 return false;
15842
15843 SDValue LHS = N->getOperand(0);
15844 SDValue RHS = N->getOperand(1);
15845
15846 // Check the `SDValue &V` is from `and` with `1`.
15847 auto IsAndWithOne = [](SDValue &V) {
15848 if (V.getOpcode() == ISD::AND) {
15849 for (const SDValue &Op : V->ops())
15850 if (auto *C = dyn_cast<ConstantSDNode>(Op))
15851 if (C->isOne())
15852 return true;
15853 }
15854 return false;
15855 };
15856
15857 // Check whether the SETCC compare with zero.
15858 auto IsCompareWithZero = [](SDValue &V) {
15859 if (auto *C = dyn_cast<ConstantSDNode>(V))
15860 if (C->isZero())
15861 return true;
15862 return false;
15863 };
15864
15865 return (IsAndWithOne(LHS) && IsCompareWithZero(RHS)) ||
15866 (IsAndWithOne(RHS) && IsCompareWithZero(LHS));
15867}
15868
15869// You must check whether the `SDNode* N` can be converted to Xori using
15870// the function `static bool canConvertSETCCToXori(SDNode *N)`
15871// before calling the function; otherwise, it may produce incorrect results.
15873
15874 assert(N->getOpcode() == ISD::SETCC && "Should be SETCC SDNode here.");
15875 SDValue LHS = N->getOperand(0);
15876 SDValue RHS = N->getOperand(1);
15877 SDLoc DL(N);
15878
15879 [[maybe_unused]] ISD::CondCode CC =
15880 cast<CondCodeSDNode>(N->getOperand(2))->get();
15881 assert((CC == ISD::SETEQ) && "CC must be ISD::SETEQ.");
15882 // Rewrite it as XORI (and X, 1), 1.
15883 auto MakeXor1 = [&](SDValue V) {
15884 EVT VT = V.getValueType();
15885 SDValue One = DAG.getConstant(1, DL, VT);
15886 SDValue Xor = DAG.getNode(ISD::XOR, DL, VT, V, One);
15887 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, Xor);
15888 };
15889
15890 if (LHS.getOpcode() == ISD::AND && RHS.getOpcode() != ISD::AND)
15891 return MakeXor1(LHS);
15892
15893 if (RHS.getOpcode() == ISD::AND && LHS.getOpcode() != ISD::AND)
15894 return MakeXor1(RHS);
15895
15896 llvm_unreachable("Should not reach here.");
15897}
15898
15899SDValue PPCTargetLowering::combineSetCC(SDNode *N,
15900 DAGCombinerInfo &DCI) const {
15901 assert(N->getOpcode() == ISD::SETCC &&
15902 "Should be called with a SETCC node");
15903
15904 // Check if the pattern (setcc (and X, 1), 0, eq) is present.
15905 // If it is, rewrite it as XORI (and X, 1), 1.
15907 return ConvertSETCCToXori(N, DCI.DAG);
15908
15909 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
15910 if (CC == ISD::SETNE || CC == ISD::SETEQ) {
15911 SDValue LHS = N->getOperand(0);
15912 SDValue RHS = N->getOperand(1);
15913
15914 // If there is a '0 - y' pattern, canonicalize the pattern to the RHS.
15915 if (LHS.getOpcode() == ISD::SUB && isNullConstant(LHS.getOperand(0)) &&
15916 LHS.hasOneUse())
15917 std::swap(LHS, RHS);
15918
15919 // x == 0-y --> x+y == 0
15920 // x != 0-y --> x+y != 0
15921 if (RHS.getOpcode() == ISD::SUB && isNullConstant(RHS.getOperand(0)) &&
15922 RHS.hasOneUse()) {
15923 SDLoc DL(N);
15924 SelectionDAG &DAG = DCI.DAG;
15925 EVT VT = N->getValueType(0);
15926 EVT OpVT = LHS.getValueType();
15927 SDValue Add = DAG.getNode(ISD::ADD, DL, OpVT, LHS, RHS.getOperand(1));
15928 return DAG.getSetCC(DL, VT, Add, DAG.getConstant(0, DL, OpVT), CC);
15929 }
15930
15931 // Optimization: Fold i128 equality/inequality compares of two loads into a
15932 // vectorized compare using vcmpequb.p when Altivec is available.
15933 //
15934 // Rationale:
15935 // A scalar i128 SETCC (eq/ne) normally lowers to multiple scalar ops.
15936 // On VSX-capable subtargets, we can instead reinterpret the i128 loads
15937 // as v16i8 vectors and use the Altive vcmpequb.p instruction to
15938 // perform a full 128-bit equality check in a single vector compare.
15939 //
15940 // Example Result:
15941 // This transformation replaces memcmp(a, b, 16) with two vector loads
15942 // and one vector compare instruction.
15943
15944 if (Subtarget.hasAltivec() && canConvertToVcmpequb(LHS, RHS))
15945 return convertTwoLoadsAndCmpToVCMPEQUB(DCI.DAG, N, SDLoc(N));
15946 }
15947
15948 return DAGCombineTruncBoolExt(N, DCI);
15949}
15950
15951// Is this an extending load from an f32 to an f64?
15952static bool isFPExtLoad(SDValue Op) {
15953 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Op.getNode()))
15954 return LD->getExtensionType() == ISD::EXTLOAD &&
15955 Op.getValueType() == MVT::f64;
15956 return false;
15957}
15958
15959/// Reduces the number of fp-to-int conversion when building a vector.
15960///
15961/// If this vector is built out of floating to integer conversions,
15962/// transform it to a vector built out of floating point values followed by a
15963/// single floating to integer conversion of the vector.
15964/// Namely (build_vector (fptosi $A), (fptosi $B), ...)
15965/// becomes (fptosi (build_vector ($A, $B, ...)))
15966SDValue PPCTargetLowering::
15967combineElementTruncationToVectorTruncation(SDNode *N,
15968 DAGCombinerInfo &DCI) const {
15969 assert(N->getOpcode() == ISD::BUILD_VECTOR &&
15970 "Should be called with a BUILD_VECTOR node");
15971
15972 SelectionDAG &DAG = DCI.DAG;
15973 SDLoc dl(N);
15974
15975 SDValue FirstInput = N->getOperand(0);
15976 assert(FirstInput.getOpcode() == PPCISD::MFVSR &&
15977 "The input operand must be an fp-to-int conversion.");
15978
15979 // This combine happens after legalization so the fp_to_[su]i nodes are
15980 // already converted to PPCSISD nodes.
15981 unsigned FirstConversion = FirstInput.getOperand(0).getOpcode();
15982 if (FirstConversion == PPCISD::FCTIDZ ||
15983 FirstConversion == PPCISD::FCTIDUZ ||
15984 FirstConversion == PPCISD::FCTIWZ ||
15985 FirstConversion == PPCISD::FCTIWUZ) {
15986 bool IsSplat = true;
15987 bool Is32Bit = FirstConversion == PPCISD::FCTIWZ ||
15988 FirstConversion == PPCISD::FCTIWUZ;
15989 EVT SrcVT = FirstInput.getOperand(0).getValueType();
15991 EVT TargetVT = N->getValueType(0);
15992 for (int i = 0, e = N->getNumOperands(); i < e; ++i) {
15993 SDValue NextOp = N->getOperand(i);
15994 if (NextOp.getOpcode() != PPCISD::MFVSR)
15995 return SDValue();
15996 unsigned NextConversion = NextOp.getOperand(0).getOpcode();
15997 if (NextConversion != FirstConversion)
15998 return SDValue();
15999 // If we are converting to 32-bit integers, we need to add an FP_ROUND.
16000 // This is not valid if the input was originally double precision. It is
16001 // also not profitable to do unless this is an extending load in which
16002 // case doing this combine will allow us to combine consecutive loads.
16003 if (Is32Bit && !isFPExtLoad(NextOp.getOperand(0).getOperand(0)))
16004 return SDValue();
16005 if (N->getOperand(i) != FirstInput)
16006 IsSplat = false;
16007 }
16008
16009 // If this is a splat, we leave it as-is since there will be only a single
16010 // fp-to-int conversion followed by a splat of the integer. This is better
16011 // for 32-bit and smaller ints and neutral for 64-bit ints.
16012 if (IsSplat)
16013 return SDValue();
16014
16015 // Now that we know we have the right type of node, get its operands
16016 for (int i = 0, e = N->getNumOperands(); i < e; ++i) {
16017 SDValue In = N->getOperand(i).getOperand(0);
16018 if (Is32Bit) {
16019 // For 32-bit values, we need to add an FP_ROUND node (if we made it
16020 // here, we know that all inputs are extending loads so this is safe).
16021 if (In.isUndef())
16022 Ops.push_back(DAG.getUNDEF(SrcVT));
16023 else {
16024 SDValue Trunc =
16025 DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, In.getOperand(0),
16026 DAG.getIntPtrConstant(1, dl, /*isTarget=*/true));
16027 Ops.push_back(Trunc);
16028 }
16029 } else
16030 Ops.push_back(In.isUndef() ? DAG.getUNDEF(SrcVT) : In.getOperand(0));
16031 }
16032
16033 unsigned Opcode;
16034 if (FirstConversion == PPCISD::FCTIDZ ||
16035 FirstConversion == PPCISD::FCTIWZ)
16036 Opcode = ISD::FP_TO_SINT;
16037 else
16038 Opcode = ISD::FP_TO_UINT;
16039
16040 EVT NewVT = TargetVT == MVT::v2i64 ? MVT::v2f64 : MVT::v4f32;
16041 SDValue BV = DAG.getBuildVector(NewVT, dl, Ops);
16042 return DAG.getNode(Opcode, dl, TargetVT, BV);
16043 }
16044 return SDValue();
16045}
16046
16047// LXVKQ instruction load VSX vector with a special quadword value
16048// based on an immediate value. This helper method returns the details of the
16049// match as a tuple of {LXVKQ unsigned IMM Value, right_shift_amount}
16050// to help generate the LXVKQ instruction and the subsequent shift instruction
16051// required to match the original build vector pattern.
16052
16053// LXVKQPattern: {LXVKQ unsigned IMM Value, right_shift_amount}
16054using LXVKQPattern = std::tuple<uint32_t, uint8_t>;
16055
16056static std::optional<LXVKQPattern> getPatternInfo(const APInt &FullVal) {
16057
16058 // LXVKQ instruction loads the Quadword value:
16059 // 0x8000_0000_0000_0000_0000_0000_0000_0000 when imm = 0b10000
16060 static const APInt BasePattern = APInt(128, 0x8000000000000000ULL) << 64;
16061 static const uint32_t Uim = 16;
16062
16063 // Check for direct LXVKQ match (no shift needed)
16064 if (FullVal == BasePattern)
16065 return std::make_tuple(Uim, uint8_t{0});
16066
16067 // Check if FullValue is 1 (the result of the base pattern >> 127)
16068 if (FullVal == APInt(128, 1))
16069 return std::make_tuple(Uim, uint8_t{127});
16070
16071 return std::nullopt;
16072}
16073
16074/// Combine vector loads to a single load (using lxvkq) or splat with shift of a
16075/// constant (xxspltib + vsrq) by recognising patterns in the Build Vector.
16076/// LXVKQ instruction load VSX vector with a special quadword value based on an
16077/// immediate value. if UIM=0b10000 then LXVKQ loads VSR[32×TX+T] with value
16078/// 0x8000_0000_0000_0000_0000_0000_0000_0000.
16079/// This can be used to inline the build vector constants that have the
16080/// following patterns:
16081///
16082/// 0x8000_0000_0000_0000_0000_0000_0000_0000 (MSB set pattern)
16083/// 0x0000_0000_0000_0000_0000_0000_0000_0001 (LSB set pattern)
16084/// MSB pattern can directly loaded using LXVKQ while LSB is loaded using a
16085/// combination of splatting and right shift instructions.
16086
16087SDValue PPCTargetLowering::combineBVLoadsSpecialValue(SDValue Op,
16088 SelectionDAG &DAG) const {
16089
16090 assert((Op.getNode() && Op.getOpcode() == ISD::BUILD_VECTOR) &&
16091 "Expected a BuildVectorSDNode in combineBVLoadsSpecialValue");
16092
16093 // This transformation is only supported if we are loading either a byte,
16094 // halfword, word, or doubleword.
16095 EVT VT = Op.getValueType();
16096 if (!(VT == MVT::v8i16 || VT == MVT::v16i8 || VT == MVT::v4i32 ||
16097 VT == MVT::v2i64))
16098 return SDValue();
16099
16100 LLVM_DEBUG(llvm::dbgs() << "\ncombineBVLoadsSpecialValue: Build vector ("
16101 << VT.getEVTString() << "): ";
16102 Op->dump());
16103
16104 unsigned NumElems = VT.getVectorNumElements();
16105 unsigned ElemBits = VT.getScalarSizeInBits();
16106
16107 bool IsLittleEndian = DAG.getDataLayout().isLittleEndian();
16108
16109 // Check for Non-constant operand in the build vector.
16110 for (const SDValue &Operand : Op.getNode()->op_values()) {
16111 if (!isa<ConstantSDNode>(Operand))
16112 return SDValue();
16113 }
16114
16115 // Assemble build vector operands as a 128-bit register value
16116 // We need to reconstruct what the 128-bit register pattern would be
16117 // that produces this vector when interpreted with the current endianness
16118 APInt FullVal = APInt::getZero(128);
16119
16120 for (unsigned Index = 0; Index < NumElems; ++Index) {
16121 auto *C = cast<ConstantSDNode>(Op.getOperand(Index));
16122
16123 // Get element value as raw bits (zero-extended)
16124 uint64_t ElemValue = C->getZExtValue();
16125
16126 // Mask to element size to ensure we only get the relevant bits
16127 if (ElemBits < 64)
16128 ElemValue &= ((1ULL << ElemBits) - 1);
16129
16130 // Calculate bit position for this element in the 128-bit register
16131 unsigned BitPos =
16132 (IsLittleEndian) ? (Index * ElemBits) : (128 - (Index + 1) * ElemBits);
16133
16134 // Create APInt for the element value and shift it to correct position
16135 APInt ElemAPInt(128, ElemValue);
16136 ElemAPInt <<= BitPos;
16137
16138 // Place the element value at the correct bit position
16139 FullVal |= ElemAPInt;
16140 }
16141
16142 if (FullVal.isZero() || FullVal.isAllOnes())
16143 return SDValue();
16144
16145 if (auto UIMOpt = getPatternInfo(FullVal)) {
16146 const auto &[Uim, ShiftAmount] = *UIMOpt;
16147 SDLoc Dl(Op);
16148
16149 // Generate LXVKQ instruction if the shift amount is zero.
16150 if (ShiftAmount == 0) {
16151 SDValue UimVal = DAG.getTargetConstant(Uim, Dl, MVT::i32);
16152 SDValue LxvkqInstr =
16153 SDValue(DAG.getMachineNode(PPC::LXVKQ, Dl, VT, UimVal), 0);
16155 << "combineBVLoadsSpecialValue: Instruction Emitted ";
16156 LxvkqInstr.dump());
16157 return LxvkqInstr;
16158 }
16159
16160 assert(ShiftAmount == 127 && "Unexpected lxvkq shift amount value");
16161
16162 // The right shifted pattern can be constructed using a combination of
16163 // XXSPLTIB and VSRQ instruction. VSRQ uses the shift amount from the lower
16164 // 7 bits of byte 15. This can be specified using XXSPLTIB with immediate
16165 // value 255.
16166 SDValue ShiftAmountVec =
16167 SDValue(DAG.getMachineNode(PPC::XXSPLTIB, Dl, MVT::v4i32,
16168 DAG.getTargetConstant(255, Dl, MVT::i32)),
16169 0);
16170 // Generate appropriate right shift instruction
16171 SDValue ShiftVec = SDValue(
16172 DAG.getMachineNode(PPC::VSRQ, Dl, VT, ShiftAmountVec, ShiftAmountVec),
16173 0);
16175 << "\n combineBVLoadsSpecialValue: Instruction Emitted ";
16176 ShiftVec.dump());
16177 return ShiftVec;
16178 }
16179 // No patterns matched for build vectors.
16180 return SDValue();
16181}
16182
16183/// Reduce the number of loads when building a vector.
16184///
16185/// Building a vector out of multiple loads can be converted to a load
16186/// of the vector type if the loads are consecutive. If the loads are
16187/// consecutive but in descending order, a shuffle is added at the end
16188/// to reorder the vector.
16190 assert(N->getOpcode() == ISD::BUILD_VECTOR &&
16191 "Should be called with a BUILD_VECTOR node");
16192
16193 SDLoc dl(N);
16194
16195 // Return early for non byte-sized type, as they can't be consecutive.
16196 if (!N->getValueType(0).getVectorElementType().isByteSized())
16197 return SDValue();
16198
16199 bool InputsAreConsecutiveLoads = true;
16200 bool InputsAreReverseConsecutive = true;
16201 unsigned ElemSize = N->getValueType(0).getScalarType().getStoreSize();
16202 SDValue FirstInput = N->getOperand(0);
16203 bool IsRoundOfExtLoad = false;
16204 LoadSDNode *FirstLoad = nullptr;
16205
16206 if (FirstInput.getOpcode() == ISD::FP_ROUND &&
16207 FirstInput.getOperand(0).getOpcode() == ISD::LOAD) {
16208 FirstLoad = cast<LoadSDNode>(FirstInput.getOperand(0));
16209 IsRoundOfExtLoad = FirstLoad->getExtensionType() == ISD::EXTLOAD;
16210 }
16211 // Not a build vector of (possibly fp_rounded) loads.
16212 if ((!IsRoundOfExtLoad && FirstInput.getOpcode() != ISD::LOAD) ||
16213 N->getNumOperands() == 1)
16214 return SDValue();
16215
16216 if (!IsRoundOfExtLoad)
16217 FirstLoad = cast<LoadSDNode>(FirstInput);
16218
16220 InputLoads.push_back(FirstLoad);
16221 for (int i = 1, e = N->getNumOperands(); i < e; ++i) {
16222 // If any inputs are fp_round(extload), they all must be.
16223 if (IsRoundOfExtLoad && N->getOperand(i).getOpcode() != ISD::FP_ROUND)
16224 return SDValue();
16225
16226 SDValue NextInput = IsRoundOfExtLoad ? N->getOperand(i).getOperand(0) :
16227 N->getOperand(i);
16228 if (NextInput.getOpcode() != ISD::LOAD)
16229 return SDValue();
16230
16231 SDValue PreviousInput =
16232 IsRoundOfExtLoad ? N->getOperand(i-1).getOperand(0) : N->getOperand(i-1);
16233 LoadSDNode *LD1 = cast<LoadSDNode>(PreviousInput);
16234 LoadSDNode *LD2 = cast<LoadSDNode>(NextInput);
16235
16236 // If any inputs are fp_round(extload), they all must be.
16237 if (IsRoundOfExtLoad && LD2->getExtensionType() != ISD::EXTLOAD)
16238 return SDValue();
16239
16240 // We only care about regular loads. The PPC-specific load intrinsics
16241 // will not lead to a merge opportunity.
16242 if (!DAG.areNonVolatileConsecutiveLoads(LD2, LD1, ElemSize, 1))
16243 InputsAreConsecutiveLoads = false;
16244 if (!DAG.areNonVolatileConsecutiveLoads(LD1, LD2, ElemSize, 1))
16245 InputsAreReverseConsecutive = false;
16246
16247 // Exit early if the loads are neither consecutive nor reverse consecutive.
16248 if (!InputsAreConsecutiveLoads && !InputsAreReverseConsecutive)
16249 return SDValue();
16250 InputLoads.push_back(LD2);
16251 }
16252
16253 assert(!(InputsAreConsecutiveLoads && InputsAreReverseConsecutive) &&
16254 "The loads cannot be both consecutive and reverse consecutive.");
16255
16256 SDValue WideLoad;
16257 SDValue ReturnSDVal;
16258 if (InputsAreConsecutiveLoads) {
16259 assert(FirstLoad && "Input needs to be a LoadSDNode.");
16260 WideLoad = DAG.getLoad(N->getValueType(0), dl, FirstLoad->getChain(),
16261 FirstLoad->getBasePtr(), FirstLoad->getPointerInfo(),
16262 FirstLoad->getAlign());
16263 ReturnSDVal = WideLoad;
16264 } else if (InputsAreReverseConsecutive) {
16265 LoadSDNode *LastLoad = InputLoads.back();
16266 assert(LastLoad && "Input needs to be a LoadSDNode.");
16267 WideLoad = DAG.getLoad(N->getValueType(0), dl, LastLoad->getChain(),
16268 LastLoad->getBasePtr(), LastLoad->getPointerInfo(),
16269 LastLoad->getAlign());
16271 for (int i = N->getNumOperands() - 1; i >= 0; i--)
16272 Ops.push_back(i);
16273
16274 ReturnSDVal = DAG.getVectorShuffle(N->getValueType(0), dl, WideLoad,
16275 DAG.getUNDEF(N->getValueType(0)), Ops);
16276 } else
16277 return SDValue();
16278
16279 for (auto *LD : InputLoads)
16280 DAG.makeEquivalentMemoryOrdering(LD, WideLoad);
16281 return ReturnSDVal;
16282}
16283
16284// This function adds the required vector_shuffle needed to get
16285// the elements of the vector extract in the correct position
16286// as specified by the CorrectElems encoding.
16288 SDValue Input, uint64_t Elems,
16289 uint64_t CorrectElems) {
16290 SDLoc dl(N);
16291
16292 unsigned NumElems = Input.getValueType().getVectorNumElements();
16293 SmallVector<int, 16> ShuffleMask(NumElems, -1);
16294
16295 // Knowing the element indices being extracted from the original
16296 // vector and the order in which they're being inserted, just put
16297 // them at element indices required for the instruction.
16298 for (unsigned i = 0; i < N->getNumOperands(); i++) {
16299 if (DAG.getDataLayout().isLittleEndian())
16300 ShuffleMask[CorrectElems & 0xF] = Elems & 0xF;
16301 else
16302 ShuffleMask[(CorrectElems & 0xF0) >> 4] = (Elems & 0xF0) >> 4;
16303 CorrectElems = CorrectElems >> 8;
16304 Elems = Elems >> 8;
16305 }
16306
16307 SDValue Shuffle =
16308 DAG.getVectorShuffle(Input.getValueType(), dl, Input,
16309 DAG.getUNDEF(Input.getValueType()), ShuffleMask);
16310
16311 EVT VT = N->getValueType(0);
16312 SDValue Conv = DAG.getBitcast(VT, Shuffle);
16313
16314 EVT ExtVT = EVT::getVectorVT(*DAG.getContext(),
16315 Input.getValueType().getVectorElementType(),
16317 return DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, VT, Conv,
16318 DAG.getValueType(ExtVT));
16319}
16320
16321// Look for build vector patterns where input operands come from sign
16322// extended vector_extract elements of specific indices. If the correct indices
16323// aren't used, add a vector shuffle to fix up the indices and create
16324// SIGN_EXTEND_INREG node which selects the vector sign extend instructions
16325// during instruction selection.
16327 // This array encodes the indices that the vector sign extend instructions
16328 // extract from when extending from one type to another for both BE and LE.
16329 // The right nibble of each byte corresponds to the LE incides.
16330 // and the left nibble of each byte corresponds to the BE incides.
16331 // For example: 0x3074B8FC byte->word
16332 // For LE: the allowed indices are: 0x0,0x4,0x8,0xC
16333 // For BE: the allowed indices are: 0x3,0x7,0xB,0xF
16334 // For example: 0x000070F8 byte->double word
16335 // For LE: the allowed indices are: 0x0,0x8
16336 // For BE: the allowed indices are: 0x7,0xF
16337 uint64_t TargetElems[] = {
16338 0x3074B8FC, // b->w
16339 0x000070F8, // b->d
16340 0x10325476, // h->w
16341 0x00003074, // h->d
16342 0x00001032, // w->d
16343 };
16344
16345 uint64_t Elems = 0;
16346 int Index;
16347 SDValue Input;
16348
16349 auto isSExtOfVecExtract = [&](SDValue Op) -> bool {
16350 if (!Op)
16351 return false;
16352 if (Op.getOpcode() != ISD::SIGN_EXTEND &&
16353 Op.getOpcode() != ISD::SIGN_EXTEND_INREG)
16354 return false;
16355
16356 // A SIGN_EXTEND_INREG might be fed by an ANY_EXTEND to produce a value
16357 // of the right width.
16358 SDValue Extract = Op.getOperand(0);
16359 if (Extract.getOpcode() == ISD::ANY_EXTEND)
16360 Extract = Extract.getOperand(0);
16361 if (Extract.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
16362 return false;
16363
16365 if (!ExtOp)
16366 return false;
16367
16368 Index = ExtOp->getZExtValue();
16369 if (Input && Input != Extract.getOperand(0))
16370 return false;
16371
16372 if (!Input)
16373 Input = Extract.getOperand(0);
16374
16375 Elems = Elems << 8;
16376 Index = DAG.getDataLayout().isLittleEndian() ? Index : Index << 4;
16377 Elems |= Index;
16378
16379 return true;
16380 };
16381
16382 // If the build vector operands aren't sign extended vector extracts,
16383 // of the same input vector, then return.
16384 for (unsigned i = 0; i < N->getNumOperands(); i++) {
16385 if (!isSExtOfVecExtract(N->getOperand(i))) {
16386 return SDValue();
16387 }
16388 }
16389
16390 // If the vector extract indices are not correct, add the appropriate
16391 // vector_shuffle.
16392 int TgtElemArrayIdx;
16393 int InputSize = Input.getValueType().getScalarSizeInBits();
16394 int OutputSize = N->getValueType(0).getScalarSizeInBits();
16395 if (InputSize + OutputSize == 40)
16396 TgtElemArrayIdx = 0;
16397 else if (InputSize + OutputSize == 72)
16398 TgtElemArrayIdx = 1;
16399 else if (InputSize + OutputSize == 48)
16400 TgtElemArrayIdx = 2;
16401 else if (InputSize + OutputSize == 80)
16402 TgtElemArrayIdx = 3;
16403 else if (InputSize + OutputSize == 96)
16404 TgtElemArrayIdx = 4;
16405 else
16406 return SDValue();
16407
16408 uint64_t CorrectElems = TargetElems[TgtElemArrayIdx];
16409 CorrectElems = DAG.getDataLayout().isLittleEndian()
16410 ? CorrectElems & 0x0F0F0F0F0F0F0F0F
16411 : CorrectElems & 0xF0F0F0F0F0F0F0F0;
16412 if (Elems != CorrectElems) {
16413 return addShuffleForVecExtend(N, DAG, Input, Elems, CorrectElems);
16414 }
16415
16416 // Regular lowering will catch cases where a shuffle is not needed.
16417 return SDValue();
16418}
16419
16420// Look for the pattern of a load from a narrow width to i128, feeding
16421// into a BUILD_VECTOR of v1i128. Replace this sequence with a PPCISD node
16422// (LXVRZX). This node represents a zero extending load that will be matched
16423// to the Load VSX Vector Rightmost instructions.
16425 SDLoc DL(N);
16426
16427 // This combine is only eligible for a BUILD_VECTOR of v1i128.
16428 if (N->getValueType(0) != MVT::v1i128)
16429 return SDValue();
16430
16431 SDValue Operand = N->getOperand(0);
16432 // Proceed with the transformation if the operand to the BUILD_VECTOR
16433 // is a load instruction.
16434 if (Operand.getOpcode() != ISD::LOAD)
16435 return SDValue();
16436
16437 auto *LD = cast<LoadSDNode>(Operand);
16438 EVT MemoryType = LD->getMemoryVT();
16439
16440 // This transformation is only valid if the we are loading either a byte,
16441 // halfword, word, or doubleword.
16442 bool ValidLDType = MemoryType == MVT::i8 || MemoryType == MVT::i16 ||
16443 MemoryType == MVT::i32 || MemoryType == MVT::i64;
16444
16445 // Ensure that the load from the narrow width is being zero extended to i128.
16446 if (!ValidLDType ||
16447 (LD->getExtensionType() != ISD::ZEXTLOAD &&
16448 LD->getExtensionType() != ISD::EXTLOAD))
16449 return SDValue();
16450
16451 SDValue LoadOps[] = {
16452 LD->getChain(), LD->getBasePtr(),
16453 DAG.getIntPtrConstant(MemoryType.getScalarSizeInBits(), DL)};
16454
16455 return DAG.getMemIntrinsicNode(PPCISD::LXVRZX, DL,
16456 DAG.getVTList(MVT::v1i128, MVT::Other),
16457 LoadOps, MemoryType, LD->getMemOperand());
16458}
16459
16460SDValue PPCTargetLowering::DAGCombineBuildVector(SDNode *N,
16461 DAGCombinerInfo &DCI) const {
16462 assert(N->getOpcode() == ISD::BUILD_VECTOR &&
16463 "Should be called with a BUILD_VECTOR node");
16464
16465 SelectionDAG &DAG = DCI.DAG;
16466 SDLoc dl(N);
16467
16468 if (!Subtarget.hasVSX())
16469 return SDValue();
16470
16471 // The target independent DAG combiner will leave a build_vector of
16472 // float-to-int conversions intact. We can generate MUCH better code for
16473 // a float-to-int conversion of a vector of floats.
16474 SDValue FirstInput = N->getOperand(0);
16475 if (FirstInput.getOpcode() == PPCISD::MFVSR) {
16476 SDValue Reduced = combineElementTruncationToVectorTruncation(N, DCI);
16477 if (Reduced)
16478 return Reduced;
16479 }
16480
16481 // If we're building a vector out of consecutive loads, just load that
16482 // vector type.
16483 SDValue Reduced = combineBVOfConsecutiveLoads(N, DAG);
16484 if (Reduced)
16485 return Reduced;
16486
16487 // If we're building a vector out of extended elements from another vector
16488 // we have P9 vector integer extend instructions. The code assumes legal
16489 // input types (i.e. it can't handle things like v4i16) so do not run before
16490 // legalization.
16491 if (Subtarget.hasP9Altivec() && !DCI.isBeforeLegalize()) {
16492 Reduced = combineBVOfVecSExt(N, DAG);
16493 if (Reduced)
16494 return Reduced;
16495 }
16496
16497 // On Power10, the Load VSX Vector Rightmost instructions can be utilized
16498 // if this is a BUILD_VECTOR of v1i128, and if the operand to the BUILD_VECTOR
16499 // is a load from <valid narrow width> to i128.
16500 if (Subtarget.isISA3_1()) {
16501 SDValue BVOfZLoad = combineBVZEXTLOAD(N, DAG);
16502 if (BVOfZLoad)
16503 return BVOfZLoad;
16504 }
16505
16506 if (N->getValueType(0) != MVT::v2f64)
16507 return SDValue();
16508
16509 // Looking for:
16510 // (build_vector ([su]int_to_fp (extractelt 0)), [su]int_to_fp (extractelt 1))
16511 if (FirstInput.getOpcode() != ISD::SINT_TO_FP &&
16512 FirstInput.getOpcode() != ISD::UINT_TO_FP)
16513 return SDValue();
16514 if (N->getOperand(1).getOpcode() != ISD::SINT_TO_FP &&
16515 N->getOperand(1).getOpcode() != ISD::UINT_TO_FP)
16516 return SDValue();
16517 if (FirstInput.getOpcode() != N->getOperand(1).getOpcode())
16518 return SDValue();
16519
16520 SDValue Ext1 = FirstInput.getOperand(0);
16521 SDValue Ext2 = N->getOperand(1).getOperand(0);
16522 if(Ext1.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
16524 return SDValue();
16525
16526 ConstantSDNode *Ext1Op = dyn_cast<ConstantSDNode>(Ext1.getOperand(1));
16527 ConstantSDNode *Ext2Op = dyn_cast<ConstantSDNode>(Ext2.getOperand(1));
16528 if (!Ext1Op || !Ext2Op)
16529 return SDValue();
16530 if (Ext1.getOperand(0).getValueType() != MVT::v4i32 ||
16531 Ext1.getOperand(0) != Ext2.getOperand(0))
16532 return SDValue();
16533
16534 int FirstElem = Ext1Op->getZExtValue();
16535 int SecondElem = Ext2Op->getZExtValue();
16536 int SubvecIdx;
16537 if (FirstElem == 0 && SecondElem == 1)
16538 SubvecIdx = Subtarget.isLittleEndian() ? 1 : 0;
16539 else if (FirstElem == 2 && SecondElem == 3)
16540 SubvecIdx = Subtarget.isLittleEndian() ? 0 : 1;
16541 else
16542 return SDValue();
16543
16544 SDValue SrcVec = Ext1.getOperand(0);
16545 auto NodeType = (N->getOperand(1).getOpcode() == ISD::SINT_TO_FP) ?
16546 PPCISD::SINT_VEC_TO_FP : PPCISD::UINT_VEC_TO_FP;
16547 return DAG.getNode(NodeType, dl, MVT::v2f64,
16548 SrcVec, DAG.getIntPtrConstant(SubvecIdx, dl));
16549}
16550
// DAG combine for scalar [SU]INT_TO_FP producing f32/f64: use the direct
// FCFID* conversions (and, on Power9, lxsizx/vexts for sub-word loads)
// instead of the generic store/load expansion, and collapse
// fp -> int -> fp round trips entirely.
SDValue PPCTargetLowering::combineFPToIntToFP(SDNode *N,
                                              DAGCombinerInfo &DCI) const {
  assert((N->getOpcode() == ISD::SINT_TO_FP ||
          N->getOpcode() == ISD::UINT_TO_FP) &&
         "Need an int -> FP conversion node here");

  // The FCFID* family needs 64-bit support; nothing to do under soft-float.
  if (useSoftFloat() || !Subtarget.has64BitSupport())
    return SDValue();

  SelectionDAG &DAG = DCI.DAG;
  SDLoc dl(N);
  SDValue Op(N, 0);

  // Don't handle ppc_fp128 here or conversions that are out-of-range capable
  // from the hardware.
  if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
    return SDValue();
  if (!Op.getOperand(0).getValueType().isSimple())
    return SDValue();
  // Only integer sources strictly wider than i1 and no wider than i64.
  if (Op.getOperand(0).getValueType().getSimpleVT() <= MVT(MVT::i1) ||
      Op.getOperand(0).getValueType().getSimpleVT() > MVT(MVT::i64))
    return SDValue();

  // Power9: an i8/i16 load feeding the conversion can be loaded directly
  // into a VSR with LXSIZX (plus VEXTS to sign-extend within the VSR for
  // the signed case), avoiding a GPR round trip.
  SDValue FirstOperand(Op.getOperand(0));
  bool SubWordLoad = FirstOperand.getOpcode() == ISD::LOAD &&
    (FirstOperand.getValueType() == MVT::i8 ||
     FirstOperand.getValueType() == MVT::i16);
  if (Subtarget.hasP9Vector() && Subtarget.hasP9Altivec() && SubWordLoad) {
    bool Signed = N->getOpcode() == ISD::SINT_TO_FP;
    bool DstDouble = Op.getValueType() == MVT::f64;
    unsigned ConvOp = Signed ?
      (DstDouble ? PPCISD::FCFID : PPCISD::FCFIDS) :
      (DstDouble ? PPCISD::FCFIDU : PPCISD::FCFIDUS);
    // Width operand of LXSIZX: number of bytes to load (1 or 2).
    SDValue WidthConst =
      DAG.getIntPtrConstant(FirstOperand.getValueType() == MVT::i8 ? 1 : 2,
                            dl, false);
    LoadSDNode *LDN = cast<LoadSDNode>(FirstOperand.getNode());
    SDValue Ops[] = { LDN->getChain(), LDN->getBasePtr(), WidthConst };
    SDValue Ld = DAG.getMemIntrinsicNode(PPCISD::LXSIZX, dl,
                                         DAG.getVTList(MVT::f64, MVT::Other),
                                         Ops, MVT::i8, LDN->getMemOperand());
    // Preserve the original load's position in the memory dependence chain.
    DAG.makeEquivalentMemoryOrdering(LDN, Ld);

    // For signed conversion, we need to sign-extend the value in the VSR
    if (Signed) {
      SDValue ExtOps[] = { Ld, WidthConst };
      SDValue Ext = DAG.getNode(PPCISD::VEXTS, dl, MVT::f64, ExtOps);
      return DAG.getNode(ConvOp, dl, DstDouble ? MVT::f64 : MVT::f32, Ext);
    } else
      return DAG.getNode(ConvOp, dl, DstDouble ? MVT::f64 : MVT::f32, Ld);
  }


  // For i32 intermediate values, unfortunately, the conversion functions
  // leave the upper 32 bits of the value undefined. Within the set of
  // scalar instructions, we have no method for zero- or sign-extending the
  // value. Thus, we cannot handle i32 intermediate values here.
  if (Op.getOperand(0).getValueType() == MVT::i32)
    return SDValue();

  assert((Op.getOpcode() == ISD::SINT_TO_FP || Subtarget.hasFPCVT()) &&
         "UINT_TO_FP is supported only with FPCVT");

  // If we have FCFIDS, then use it when converting to single-precision.
  // Otherwise, convert to double-precision and then round.
  unsigned FCFOp = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
                       ? (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDUS
                                                            : PPCISD::FCFIDS)
                       : (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDU
                                                            : PPCISD::FCFID);
  MVT FCFTy = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
                  ? MVT::f32
                  : MVT::f64;

  // If we're converting from a float, to an int, and back to a float again,
  // then we don't need the store/load pair at all.
  // Note: FP_TO_UINT requires FPCVT (FCTIDUZ); FP_TO_SINT always has FCTIDZ.
  if ((Op.getOperand(0).getOpcode() == ISD::FP_TO_UINT &&
       Subtarget.hasFPCVT()) ||
      (Op.getOperand(0).getOpcode() == ISD::FP_TO_SINT)) {
    SDValue Src = Op.getOperand(0).getOperand(0);
    if (Src.getValueType() == MVT::f32) {
      Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);
      DCI.AddToWorklist(Src.getNode());
    } else if (Src.getValueType() != MVT::f64) {
      // Make sure that we don't pick up a ppc_fp128 source value.
      return SDValue();
    }

    unsigned FCTOp =
      Op.getOperand(0).getOpcode() == ISD::FP_TO_SINT ? PPCISD::FCTIDZ :
                                                        PPCISD::FCTIDUZ;

    SDValue Tmp = DAG.getNode(FCTOp, dl, MVT::f64, Src);
    SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Tmp);

    // Without FPCVT there is no single-precision FCFIDS, so the f64 result
    // must be rounded back down to f32 explicitly.
    if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) {
      FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP,
                       DAG.getIntPtrConstant(0, dl, /*isTarget=*/true));
      DCI.AddToWorklist(FP.getNode());
    }

    return FP;
  }

  return SDValue();
}
16657
16658// expandVSXLoadForLE - Convert VSX loads (which may be intrinsics for
16659// builtins) into loads with swaps.
16661 DAGCombinerInfo &DCI) const {
16662 // Delay VSX load for LE combine until after LegalizeOps to prioritize other
16663 // load combines.
16664 if (DCI.isBeforeLegalizeOps())
16665 return SDValue();
16666
16667 SelectionDAG &DAG = DCI.DAG;
16668 SDLoc dl(N);
16669 SDValue Chain;
16670 SDValue Base;
16671 MachineMemOperand *MMO;
16672
16673 switch (N->getOpcode()) {
16674 default:
16675 llvm_unreachable("Unexpected opcode for little endian VSX load");
16676 case ISD::LOAD: {
16678 Chain = LD->getChain();
16679 Base = LD->getBasePtr();
16680 MMO = LD->getMemOperand();
16681 // If the MMO suggests this isn't a load of a full vector, leave
16682 // things alone. For a built-in, we have to make the change for
16683 // correctness, so if there is a size problem that will be a bug.
16684 if (!MMO->getSize().hasValue() || MMO->getSize().getValue() < 16)
16685 return SDValue();
16686 break;
16687 }
16690 Chain = Intrin->getChain();
16691 // Similarly to the store case below, Intrin->getBasePtr() doesn't get
16692 // us what we want. Get operand 2 instead.
16693 Base = Intrin->getOperand(2);
16694 MMO = Intrin->getMemOperand();
16695 break;
16696 }
16697 }
16698
16699 MVT VecTy = N->getValueType(0).getSimpleVT();
16700
16701 SDValue LoadOps[] = { Chain, Base };
16702 SDValue Load = DAG.getMemIntrinsicNode(PPCISD::LXVD2X, dl,
16703 DAG.getVTList(MVT::v2f64, MVT::Other),
16704 LoadOps, MVT::v2f64, MMO);
16705
16706 DCI.AddToWorklist(Load.getNode());
16707 Chain = Load.getValue(1);
16708 SDValue Swap = DAG.getNode(
16709 PPCISD::XXSWAPD, dl, DAG.getVTList(MVT::v2f64, MVT::Other), Chain, Load);
16710 DCI.AddToWorklist(Swap.getNode());
16711
16712 // Add a bitcast if the resulting load type doesn't match v2f64.
16713 if (VecTy != MVT::v2f64) {
16714 SDValue N = DAG.getNode(ISD::BITCAST, dl, VecTy, Swap);
16715 DCI.AddToWorklist(N.getNode());
16716 // Package {bitcast value, swap's chain} to match Load's shape.
16717 return DAG.getNode(ISD::MERGE_VALUES, dl, DAG.getVTList(VecTy, MVT::Other),
16718 N, Swap.getValue(1));
16719 }
16720
16721 return Swap;
16722}
16723
16724// expandVSXStoreForLE - Convert VSX stores (which may be intrinsics for
16725// builtins) into stores with swaps.
16727 DAGCombinerInfo &DCI) const {
16728 // Delay VSX store for LE combine until after LegalizeOps to prioritize other
16729 // store combines.
16730 if (DCI.isBeforeLegalizeOps())
16731 return SDValue();
16732
16733 SelectionDAG &DAG = DCI.DAG;
16734 SDLoc dl(N);
16735 SDValue Chain;
16736 SDValue Base;
16737 unsigned SrcOpnd;
16738 MachineMemOperand *MMO;
16739
16740 switch (N->getOpcode()) {
16741 default:
16742 llvm_unreachable("Unexpected opcode for little endian VSX store");
16743 case ISD::STORE: {
16745 Chain = ST->getChain();
16746 Base = ST->getBasePtr();
16747 MMO = ST->getMemOperand();
16748 SrcOpnd = 1;
16749 // If the MMO suggests this isn't a store of a full vector, leave
16750 // things alone. For a built-in, we have to make the change for
16751 // correctness, so if there is a size problem that will be a bug.
16752 if (!MMO->getSize().hasValue() || MMO->getSize().getValue() < 16)
16753 return SDValue();
16754 break;
16755 }
16756 case ISD::INTRINSIC_VOID: {
16758 Chain = Intrin->getChain();
16759 // Intrin->getBasePtr() oddly does not get what we want.
16760 Base = Intrin->getOperand(3);
16761 MMO = Intrin->getMemOperand();
16762 SrcOpnd = 2;
16763 break;
16764 }
16765 }
16766
16767 SDValue Src = N->getOperand(SrcOpnd);
16768 MVT VecTy = Src.getValueType().getSimpleVT();
16769
16770 // All stores are done as v2f64 and possible bit cast.
16771 if (VecTy != MVT::v2f64) {
16772 Src = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, Src);
16773 DCI.AddToWorklist(Src.getNode());
16774 }
16775
16776 SDValue Swap = DAG.getNode(PPCISD::XXSWAPD, dl,
16777 DAG.getVTList(MVT::v2f64, MVT::Other), Chain, Src);
16778 DCI.AddToWorklist(Swap.getNode());
16779 Chain = Swap.getValue(1);
16780 SDValue StoreOps[] = { Chain, Swap, Base };
16781 SDValue Store = DAG.getMemIntrinsicNode(PPCISD::STXVD2X, dl,
16782 DAG.getVTList(MVT::Other),
16783 StoreOps, VecTy, MMO);
16784 DCI.AddToWorklist(Store.getNode());
16785 return Store;
16786}
16787
// Handle DAG combine for STORE (FP_TO_INT F): convert the FP value in a VSR
// and store it directly, avoiding a GPR round trip.
SDValue PPCTargetLowering::combineStoreFPToInt(SDNode *N,
                                               DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  SDLoc dl(N);
  unsigned Opcode = N->getOperand(1).getOpcode();
  // Opcode is only read by the assert below; the cast silences
  // unused-variable warnings in release (NDEBUG) builds.
  (void)Opcode;
  bool Strict = N->getOperand(1)->isStrictFPOpcode();

  assert((Opcode == ISD::FP_TO_SINT || Opcode == ISD::FP_TO_UINT ||
          Opcode == ISD::STRICT_FP_TO_SINT || Opcode == ISD::STRICT_FP_TO_UINT)
         && "Not a FP_TO_INT Instruction!");

  // For strict nodes operand 0 is the chain, so the FP input is operand 1.
  SDValue Val = N->getOperand(1).getOperand(Strict ? 1 : 0);
  EVT Op1VT = N->getOperand(1).getValueType();
  EVT ResVT = Val.getValueType();

  if (!Subtarget.hasVSX() || !Subtarget.hasFPCVT() || !isTypeLegal(ResVT))
    return SDValue();

  // Only perform combine for conversion to i64/i32 or power9 i16/i8.
  bool ValidTypeForStoreFltAsInt =
        (Op1VT == MVT::i32 || (Op1VT == MVT::i64 && Subtarget.isPPC64()) ||
         (Subtarget.hasP9Vector() && (Op1VT == MVT::i16 || Op1VT == MVT::i8)));

  // TODO: Lower conversion from f128 on all VSX targets
  if (ResVT == MVT::ppcf128 || (ResVT == MVT::f128 && !Subtarget.hasP9Vector()))
    return SDValue();

  if ((Op1VT != MVT::i64 && !Subtarget.hasP8Vector()) ||
      cast<StoreSDNode>(N)->isTruncatingStore() || !ValidTypeForStoreFltAsInt)
    return SDValue();

  // Perform the conversion in a VSR (handles the strict/non-strict variants).
  Val = convertFPToInt(N->getOperand(1), DAG, Subtarget);

  // Set number of bytes being converted.
  unsigned ByteSize = Op1VT.getScalarSizeInBits() / 8;
  // Ops: chain, converted value, base pointer, byte width, original int VT.
  SDValue Ops[] = {N->getOperand(0), Val, N->getOperand(2),
                   DAG.getIntPtrConstant(ByteSize, dl, false),
                   DAG.getValueType(Op1VT)};

  Val = DAG.getMemIntrinsicNode(PPCISD::ST_VSR_SCAL_INT, dl,
          DAG.getVTList(MVT::Other), Ops,
          cast<StoreSDNode>(N)->getMemoryVT(),
          cast<StoreSDNode>(N)->getMemOperand());

  return Val;
}
16836
16837static bool isAlternatingShuffMask(const ArrayRef<int> &Mask, int NumElts) {
16838 // Check that the source of the element keeps flipping
16839 // (i.e. Mask[i] < NumElts -> Mask[i+i] >= NumElts).
16840 bool PrevElemFromFirstVec = Mask[0] < NumElts;
16841 for (int i = 1, e = Mask.size(); i < e; i++) {
16842 if (PrevElemFromFirstVec && Mask[i] < NumElts)
16843 return false;
16844 if (!PrevElemFromFirstVec && Mask[i] >= NumElts)
16845 return false;
16846 PrevElemFromFirstVec = !PrevElemFromFirstVec;
16847 }
16848 return true;
16849}
16850
16851static bool isSplatBV(SDValue Op) {
16852 if (Op.getOpcode() != ISD::BUILD_VECTOR)
16853 return false;
16854 SDValue FirstOp;
16855
16856 // Find first non-undef input.
16857 for (int i = 0, e = Op.getNumOperands(); i < e; i++) {
16858 FirstOp = Op.getOperand(i);
16859 if (!FirstOp.isUndef())
16860 break;
16861 }
16862
16863 // All inputs are undef or the same as the first non-undef input.
16864 for (int i = 1, e = Op.getNumOperands(); i < e; i++)
16865 if (Op.getOperand(i) != FirstOp && !Op.getOperand(i).isUndef())
16866 return false;
16867 return true;
16868}
16869
16871 if (Op.getOpcode() == ISD::SCALAR_TO_VECTOR)
16872 return Op;
16873 if (Op.getOpcode() != ISD::BITCAST)
16874 return SDValue();
16875 Op = Op.getOperand(0);
16876 if (Op.getOpcode() == ISD::SCALAR_TO_VECTOR)
16877 return Op;
16878 return SDValue();
16879}
16880
16881// Fix up the shuffle mask to account for the fact that the result of
16882// scalar_to_vector is not in lane zero. This just takes all values in
16883// the ranges specified by the min/max indices and adds the number of
16884// elements required to ensure each element comes from the respective
16885// position in the valid lane.
16886// On little endian, that's just the corresponding element in the other
16887// half of the vector. On big endian, it is in the same half but right
16888// justified rather than left justified in that half.
16890 SmallVectorImpl<int> &ShuffV, int LHSFirstElt, int LHSLastElt,
16891 int RHSFirstElt, int RHSLastElt, int HalfVec, unsigned LHSNumValidElts,
16892 unsigned RHSNumValidElts, const PPCSubtarget &Subtarget) {
16893 int LHSEltFixup =
16894 Subtarget.isLittleEndian() ? HalfVec : HalfVec - LHSNumValidElts;
16895 int RHSEltFixup =
16896 Subtarget.isLittleEndian() ? HalfVec : HalfVec - RHSNumValidElts;
16897 for (int I = 0, E = ShuffV.size(); I < E; ++I) {
16898 int Idx = ShuffV[I];
16899 if (Idx >= LHSFirstElt && Idx <= LHSLastElt)
16900 ShuffV[I] += LHSEltFixup;
16901 else if (Idx >= RHSFirstElt && Idx <= RHSLastElt)
16902 ShuffV[I] += RHSEltFixup;
16903 }
16904}
16905
16906// Replace a SCALAR_TO_VECTOR with a SCALAR_TO_VECTOR_PERMUTED except if
16907// the original is:
16908// (<n x Ty> (scalar_to_vector (Ty (extract_elt <n x Ty> %a, C))))
16909// In such a case, just change the shuffle mask to extract the element
16910// from the permuted index.
16912 const PPCSubtarget &Subtarget) {
16913 SDLoc dl(OrigSToV);
16914 EVT VT = OrigSToV.getValueType();
16915 assert(OrigSToV.getOpcode() == ISD::SCALAR_TO_VECTOR &&
16916 "Expecting a SCALAR_TO_VECTOR here");
16917 SDValue Input = OrigSToV.getOperand(0);
16918
16919 if (Input.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
16920 ConstantSDNode *Idx = dyn_cast<ConstantSDNode>(Input.getOperand(1));
16921 SDValue OrigVector = Input.getOperand(0);
16922
16923 // Can't handle non-const element indices or different vector types
16924 // for the input to the extract and the output of the scalar_to_vector.
16925 if (Idx && VT == OrigVector.getValueType()) {
16926 unsigned NumElts = VT.getVectorNumElements();
16927 assert(
16928 NumElts > 1 &&
16929 "Cannot produce a permuted scalar_to_vector for one element vector");
16930 SmallVector<int, 16> NewMask(NumElts, -1);
16931 unsigned ResultInElt = NumElts / 2;
16932 ResultInElt -= Subtarget.isLittleEndian() ? 0 : 1;
16933 NewMask[ResultInElt] = Idx->getZExtValue();
16934 return DAG.getVectorShuffle(VT, dl, OrigVector, OrigVector, NewMask);
16935 }
16936 }
16937 return DAG.getNode(PPCISD::SCALAR_TO_VECTOR_PERMUTED, dl, VT,
16938 OrigSToV.getOperand(0));
16939}
16940
16942 int HalfVec, int LHSLastElementDefined,
16943 int RHSLastElementDefined) {
16944 for (int Index : ShuffV) {
16945 if (Index < 0) // Skip explicitly undefined mask indices.
16946 continue;
16947 // Handle first input vector of the vector_shuffle.
16948 if ((LHSLastElementDefined >= 0) && (Index < HalfVec) &&
16949 (Index > LHSLastElementDefined))
16950 return false;
16951 // Handle second input vector of the vector_shuffle.
16952 if ((RHSLastElementDefined >= 0) &&
16953 (Index > HalfVec + RHSLastElementDefined))
16954 return false;
16955 }
16956 return true;
16957}
16958
16960 int ScalarSize, uint64_t ShuffleEltWidth, unsigned &NumValidElts,
16961 int FirstElt, int &LastElt, SDValue VecShuffOperand, SDValue SToVNode,
16962 SelectionDAG &DAG, const PPCSubtarget &Subtarget) {
16963 EVT VecShuffOperandType = VecShuffOperand.getValueType();
16964 // Set up the values for the shuffle vector fixup.
16965 NumValidElts = ScalarSize / VecShuffOperandType.getScalarSizeInBits();
16966 // The last element depends on if the input comes from the LHS or RHS.
16967 //
16968 // For example:
16969 // (shuff (s_to_v i32), (bitcast (s_to_v i64), v4i32), ...)
16970 //
16971 // For the LHS: The last element that comes from the LHS is actually 0, not 3
16972 // because elements 1 and higher of a scalar_to_vector are undefined.
16973 // For the RHS: The last element that comes from the RHS is actually 5, not 7
16974 // because elements 1 and higher of a scalar_to_vector are undefined.
16975 // It is also not 4 because the original scalar_to_vector is wider and
16976 // actually contains two i32 elements.
16977 LastElt = (uint64_t)ScalarSize > ShuffleEltWidth
16978 ? ScalarSize / ShuffleEltWidth - 1 + FirstElt
16979 : FirstElt;
16980 SDValue SToVPermuted = getSToVPermuted(SToVNode, DAG, Subtarget);
16981 if (SToVPermuted.getValueType() != VecShuffOperandType)
16982 SToVPermuted = DAG.getBitcast(VecShuffOperandType, SToVPermuted);
16983 return SToVPermuted;
16984}
16985
16986// On little endian subtargets, combine shuffles such as:
16987// vector_shuffle<16,1,17,3,18,5,19,7,20,9,21,11,22,13,23,15>, <zero>, %b
16988// into:
16989// vector_shuffle<16,0,17,1,18,2,19,3,20,4,21,5,22,6,23,7>, <zero>, %b
16990// because the latter can be matched to a single instruction merge.
16991// Furthermore, SCALAR_TO_VECTOR on little endian always involves a permute
16992// to put the value into element zero. Adjust the shuffle mask so that the
16993// vector can remain in permuted form (to prevent a swap prior to a shuffle).
16994// On big endian targets, this is still useful for SCALAR_TO_VECTOR
16995// nodes with elements smaller than doubleword because all the ways
16996// of getting scalar data into a vector register put the value in the
16997// rightmost element of the left half of the vector.
16998SDValue PPCTargetLowering::combineVectorShuffle(ShuffleVectorSDNode *SVN,
16999 SelectionDAG &DAG) const {
17000 SDValue LHS = SVN->getOperand(0);
17001 SDValue RHS = SVN->getOperand(1);
17002 auto Mask = SVN->getMask();
17003 int NumElts = LHS.getValueType().getVectorNumElements();
17004 SDValue Res(SVN, 0);
17005 SDLoc dl(SVN);
17006 bool IsLittleEndian = Subtarget.isLittleEndian();
17007
17008 // On big endian targets this is only useful for subtargets with direct moves.
17009 // On little endian targets it would be useful for all subtargets with VSX.
17010 // However adding special handling for LE subtargets without direct moves
17011 // would be wasted effort since the minimum arch for LE is ISA 2.07 (Power8)
17012 // which includes direct moves.
17013 if (!Subtarget.hasDirectMove())
17014 return Res;
17015
17016 // If this is not a shuffle of a shuffle and the first element comes from
17017 // the second vector, canonicalize to the commuted form. This will make it
17018 // more likely to match one of the single instruction patterns.
17019 if (Mask[0] >= NumElts && LHS.getOpcode() != ISD::VECTOR_SHUFFLE &&
17020 RHS.getOpcode() != ISD::VECTOR_SHUFFLE) {
17021 std::swap(LHS, RHS);
17022 Res = DAG.getCommutedVectorShuffle(*SVN);
17023
17024 if (!isa<ShuffleVectorSDNode>(Res))
17025 return Res;
17026
17027 Mask = cast<ShuffleVectorSDNode>(Res)->getMask();
17028 }
17029
17030 // Adjust the shuffle mask if either input vector comes from a
17031 // SCALAR_TO_VECTOR and keep the respective input vector in permuted
17032 // form (to prevent the need for a swap).
17033 SmallVector<int, 16> ShuffV(Mask);
17034 SDValue SToVLHS = isScalarToVec(LHS);
17035 SDValue SToVRHS = isScalarToVec(RHS);
17036 if (SToVLHS || SToVRHS) {
17037 EVT VT = SVN->getValueType(0);
17038 uint64_t ShuffleEltWidth = VT.getVectorElementType().getSizeInBits();
17039 int ShuffleNumElts = ShuffV.size();
17040 int HalfVec = ShuffleNumElts / 2;
17041 // The width of the "valid lane" (i.e. the lane that contains the value that
17042 // is vectorized) needs to be expressed in terms of the number of elements
17043 // of the shuffle. It is thereby the ratio of the values before and after
17044 // any bitcast, which will be set later on if the LHS or RHS are
17045 // SCALAR_TO_VECTOR nodes.
17046 unsigned LHSNumValidElts = HalfVec;
17047 unsigned RHSNumValidElts = HalfVec;
17048
17049 // Initially assume that neither input is permuted. These will be adjusted
17050 // accordingly if either input is. Note, that -1 means that all elements
17051 // are undefined.
17052 int LHSFirstElt = 0;
17053 int RHSFirstElt = ShuffleNumElts;
17054 int LHSLastElt = -1;
17055 int RHSLastElt = -1;
17056
17057 // Get the permuted scalar to vector nodes for the source(s) that come from
17058 // ISD::SCALAR_TO_VECTOR.
17059 // On big endian systems, this only makes sense for element sizes smaller
17060 // than 64 bits since for 64-bit elements, all instructions already put
17061 // the value into element zero. Since scalar size of LHS and RHS may differ
17062 // after isScalarToVec, this should be checked using their own sizes.
17063 int LHSScalarSize = 0;
17064 int RHSScalarSize = 0;
17065 if (SToVLHS) {
17066 LHSScalarSize = SToVLHS.getValueType().getScalarSizeInBits();
17067 if (!IsLittleEndian && LHSScalarSize >= 64)
17068 return Res;
17069 }
17070 if (SToVRHS) {
17071 RHSScalarSize = SToVRHS.getValueType().getScalarSizeInBits();
17072 if (!IsLittleEndian && RHSScalarSize >= 64)
17073 return Res;
17074 }
17075 if (LHSScalarSize != 0)
17077 LHSScalarSize, ShuffleEltWidth, LHSNumValidElts, LHSFirstElt,
17078 LHSLastElt, LHS, SToVLHS, DAG, Subtarget);
17079 if (RHSScalarSize != 0)
17081 RHSScalarSize, ShuffleEltWidth, RHSNumValidElts, RHSFirstElt,
17082 RHSLastElt, RHS, SToVRHS, DAG, Subtarget);
17083
17084 if (!isShuffleMaskInRange(ShuffV, HalfVec, LHSLastElt, RHSLastElt))
17085 return Res;
17086
17087 // Fix up the shuffle mask to reflect where the desired element actually is.
17088 // The minimum and maximum indices that correspond to element zero for both
17089 // the LHS and RHS are computed and will control which shuffle mask entries
17090 // are to be changed. For example, if the RHS is permuted, any shuffle mask
17091 // entries in the range [RHSFirstElt,RHSLastElt] will be adjusted.
17093 ShuffV, LHSFirstElt, LHSLastElt, RHSFirstElt, RHSLastElt, HalfVec,
17094 LHSNumValidElts, RHSNumValidElts, Subtarget);
17095 Res = DAG.getVectorShuffle(SVN->getValueType(0), dl, LHS, RHS, ShuffV);
17096
17097 // We may have simplified away the shuffle. We won't be able to do anything
17098 // further with it here.
17099 if (!isa<ShuffleVectorSDNode>(Res))
17100 return Res;
17101 Mask = cast<ShuffleVectorSDNode>(Res)->getMask();
17102 }
17103
17104 SDValue TheSplat = IsLittleEndian ? RHS : LHS;
17105 // The common case after we commuted the shuffle is that the RHS is a splat
17106 // and we have elements coming in from the splat at indices that are not
17107 // conducive to using a merge.
17108 // Example:
17109 // vector_shuffle<0,17,1,19,2,21,3,23,4,25,5,27,6,29,7,31> t1, <zero>
17110 if (!isSplatBV(TheSplat))
17111 return Res;
17112
17113 // We are looking for a mask such that all even elements are from
17114 // one vector and all odd elements from the other.
17115 if (!isAlternatingShuffMask(Mask, NumElts))
17116 return Res;
17117
17118 // Adjust the mask so we are pulling in the same index from the splat
17119 // as the index from the interesting vector in consecutive elements.
17120 if (IsLittleEndian) {
17121 // Example (even elements from first vector):
17122 // vector_shuffle<0,16,1,17,2,18,3,19,4,20,5,21,6,22,7,23> t1, <zero>
17123 if (Mask[0] < NumElts)
17124 for (int i = 1, e = Mask.size(); i < e; i += 2) {
17125 if (ShuffV[i] < 0)
17126 continue;
17127 // If element from non-splat is undef, pick first element from splat.
17128 ShuffV[i] = (ShuffV[i - 1] >= 0 ? ShuffV[i - 1] : 0) + NumElts;
17129 }
17130 // Example (odd elements from first vector):
17131 // vector_shuffle<16,0,17,1,18,2,19,3,20,4,21,5,22,6,23,7> t1, <zero>
17132 else
17133 for (int i = 0, e = Mask.size(); i < e; i += 2) {
17134 if (ShuffV[i] < 0)
17135 continue;
17136 // If element from non-splat is undef, pick first element from splat.
17137 ShuffV[i] = (ShuffV[i + 1] >= 0 ? ShuffV[i + 1] : 0) + NumElts;
17138 }
17139 } else {
17140 // Example (even elements from first vector):
17141 // vector_shuffle<0,16,1,17,2,18,3,19,4,20,5,21,6,22,7,23> <zero>, t1
17142 if (Mask[0] < NumElts)
17143 for (int i = 0, e = Mask.size(); i < e; i += 2) {
17144 if (ShuffV[i] < 0)
17145 continue;
17146 // If element from non-splat is undef, pick first element from splat.
17147 ShuffV[i] = ShuffV[i + 1] >= 0 ? ShuffV[i + 1] - NumElts : 0;
17148 }
17149 // Example (odd elements from first vector):
17150 // vector_shuffle<16,0,17,1,18,2,19,3,20,4,21,5,22,6,23,7> <zero>, t1
17151 else
17152 for (int i = 1, e = Mask.size(); i < e; i += 2) {
17153 if (ShuffV[i] < 0)
17154 continue;
17155 // If element from non-splat is undef, pick first element from splat.
17156 ShuffV[i] = ShuffV[i - 1] >= 0 ? ShuffV[i - 1] - NumElts : 0;
17157 }
17158 }
17159
17160 // If the RHS has undefs, we need to remove them since we may have created
17161 // a shuffle that adds those instead of the splat value.
17162 SDValue SplatVal =
17163 cast<BuildVectorSDNode>(TheSplat.getNode())->getSplatValue();
17164 TheSplat = DAG.getSplatBuildVector(TheSplat.getValueType(), dl, SplatVal);
17165
17166 if (IsLittleEndian)
17167 RHS = TheSplat;
17168 else
17169 LHS = TheSplat;
17170 return DAG.getVectorShuffle(SVN->getValueType(0), dl, LHS, RHS, ShuffV);
17171}
17172
17173SDValue PPCTargetLowering::combineVReverseMemOP(ShuffleVectorSDNode *SVN,
17174 LSBaseSDNode *LSBase,
17175 DAGCombinerInfo &DCI) const {
17176 assert((ISD::isNormalLoad(LSBase) || ISD::isNormalStore(LSBase)) &&
17177 "Not a reverse memop pattern!");
17178
17179 auto IsElementReverse = [](const ShuffleVectorSDNode *SVN) -> bool {
17180 auto Mask = SVN->getMask();
17181 int i = 0;
17182 auto I = Mask.rbegin();
17183 auto E = Mask.rend();
17184
17185 for (; I != E; ++I) {
17186 if (*I != i)
17187 return false;
17188 i++;
17189 }
17190 return true;
17191 };
17192
17193 SelectionDAG &DAG = DCI.DAG;
17194 EVT VT = SVN->getValueType(0);
17195
17196 if (!isTypeLegal(VT) || !Subtarget.isLittleEndian() || !Subtarget.hasVSX())
17197 return SDValue();
17198
17199 // Before P9, we have PPCVSXSwapRemoval pass to hack the element order.
17200 // See comment in PPCVSXSwapRemoval.cpp.
17201 // It is conflict with PPCVSXSwapRemoval opt. So we don't do it.
17202 if (!Subtarget.hasP9Vector())
17203 return SDValue();
17204
17205 if(!IsElementReverse(SVN))
17206 return SDValue();
17207
17208 if (LSBase->getOpcode() == ISD::LOAD) {
17209 // If the load return value 0 has more than one user except the
17210 // shufflevector instruction, it is not profitable to replace the
17211 // shufflevector with a reverse load.
17212 for (SDUse &Use : LSBase->uses())
17213 if (Use.getResNo() == 0 &&
17214 Use.getUser()->getOpcode() != ISD::VECTOR_SHUFFLE)
17215 return SDValue();
17216
17217 SDLoc dl(LSBase);
17218 SDValue LoadOps[] = {LSBase->getChain(), LSBase->getBasePtr()};
17219 return DAG.getMemIntrinsicNode(
17220 PPCISD::LOAD_VEC_BE, dl, DAG.getVTList(VT, MVT::Other), LoadOps,
17221 LSBase->getMemoryVT(), LSBase->getMemOperand());
17222 }
17223
17224 if (LSBase->getOpcode() == ISD::STORE) {
17225 // If there are other uses of the shuffle, the swap cannot be avoided.
17226 // Forcing the use of an X-Form (since swapped stores only have
17227 // X-Forms) without removing the swap is unprofitable.
17228 if (!SVN->hasOneUse())
17229 return SDValue();
17230
17231 SDLoc dl(LSBase);
17232 SDValue StoreOps[] = {LSBase->getChain(), SVN->getOperand(0),
17233 LSBase->getBasePtr()};
17234 return DAG.getMemIntrinsicNode(
17235 PPCISD::STORE_VEC_BE, dl, DAG.getVTList(MVT::Other), StoreOps,
17236 LSBase->getMemoryVT(), LSBase->getMemOperand());
17237 }
17238
17239 llvm_unreachable("Expected a load or store node here");
17240}
17241
17242static bool isStoreConditional(SDValue Intrin, unsigned &StoreWidth) {
17243 unsigned IntrinsicID = Intrin.getConstantOperandVal(1);
17244 if (IntrinsicID == Intrinsic::ppc_stdcx)
17245 StoreWidth = 8;
17246 else if (IntrinsicID == Intrinsic::ppc_stwcx)
17247 StoreWidth = 4;
17248 else if (IntrinsicID == Intrinsic::ppc_sthcx)
17249 StoreWidth = 2;
17250 else if (IntrinsicID == Intrinsic::ppc_stbcx)
17251 StoreWidth = 1;
17252 else
17253 return false;
17254 return true;
17255}
17256
17259 if (N->getOpcode() == PPCISD::ADDC && N->hasAnyUseOfValue(1)) {
17260 // (ADDC (ADDE 0, 0, C), -1) -> C
17261 SDValue LHS = N->getOperand(0);
17262 SDValue RHS = N->getOperand(1);
17263 if (LHS->getOpcode() == PPCISD::ADDE &&
17264 isNullConstant(LHS->getOperand(0)) &&
17265 isNullConstant(LHS->getOperand(1)) && isAllOnesConstant(RHS)) {
17266 return DCI.CombineTo(N, SDValue(N, 0), LHS->getOperand(2));
17267 }
17268 }
17269 return SDValue();
17270}
17271
17272// Optimize zero-extension of setcc when the compared value is known to be 0
17273// or 1.
17274//
17275// Pattern: zext(setcc(Value, 0, seteq/setne)) where Value is 0 or 1
17276// -> zext(xor(Value, 1)) for seteq
17277// -> zext(Value) for setne
17278//
17279// This optimization avoids the i32 -> i1 -> i32/i64 conversion sequence
17280// by keeping the value in its original i32 type throughout.
17281//
17282// Example:
17283// Before: zext(setcc(test_data_class(...), 0, seteq))
17284// // test_data_class returns 0 or 1 in i32
17285// // setcc converts i32 -> i1
17286// // zext converts i1 -> i64
17287// After: zext(xor(test_data_class(...), 1))
17288// // Stays in i32, then extends to i64
17289//
17290// This is beneficial because:
17291// 1. Eliminates the setcc instruction
17292// 2. Avoids i32 -> i1 truncation
17293// 3. Keeps computation in native integer width
17294
17296 // Check if this is a zero_extend
17297 if (N->getOpcode() != ISD::ZERO_EXTEND)
17298 return SDValue();
17299
17300 SDValue Src = N->getOperand(0);
17301
17302 // Check if the source is a setcc
17303 if (Src.getOpcode() != ISD::SETCC)
17304 return SDValue();
17305
17306 SDValue LHS = Src.getOperand(0);
17307 SDValue RHS = Src.getOperand(1);
17308 ISD::CondCode CC = cast<CondCodeSDNode>(Src.getOperand(2))->get();
17309
17311 return SDValue();
17312
17313 SDValue NonNullConstant = isNullConstant(RHS) ? LHS : RHS;
17314
17315 auto isZeroOrOne = [=](SDValue &V) {
17316 if (V.getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
17317 V.getConstantOperandVal(0) == Intrinsic::ppc_test_data_class)
17318 return true;
17319 return false;
17320 };
17321
17322 if (!isZeroOrOne(NonNullConstant))
17323 return SDValue();
17324
17325 // Check for pattern: zext(setcc (Value), 0, seteq)) or
17326 // zext(setcc (Value), 0, setne))
17327 if (CC == ISD::SETEQ || CC == ISD::SETNE) {
17328 // Replace with: zext(xor(Value, 1)) for seteq
17329 // or: zext(Value) for setne
17330 // This keeps the value in i32 instead of converting to i1
17331 SDLoc DL(N);
17332 EVT VType = N->getValueType(0);
17333 SDValue NewNonNullConstant = DAG.getZExtOrTrunc(NonNullConstant, DL, VType);
17334
17335 if (CC == ISD::SETNE)
17336 return NewNonNullConstant;
17337
17338 SDValue One = DAG.getConstant(1, DL, VType);
17339 return DAG.getNode(ISD::XOR, DL, VType, NewNonNullConstant, One);
17340 }
17341
17342 return SDValue();
17343}
17344
17345// Combine XOR patterns with SELECT_CC_I4/I8, for Example:
17346// 1. XOR(SELECT_CC_I4(cond, 1, 0, cc), 1) -> SELECT_CC_I4(cond, 0, 1, cc)
17347// 2. XOR(ZEXT(SELECT_CC_I4(cond, 1, 0, cc)), 1) -> SELECT_CC_I4/I8(cond, 0,
17348// 1, cc))
17349// 3. XOR(ANYEXT(SELECT_CC_I4(cond, 1, 0, cc)), 1) -> SELECT_CC_I4/I8(cond,
17350// 0, 1, cc))
17351// 4. etc
17353 assert(N->getOpcode() == ISD::XOR && "Expected XOR node");
17354
17355 EVT XorVT = N->getValueType(0);
17356 if ((XorVT != MVT::i32 && XorVT != MVT::i64))
17357 return SDValue();
17358
17359 SDValue LHS = N->getOperand(0);
17360 SDValue RHS = N->getOperand(1);
17361
17362 // Check for XOR with constant 1
17364 if (!XorConst || !XorConst->isOne()) {
17365 XorConst = dyn_cast<ConstantSDNode>(LHS);
17366 if (!XorConst || !XorConst->isOne())
17367 return SDValue();
17368 // Swap so LHS is the SELECT_CC_I4 (or extension) and RHS is the constant
17369 std::swap(LHS, RHS);
17370 }
17371
17372 // Check if LHS has only one use
17373 if (!LHS.hasOneUse())
17374 return SDValue();
17375
17376 // Handle extensions: ZEXT, ANYEXT
17377 SDValue SelectNode = LHS;
17378
17379 if (LHS.getOpcode() == ISD::ZERO_EXTEND ||
17380 LHS.getOpcode() == ISD::ANY_EXTEND) {
17381 SelectNode = LHS.getOperand(0);
17382
17383 // Check if the extension input has only one use
17384 if (!SelectNode.hasOneUse())
17385 return SDValue();
17386 }
17387
17388 // Check if SelectNode is a MachineSDNode with SELECT_CC_I4/I8 opcode
17389 if (!SelectNode.isMachineOpcode())
17390 return SDValue();
17391
17392 unsigned MachineOpc = SelectNode.getMachineOpcode();
17393
17394 // Handle both SELECT_CC_I4 and SELECT_CC_I8
17395 if (MachineOpc != PPC::SELECT_CC_I4 && MachineOpc != PPC::SELECT_CC_I8)
17396 return SDValue();
17397
17398 // SELECT_CC_I4 operands: (cond, true_val, false_val, bropc)
17399 if (SelectNode.getNumOperands() != 4)
17400 return SDValue();
17401
17402 ConstantSDNode *ConstOp1 = dyn_cast<ConstantSDNode>(SelectNode.getOperand(1));
17403 ConstantSDNode *ConstOp2 = dyn_cast<ConstantSDNode>(SelectNode.getOperand(2));
17404
17405 if (!ConstOp1 || !ConstOp2)
17406 return SDValue();
17407
17408 // Only optimize if operands are {0, 1} or {1, 0}
17409 if (!((ConstOp1->isOne() && ConstOp2->isZero()) ||
17410 (ConstOp1->isZero() && ConstOp2->isOne())))
17411 return SDValue();
17412
17413 // Pattern matched! Create new SELECT_CC with swapped 0/1 operands to
17414 // eliminate XOR. If original was SELECT_CC(cond, 1, 0, pred), create
17415 // SELECT_CC(cond, 0, 1, pred). If original was SELECT_CC(cond, 0, 1, pred),
17416 // create SELECT_CC(cond, 1, 0, pred).
17417 SDLoc DL(N);
17418 MachineOpc = (XorVT == MVT::i32) ? PPC::SELECT_CC_I4 : PPC::SELECT_CC_I8;
17419
17420 bool ConstOp1IsOne = ConstOp1->isOne();
17421 return SDValue(
17422 DAG.getMachineNode(MachineOpc, DL, XorVT,
17423 {SelectNode.getOperand(0),
17424 DAG.getConstant(ConstOp1IsOne ? 0 : 1, DL, XorVT),
17425 DAG.getConstant(ConstOp1IsOne ? 1 : 0, DL, XorVT),
17426 SelectNode.getOperand(3)}),
17427 0);
17428}
17429
17431 DAGCombinerInfo &DCI) const {
17432 SelectionDAG &DAG = DCI.DAG;
17433 SDLoc dl(N);
17434 switch (N->getOpcode()) {
17435 default: break;
17436 case ISD::ADD:
17437 return combineADD(N, DCI);
17438 case ISD::AND: {
17439 // We don't want (and (zext (shift...)), C) if C fits in the width of the
17440 // original input as that will prevent us from selecting optimal rotates.
17441 // This only matters if the input to the extend is i32 widened to i64.
17442 SDValue Op1 = N->getOperand(0);
17443 SDValue Op2 = N->getOperand(1);
17444 if ((Op1.getOpcode() != ISD::ZERO_EXTEND &&
17445 Op1.getOpcode() != ISD::ANY_EXTEND) ||
17446 !isa<ConstantSDNode>(Op2) || N->getValueType(0) != MVT::i64 ||
17447 Op1.getOperand(0).getValueType() != MVT::i32)
17448 break;
17449 SDValue NarrowOp = Op1.getOperand(0);
17450 if (NarrowOp.getOpcode() != ISD::SHL && NarrowOp.getOpcode() != ISD::SRL &&
17451 NarrowOp.getOpcode() != ISD::ROTL && NarrowOp.getOpcode() != ISD::ROTR)
17452 break;
17453
17454 uint64_t Imm = Op2->getAsZExtVal();
17455 // Make sure that the constant is narrow enough to fit in the narrow type.
17456 if (!isUInt<32>(Imm))
17457 break;
17458 SDValue ConstOp = DAG.getConstant(Imm, dl, MVT::i32);
17459 SDValue NarrowAnd = DAG.getNode(ISD::AND, dl, MVT::i32, NarrowOp, ConstOp);
17460 return DAG.getZExtOrTrunc(NarrowAnd, dl, N->getValueType(0));
17461 }
17462 case ISD::XOR: {
17463 // Optimize XOR(ISEL(1,0,CR), 1) -> ISEL(0,1,CR)
17464 if (SDValue V = combineXorSelectCC(N, DAG))
17465 return V;
17466 break;
17467 }
17468 case ISD::SHL:
17469 return combineSHL(N, DCI);
17470 case ISD::SRA:
17471 return combineSRA(N, DCI);
17472 case ISD::SRL:
17473 return combineSRL(N, DCI);
17474 case ISD::MUL:
17475 return combineMUL(N, DCI);
17476 case ISD::FMA:
17477 case PPCISD::FNMSUB:
17478 return combineFMALike(N, DCI);
17479 case PPCISD::SHL:
17480 if (isNullConstant(N->getOperand(0))) // 0 << V -> 0.
17481 return N->getOperand(0);
17482 break;
17483 case PPCISD::SRL:
17484 if (isNullConstant(N->getOperand(0))) // 0 >>u V -> 0.
17485 return N->getOperand(0);
17486 break;
17487 case PPCISD::SRA:
17488 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) {
17489 if (C->isZero() || // 0 >>s V -> 0.
17490 C->isAllOnes()) // -1 >>s V -> -1.
17491 return N->getOperand(0);
17492 }
17493 break;
17494 case ISD::ZERO_EXTEND:
17495 if (SDValue RetV = combineZextSetccWithZero(N, DCI.DAG))
17496 return RetV;
17497 [[fallthrough]];
17498 case ISD::SIGN_EXTEND:
17499 case ISD::ANY_EXTEND:
17500 return DAGCombineExtBoolTrunc(N, DCI);
17501 case ISD::TRUNCATE:
17502 return combineTRUNCATE(N, DCI);
17503 case ISD::SETCC:
17504 if (SDValue CSCC = combineSetCC(N, DCI))
17505 return CSCC;
17506 [[fallthrough]];
17507 case ISD::SELECT_CC:
17508 return DAGCombineTruncBoolExt(N, DCI);
17509 case ISD::SINT_TO_FP:
17510 case ISD::UINT_TO_FP:
17511 return combineFPToIntToFP(N, DCI);
17513 if (ISD::isNormalLoad(N->getOperand(0).getNode())) {
17514 LSBaseSDNode* LSBase = cast<LSBaseSDNode>(N->getOperand(0));
17515 return combineVReverseMemOP(cast<ShuffleVectorSDNode>(N), LSBase, DCI);
17516 }
17517 return combineVectorShuffle(cast<ShuffleVectorSDNode>(N), DCI.DAG);
17518 case ISD::STORE: {
17519
17520 EVT Op1VT = N->getOperand(1).getValueType();
17521 unsigned Opcode = N->getOperand(1).getOpcode();
17522
17523 if (Opcode == ISD::FP_TO_SINT || Opcode == ISD::FP_TO_UINT ||
17524 Opcode == ISD::STRICT_FP_TO_SINT || Opcode == ISD::STRICT_FP_TO_UINT) {
17525 SDValue Val = combineStoreFPToInt(N, DCI);
17526 if (Val)
17527 return Val;
17528 }
17529
17530 if (Opcode == ISD::VECTOR_SHUFFLE && ISD::isNormalStore(N)) {
17531 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N->getOperand(1));
17532 SDValue Val= combineVReverseMemOP(SVN, cast<LSBaseSDNode>(N), DCI);
17533 if (Val)
17534 return Val;
17535 }
17536
17537 // Turn STORE (BSWAP) -> sthbrx/stwbrx.
17538 if (cast<StoreSDNode>(N)->isUnindexed() && Opcode == ISD::BSWAP &&
17539 N->getOperand(1).getNode()->hasOneUse() &&
17540 (Op1VT == MVT::i32 || Op1VT == MVT::i16 ||
17541 (Subtarget.hasLDBRX() && Subtarget.isPPC64() && Op1VT == MVT::i64))) {
17542
17543 // STBRX can only handle simple types and it makes no sense to store less
17544 // two bytes in byte-reversed order.
17545 EVT mVT = cast<StoreSDNode>(N)->getMemoryVT();
17546 if (mVT.isExtended() || mVT.getSizeInBits() < 16)
17547 break;
17548
17549 SDValue BSwapOp = N->getOperand(1).getOperand(0);
17550 // Do an any-extend to 32-bits if this is a half-word input.
17551 if (BSwapOp.getValueType() == MVT::i16)
17552 BSwapOp = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, BSwapOp);
17553
17554 // If the type of BSWAP operand is wider than stored memory width
17555 // it need to be shifted to the right side before STBRX.
17556 if (Op1VT.bitsGT(mVT)) {
17557 int Shift = Op1VT.getSizeInBits() - mVT.getSizeInBits();
17558 BSwapOp = DAG.getNode(ISD::SRL, dl, Op1VT, BSwapOp,
17559 DAG.getConstant(Shift, dl, MVT::i32));
17560 // Need to truncate if this is a bswap of i64 stored as i32/i16.
17561 if (Op1VT == MVT::i64)
17562 BSwapOp = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, BSwapOp);
17563 }
17564
17565 SDValue Ops[] = {
17566 N->getOperand(0), BSwapOp, N->getOperand(2), DAG.getValueType(mVT)
17567 };
17568 return
17569 DAG.getMemIntrinsicNode(PPCISD::STBRX, dl, DAG.getVTList(MVT::Other),
17570 Ops, cast<StoreSDNode>(N)->getMemoryVT(),
17571 cast<StoreSDNode>(N)->getMemOperand());
17572 }
17573
17574 // STORE Constant:i32<0> -> STORE<trunc to i32> Constant:i64<0>
17575 // So it can increase the chance of CSE constant construction.
17576 if (Subtarget.isPPC64() && !DCI.isBeforeLegalize() &&
17577 isa<ConstantSDNode>(N->getOperand(1)) && Op1VT == MVT::i32) {
17578 // Need to sign-extended to 64-bits to handle negative values.
17579 EVT MemVT = cast<StoreSDNode>(N)->getMemoryVT();
17580 uint64_t Val64 = SignExtend64(N->getConstantOperandVal(1),
17581 MemVT.getSizeInBits());
17582 SDValue Const64 = DAG.getConstant(Val64, dl, MVT::i64);
17583
17584 auto *ST = cast<StoreSDNode>(N);
17585 SDValue NewST = DAG.getStore(ST->getChain(), dl, Const64,
17586 ST->getBasePtr(), ST->getOffset(), MemVT,
17587 ST->getMemOperand(), ST->getAddressingMode(),
17588 /*IsTruncating=*/true);
17589 // Note we use CombineTo here to prevent DAGCombiner from visiting the
17590 // new store which will change the constant by removing non-demanded bits.
17591 return ST->isUnindexed()
17592 ? DCI.CombineTo(N, NewST, /*AddTo=*/false)
17593 : DCI.CombineTo(N, NewST, NewST.getValue(1), /*AddTo=*/false);
17594 }
17595
17596 // For little endian, VSX stores require generating xxswapd/lxvd2x.
17597 // Not needed on ISA 3.0 based CPUs since we have a non-permuting store.
17598 if (Op1VT.isSimple()) {
17599 MVT StoreVT = Op1VT.getSimpleVT();
17600 if (Subtarget.needsSwapsForVSXMemOps() &&
17601 (StoreVT == MVT::v2f64 || StoreVT == MVT::v2i64 ||
17602 StoreVT == MVT::v4f32 || StoreVT == MVT::v4i32))
17603 return expandVSXStoreForLE(N, DCI);
17604 }
17605 break;
17606 }
17607 case ISD::LOAD: {
17609 EVT VT = LD->getValueType(0);
17610
17611 // For little endian, VSX loads require generating lxvd2x/xxswapd.
17612 // Not needed on ISA 3.0 based CPUs since we have a non-permuting load.
17613 if (VT.isSimple()) {
17614 MVT LoadVT = VT.getSimpleVT();
17615 if (Subtarget.needsSwapsForVSXMemOps() &&
17616 (LoadVT == MVT::v2f64 || LoadVT == MVT::v2i64 ||
17617 LoadVT == MVT::v4f32 || LoadVT == MVT::v4i32))
17618 return expandVSXLoadForLE(N, DCI);
17619 }
17620
17621 // We sometimes end up with a 64-bit integer load, from which we extract
17622 // two single-precision floating-point numbers. This happens with
17623 // std::complex<float>, and other similar structures, because of the way we
17624 // canonicalize structure copies. However, if we lack direct moves,
17625 // then the final bitcasts from the extracted integer values to the
17626 // floating-point numbers turn into store/load pairs. Even with direct moves,
17627 // just loading the two floating-point numbers is likely better.
17628 auto ReplaceTwoFloatLoad = [&]() {
17629 if (VT != MVT::i64)
17630 return false;
17631
17632 if (LD->getExtensionType() != ISD::NON_EXTLOAD ||
17633 LD->isVolatile())
17634 return false;
17635
17636 // We're looking for a sequence like this:
17637 // t13: i64,ch = load<LD8[%ref.tmp]> t0, t6, undef:i64
17638 // t16: i64 = srl t13, Constant:i32<32>
17639 // t17: i32 = truncate t16
17640 // t18: f32 = bitcast t17
17641 // t19: i32 = truncate t13
17642 // t20: f32 = bitcast t19
17643
17644 if (!LD->hasNUsesOfValue(2, 0))
17645 return false;
17646
17647 auto UI = LD->user_begin();
17648 while (UI.getUse().getResNo() != 0) ++UI;
17649 SDNode *Trunc = *UI++;
17650 while (UI.getUse().getResNo() != 0) ++UI;
17651 SDNode *RightShift = *UI;
17652 if (Trunc->getOpcode() != ISD::TRUNCATE)
17653 std::swap(Trunc, RightShift);
17654
17655 if (Trunc->getOpcode() != ISD::TRUNCATE ||
17656 Trunc->getValueType(0) != MVT::i32 ||
17657 !Trunc->hasOneUse())
17658 return false;
17659 if (RightShift->getOpcode() != ISD::SRL ||
17660 !isa<ConstantSDNode>(RightShift->getOperand(1)) ||
17661 RightShift->getConstantOperandVal(1) != 32 ||
17662 !RightShift->hasOneUse())
17663 return false;
17664
17665 SDNode *Trunc2 = *RightShift->user_begin();
17666 if (Trunc2->getOpcode() != ISD::TRUNCATE ||
17667 Trunc2->getValueType(0) != MVT::i32 ||
17668 !Trunc2->hasOneUse())
17669 return false;
17670
17671 SDNode *Bitcast = *Trunc->user_begin();
17672 SDNode *Bitcast2 = *Trunc2->user_begin();
17673
17674 if (Bitcast->getOpcode() != ISD::BITCAST ||
17675 Bitcast->getValueType(0) != MVT::f32)
17676 return false;
17677 if (Bitcast2->getOpcode() != ISD::BITCAST ||
17678 Bitcast2->getValueType(0) != MVT::f32)
17679 return false;
17680
17681 if (Subtarget.isLittleEndian())
17682 std::swap(Bitcast, Bitcast2);
17683
17684 // Bitcast has the second float (in memory-layout order) and Bitcast2
17685 // has the first one.
17686
17687 SDValue BasePtr = LD->getBasePtr();
17688 if (LD->isIndexed()) {
17689 assert(LD->getAddressingMode() == ISD::PRE_INC &&
17690 "Non-pre-inc AM on PPC?");
17691 BasePtr =
17692 DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
17693 LD->getOffset());
17694 }
17695
17696 auto MMOFlags =
17697 LD->getMemOperand()->getFlags() & ~MachineMemOperand::MOVolatile;
17698 SDValue FloatLoad = DAG.getLoad(MVT::f32, dl, LD->getChain(), BasePtr,
17699 LD->getPointerInfo(), LD->getAlign(),
17700 MMOFlags, LD->getAAInfo());
17701 SDValue AddPtr =
17702 DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(),
17703 BasePtr, DAG.getIntPtrConstant(4, dl));
17704 SDValue FloatLoad2 = DAG.getLoad(
17705 MVT::f32, dl, SDValue(FloatLoad.getNode(), 1), AddPtr,
17706 LD->getPointerInfo().getWithOffset(4),
17707 commonAlignment(LD->getAlign(), 4), MMOFlags, LD->getAAInfo());
17708
17709 if (LD->isIndexed()) {
17710 // Note that DAGCombine should re-form any pre-increment load(s) from
17711 // what is produced here if that makes sense.
17712 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), BasePtr);
17713 }
17714
17715 DCI.CombineTo(Bitcast2, FloatLoad);
17716 DCI.CombineTo(Bitcast, FloatLoad2);
17717
17718 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, LD->isIndexed() ? 2 : 1),
17719 SDValue(FloatLoad2.getNode(), 1));
17720 return true;
17721 };
17722
17723 if (ReplaceTwoFloatLoad())
17724 return SDValue(N, 0);
17725
17726 EVT MemVT = LD->getMemoryVT();
17727 Type *Ty = MemVT.getTypeForEVT(*DAG.getContext());
17728 Align ABIAlignment = DAG.getDataLayout().getABITypeAlign(Ty);
17729 if (LD->isUnindexed() && VT.isVector() &&
17730 ((Subtarget.hasAltivec() && ISD::isNON_EXTLoad(N) &&
17731 // P8 and later hardware should just use LOAD.
17732 !Subtarget.hasP8Vector() &&
17733 (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32 ||
17734 VT == MVT::v4f32))) &&
17735 LD->getAlign() < ABIAlignment) {
17736 // This is a type-legal unaligned Altivec load.
17737 SDValue Chain = LD->getChain();
17738 SDValue Ptr = LD->getBasePtr();
17739 bool isLittleEndian = Subtarget.isLittleEndian();
17740
17741 // This implements the loading of unaligned vectors as described in
17742 // the venerable Apple Velocity Engine overview. Specifically:
17743 // https://developer.apple.com/hardwaredrivers/ve/alignment.html
17744 // https://developer.apple.com/hardwaredrivers/ve/code_optimization.html
17745 //
17746 // The general idea is to expand a sequence of one or more unaligned
17747 // loads into an alignment-based permutation-control instruction (lvsl
17748 // or lvsr), a series of regular vector loads (which always truncate
17749 // their input address to an aligned address), and a series of
17750 // permutations. The results of these permutations are the requested
17751 // loaded values. The trick is that the last "extra" load is not taken
17752 // from the address you might suspect (sizeof(vector) bytes after the
17753 // last requested load), but rather sizeof(vector) - 1 bytes after the
17754 // last requested vector. The point of this is to avoid a page fault if
17755 // the base address happened to be aligned. This works because if the
17756 // base address is aligned, then adding less than a full vector length
17757 // will cause the last vector in the sequence to be (re)loaded.
17758 // Otherwise, the next vector will be fetched as you might suspect was
17759 // necessary.
17760
17761 // We might be able to reuse the permutation generation from
17762 // a different base address offset from this one by an aligned amount.
17763 // The INTRINSIC_WO_CHAIN DAG combine will attempt to perform this
17764 // optimization later.
17765 Intrinsic::ID Intr, IntrLD, IntrPerm;
17766 MVT PermCntlTy, PermTy, LDTy;
17767 Intr = isLittleEndian ? Intrinsic::ppc_altivec_lvsr
17768 : Intrinsic::ppc_altivec_lvsl;
17769 IntrLD = Intrinsic::ppc_altivec_lvx;
17770 IntrPerm = Intrinsic::ppc_altivec_vperm;
17771 PermCntlTy = MVT::v16i8;
17772 PermTy = MVT::v4i32;
17773 LDTy = MVT::v4i32;
17774
17775 SDValue PermCntl = BuildIntrinsicOp(Intr, Ptr, DAG, dl, PermCntlTy);
17776
17777 // Create the new MMO for the new base load. It is like the original MMO,
17778 // but represents an area in memory almost twice the vector size centered
17779 // on the original address. If the address is unaligned, we might start
17780 // reading up to (sizeof(vector)-1) bytes below the address of the
17781 // original unaligned load.
17783 MachineMemOperand *BaseMMO =
17784 MF.getMachineMemOperand(LD->getMemOperand(),
17785 -(int64_t)MemVT.getStoreSize()+1,
17786 2*MemVT.getStoreSize()-1);
17787
17788 // Create the new base load.
17789 SDValue LDXIntID =
17790 DAG.getTargetConstant(IntrLD, dl, getPointerTy(MF.getDataLayout()));
17791 SDValue BaseLoadOps[] = { Chain, LDXIntID, Ptr };
17792 SDValue BaseLoad =
17794 DAG.getVTList(PermTy, MVT::Other),
17795 BaseLoadOps, LDTy, BaseMMO);
17796
17797 // Note that the value of IncOffset (which is provided to the next
17798 // load's pointer info offset value, and thus used to calculate the
17799 // alignment), and the value of IncValue (which is actually used to
17800 // increment the pointer value) are different! This is because we
17801 // require the next load to appear to be aligned, even though it
17802 // is actually offset from the base pointer by a lesser amount.
17803 int IncOffset = VT.getSizeInBits() / 8;
17804 int IncValue = IncOffset;
17805
17806 // Walk (both up and down) the chain looking for another load at the real
17807 // (aligned) offset (the alignment of the other load does not matter in
17808 // this case). If found, then do not use the offset reduction trick, as
17809 // that will prevent the loads from being later combined (as they would
17810 // otherwise be duplicates).
17811 if (!findConsecutiveLoad(LD, DAG))
17812 --IncValue;
17813
17815 DAG.getConstant(IncValue, dl, getPointerTy(MF.getDataLayout()));
17816 Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment);
17817
17818 MachineMemOperand *ExtraMMO =
17819 MF.getMachineMemOperand(LD->getMemOperand(),
17820 1, 2*MemVT.getStoreSize()-1);
17821 SDValue ExtraLoadOps[] = { Chain, LDXIntID, Ptr };
17822 SDValue ExtraLoad =
17824 DAG.getVTList(PermTy, MVT::Other),
17825 ExtraLoadOps, LDTy, ExtraMMO);
17826
17827 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
17828 BaseLoad.getValue(1), ExtraLoad.getValue(1));
17829
17830 // Because vperm has a big-endian bias, we must reverse the order
17831 // of the input vectors and complement the permute control vector
17832 // when generating little endian code. We have already handled the
17833 // latter by using lvsr instead of lvsl, so just reverse BaseLoad
17834 // and ExtraLoad here.
17835 SDValue Perm;
17836 if (isLittleEndian)
17837 Perm = BuildIntrinsicOp(IntrPerm,
17838 ExtraLoad, BaseLoad, PermCntl, DAG, dl);
17839 else
17840 Perm = BuildIntrinsicOp(IntrPerm,
17841 BaseLoad, ExtraLoad, PermCntl, DAG, dl);
17842
17843 if (VT != PermTy)
17844 Perm = Subtarget.hasAltivec()
17845 ? DAG.getNode(ISD::BITCAST, dl, VT, Perm)
17846 : DAG.getNode(ISD::FP_ROUND, dl, VT, Perm,
17847 DAG.getTargetConstant(1, dl, MVT::i64));
17848 // second argument is 1 because this rounding
17849 // is always exact.
17850
17851 // The output of the permutation is our loaded result, the TokenFactor is
17852 // our new chain.
17853 DCI.CombineTo(N, Perm, TF);
17854 return SDValue(N, 0);
17855 }
17856 }
17857 break;
17859 bool isLittleEndian = Subtarget.isLittleEndian();
17860 unsigned IID = N->getConstantOperandVal(0);
17861 Intrinsic::ID Intr = (isLittleEndian ? Intrinsic::ppc_altivec_lvsr
17862 : Intrinsic::ppc_altivec_lvsl);
17863 if (IID == Intr && N->getOperand(1)->getOpcode() == ISD::ADD) {
17864 SDValue Add = N->getOperand(1);
17865
17866 int Bits = 4 /* 16 byte alignment */;
17867
17868 if (DAG.MaskedValueIsZero(Add->getOperand(1),
17869 APInt::getAllOnes(Bits /* alignment */)
17870 .zext(Add.getScalarValueSizeInBits()))) {
17871 SDNode *BasePtr = Add->getOperand(0).getNode();
17872 for (SDNode *U : BasePtr->users()) {
17873 if (U->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
17874 U->getConstantOperandVal(0) == IID) {
17875 // We've found another LVSL/LVSR, and this address is an aligned
17876 // multiple of that one. The results will be the same, so use the
17877 // one we've just found instead.
17878
17879 return SDValue(U, 0);
17880 }
17881 }
17882 }
17883
17884 if (isa<ConstantSDNode>(Add->getOperand(1))) {
17885 SDNode *BasePtr = Add->getOperand(0).getNode();
17886 for (SDNode *U : BasePtr->users()) {
17887 if (U->getOpcode() == ISD::ADD &&
17888 isa<ConstantSDNode>(U->getOperand(1)) &&
17889 (Add->getConstantOperandVal(1) - U->getConstantOperandVal(1)) %
17890 (1ULL << Bits) ==
17891 0) {
17892 SDNode *OtherAdd = U;
17893 for (SDNode *V : OtherAdd->users()) {
17894 if (V->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
17895 V->getConstantOperandVal(0) == IID) {
17896 return SDValue(V, 0);
17897 }
17898 }
17899 }
17900 }
17901 }
17902 }
17903
17904 // Combine vmaxsw/h/b(a, a's negation) to abs(a)
17905 // Expose the vabsduw/h/b opportunity for down stream
17906 if (!DCI.isAfterLegalizeDAG() && Subtarget.hasP9Altivec() &&
17907 (IID == Intrinsic::ppc_altivec_vmaxsw ||
17908 IID == Intrinsic::ppc_altivec_vmaxsh ||
17909 IID == Intrinsic::ppc_altivec_vmaxsb)) {
17910 SDValue V1 = N->getOperand(1);
17911 SDValue V2 = N->getOperand(2);
17912 if ((V1.getSimpleValueType() == MVT::v4i32 ||
17913 V1.getSimpleValueType() == MVT::v8i16 ||
17914 V1.getSimpleValueType() == MVT::v16i8) &&
17916 // (0-a, a)
17917 if (V1.getOpcode() == ISD::SUB &&
17919 V1.getOperand(1) == V2) {
17920 return DAG.getNode(ISD::ABS, dl, V2.getValueType(), V2);
17921 }
17922 // (a, 0-a)
17923 if (V2.getOpcode() == ISD::SUB &&
17925 V2.getOperand(1) == V1) {
17926 return DAG.getNode(ISD::ABS, dl, V1.getValueType(), V1);
17927 }
17928 // (x-y, y-x)
17929 if (V1.getOpcode() == ISD::SUB && V2.getOpcode() == ISD::SUB &&
17930 V1.getOperand(0) == V2.getOperand(1) &&
17931 V1.getOperand(1) == V2.getOperand(0)) {
17932 return DAG.getNode(ISD::ABS, dl, V1.getValueType(), V1);
17933 }
17934 }
17935 }
17936 }
17937
17938 break;
17940 switch (N->getConstantOperandVal(1)) {
17941 default:
17942 break;
17943 case Intrinsic::ppc_altivec_vsum4sbs:
17944 case Intrinsic::ppc_altivec_vsum4shs:
17945 case Intrinsic::ppc_altivec_vsum4ubs: {
17946 // These sum-across intrinsics only have a chain due to the side effect
17947 // that they may set the SAT bit. If we know the SAT bit will not be set
17948 // for some inputs, we can replace any uses of their chain with the
17949 // input chain.
17950 if (BuildVectorSDNode *BVN =
17951 dyn_cast<BuildVectorSDNode>(N->getOperand(3))) {
17952 APInt APSplatBits, APSplatUndef;
17953 unsigned SplatBitSize;
17954 bool HasAnyUndefs;
17955 bool BVNIsConstantSplat = BVN->isConstantSplat(
17956 APSplatBits, APSplatUndef, SplatBitSize, HasAnyUndefs, 0,
17957 !Subtarget.isLittleEndian());
17958 // If the constant splat vector is 0, the SAT bit will not be set.
17959 if (BVNIsConstantSplat && APSplatBits == 0)
17960 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), N->getOperand(0));
17961 }
17962 return SDValue();
17963 }
17964 case Intrinsic::ppc_vsx_lxvw4x:
17965 case Intrinsic::ppc_vsx_lxvd2x:
17966 // For little endian, VSX loads require generating lxvd2x/xxswapd.
17967 // Not needed on ISA 3.0 based CPUs since we have a non-permuting load.
17968 if (Subtarget.needsSwapsForVSXMemOps())
17969 return expandVSXLoadForLE(N, DCI);
17970 break;
17971 }
17972 break;
17974 // For little endian, VSX stores require generating xxswapd/stxvd2x.
17975 // Not needed on ISA 3.0 based CPUs since we have a non-permuting store.
17976 if (Subtarget.needsSwapsForVSXMemOps()) {
17977 switch (N->getConstantOperandVal(1)) {
17978 default:
17979 break;
17980 case Intrinsic::ppc_vsx_stxvw4x:
17981 case Intrinsic::ppc_vsx_stxvd2x:
17982 return expandVSXStoreForLE(N, DCI);
17983 }
17984 }
17985 break;
17986 case ISD::BSWAP: {
17987 // Turn BSWAP (LOAD) -> lhbrx/lwbrx.
17988 // For subtargets without LDBRX, we can still do better than the default
17989 // expansion even for 64-bit BSWAP (LOAD).
17990 bool Is64BitBswapOn64BitTgt =
17991 Subtarget.isPPC64() && N->getValueType(0) == MVT::i64;
17992 bool IsSingleUseNormalLd = ISD::isNormalLoad(N->getOperand(0).getNode()) &&
17993 N->getOperand(0).hasOneUse();
17994 if (IsSingleUseNormalLd &&
17995 (N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i16 ||
17996 (Subtarget.hasLDBRX() && Is64BitBswapOn64BitTgt))) {
17997 SDValue Load = N->getOperand(0);
17998 LoadSDNode *LD = cast<LoadSDNode>(Load);
17999 // Create the byte-swapping load.
18000 SDValue Ops[] = {
18001 LD->getChain(), // Chain
18002 LD->getBasePtr(), // Ptr
18003 DAG.getValueType(N->getValueType(0)) // VT
18004 };
18005 SDValue BSLoad =
18006 DAG.getMemIntrinsicNode(PPCISD::LBRX, dl,
18007 DAG.getVTList(N->getValueType(0) == MVT::i64 ?
18008 MVT::i64 : MVT::i32, MVT::Other),
18009 Ops, LD->getMemoryVT(), LD->getMemOperand());
18010
18011 // If this is an i16 load, insert the truncate.
18012 SDValue ResVal = BSLoad;
18013 if (N->getValueType(0) == MVT::i16)
18014 ResVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i16, BSLoad);
18015
18016 // First, combine the bswap away. This makes the value produced by the
18017 // load dead.
18018 DCI.CombineTo(N, ResVal);
18019
18020 // Next, combine the load away, we give it a bogus result value but a real
18021 // chain result. The result value is dead because the bswap is dead.
18022 DCI.CombineTo(Load.getNode(), ResVal, BSLoad.getValue(1));
18023
18024 // Return N so it doesn't get rechecked!
18025 return SDValue(N, 0);
18026 }
18027 // Convert this to two 32-bit bswap loads and a BUILD_PAIR. Do this only
18028 // before legalization so that the BUILD_PAIR is handled correctly.
18029 if (!DCI.isBeforeLegalize() || !Is64BitBswapOn64BitTgt ||
18030 !IsSingleUseNormalLd)
18031 return SDValue();
18032 LoadSDNode *LD = cast<LoadSDNode>(N->getOperand(0));
18033
18034 // Can't split volatile or atomic loads.
18035 if (!LD->isSimple())
18036 return SDValue();
18037 SDValue BasePtr = LD->getBasePtr();
18038 SDValue Lo = DAG.getLoad(MVT::i32, dl, LD->getChain(), BasePtr,
18039 LD->getPointerInfo(), LD->getAlign());
18040 Lo = DAG.getNode(ISD::BSWAP, dl, MVT::i32, Lo);
18041 BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
18042 DAG.getIntPtrConstant(4, dl));
18044 LD->getMemOperand(), 4, 4);
18045 SDValue Hi = DAG.getLoad(MVT::i32, dl, LD->getChain(), BasePtr, NewMMO);
18046 Hi = DAG.getNode(ISD::BSWAP, dl, MVT::i32, Hi);
18047 SDValue Res;
18048 if (Subtarget.isLittleEndian())
18049 Res = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Hi, Lo);
18050 else
18051 Res = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
18052 SDValue TF =
18053 DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
18054 Hi.getOperand(0).getValue(1), Lo.getOperand(0).getValue(1));
18055 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), TF);
18056 return Res;
18057 }
18058 case PPCISD::VCMP:
18059 // If a VCMP_rec node already exists with exactly the same operands as this
18060 // node, use its result instead of this node (VCMP_rec computes both a CR6
18061 // and a normal output).
18062 //
18063 if (!N->getOperand(0).hasOneUse() &&
18064 !N->getOperand(1).hasOneUse() &&
18065 !N->getOperand(2).hasOneUse()) {
18066
18067 // Scan all of the users of the LHS, looking for VCMP_rec's that match.
18068 SDNode *VCMPrecNode = nullptr;
18069
18070 SDNode *LHSN = N->getOperand(0).getNode();
18071 for (SDNode *User : LHSN->users())
18072 if (User->getOpcode() == PPCISD::VCMP_rec &&
18073 User->getOperand(1) == N->getOperand(1) &&
18074 User->getOperand(2) == N->getOperand(2) &&
18075 User->getOperand(0) == N->getOperand(0)) {
18076 VCMPrecNode = User;
18077 break;
18078 }
18079
18080 // If there is no VCMP_rec node, or if the flag value has a single use,
18081 // don't transform this.
18082 if (!VCMPrecNode || VCMPrecNode->hasNUsesOfValue(0, 1))
18083 break;
18084
18085 // Look at the (necessarily single) use of the flag value. If it has a
18086 // chain, this transformation is more complex. Note that multiple things
18087 // could use the value result, which we should ignore.
18088 SDNode *FlagUser = nullptr;
18089 for (SDNode::use_iterator UI = VCMPrecNode->use_begin();
18090 FlagUser == nullptr; ++UI) {
18091 assert(UI != VCMPrecNode->use_end() && "Didn't find user!");
18092 SDNode *User = UI->getUser();
18093 for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) {
18094 if (User->getOperand(i) == SDValue(VCMPrecNode, 1)) {
18095 FlagUser = User;
18096 break;
18097 }
18098 }
18099 }
18100
18101 // If the user is a MFOCRF instruction, we know this is safe.
18102 // Otherwise we give up for right now.
18103 if (FlagUser->getOpcode() == PPCISD::MFOCRF)
18104 return SDValue(VCMPrecNode, 0);
18105 }
18106 break;
18107 case ISD::BR_CC: {
18108 // If this is a branch on an altivec predicate comparison, lower this so
18109 // that we don't have to do a MFOCRF: instead, branch directly on CR6. This
18110 // lowering is done pre-legalize, because the legalizer lowers the predicate
18111 // compare down to code that is difficult to reassemble.
18112 // This code also handles branches that depend on the result of a store
18113 // conditional.
18114 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
18115 SDValue LHS = N->getOperand(2), RHS = N->getOperand(3);
18116
18117 int CompareOpc;
18118 bool isDot;
18119
18120 if (!isa<ConstantSDNode>(RHS) || (CC != ISD::SETEQ && CC != ISD::SETNE))
18121 break;
18122
18123 // Since we are doing this pre-legalize, the RHS can be a constant of
18124 // arbitrary bitwidth which may cause issues when trying to get the value
18125 // from the underlying APInt.
18126 auto RHSAPInt = RHS->getAsAPIntVal();
18127 if (!RHSAPInt.isIntN(64))
18128 break;
18129
18130 unsigned Val = RHSAPInt.getZExtValue();
18131 auto isImpossibleCompare = [&]() {
18132 // If this is a comparison against something other than 0/1, then we know
18133 // that the condition is never/always true.
18134 if (Val != 0 && Val != 1) {
18135 if (CC == ISD::SETEQ) // Cond never true, remove branch.
18136 return N->getOperand(0);
18137 // Always !=, turn it into an unconditional branch.
18138 return DAG.getNode(ISD::BR, dl, MVT::Other,
18139 N->getOperand(0), N->getOperand(4));
18140 }
18141 return SDValue();
18142 };
18143 // Combine branches fed by store conditional instructions (st[bhwd]cx).
18144 unsigned StoreWidth = 0;
18145 if (LHS.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
18146 isStoreConditional(LHS, StoreWidth)) {
18147 if (SDValue Impossible = isImpossibleCompare())
18148 return Impossible;
18149 PPC::Predicate CompOpc;
18150 // eq 0 => ne
18151 // ne 0 => eq
18152 // eq 1 => eq
18153 // ne 1 => ne
18154 if (Val == 0)
18155 CompOpc = CC == ISD::SETEQ ? PPC::PRED_NE : PPC::PRED_EQ;
18156 else
18157 CompOpc = CC == ISD::SETEQ ? PPC::PRED_EQ : PPC::PRED_NE;
18158
18159 SDValue Ops[] = {LHS.getOperand(0), LHS.getOperand(2), LHS.getOperand(3),
18160 DAG.getConstant(StoreWidth, dl, MVT::i32)};
18161 auto *MemNode = cast<MemSDNode>(LHS);
18162 SDValue ConstSt = DAG.getMemIntrinsicNode(
18163 PPCISD::STORE_COND, dl,
18164 DAG.getVTList(MVT::i32, MVT::Other, MVT::Glue), Ops,
18165 MemNode->getMemoryVT(), MemNode->getMemOperand());
18166
18167 SDValue InChain;
18168 // Unchain the branch from the original store conditional.
18169 if (N->getOperand(0) == LHS.getValue(1))
18170 InChain = LHS.getOperand(0);
18171 else if (N->getOperand(0).getOpcode() == ISD::TokenFactor) {
18172 SmallVector<SDValue, 4> InChains;
18173 SDValue InTF = N->getOperand(0);
18174 for (int i = 0, e = InTF.getNumOperands(); i < e; i++)
18175 if (InTF.getOperand(i) != LHS.getValue(1))
18176 InChains.push_back(InTF.getOperand(i));
18177 InChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, InChains);
18178 }
18179
18180 return DAG.getNode(PPCISD::COND_BRANCH, dl, MVT::Other, InChain,
18181 DAG.getConstant(CompOpc, dl, MVT::i32),
18182 DAG.getRegister(PPC::CR0, MVT::i32), N->getOperand(4),
18183 ConstSt.getValue(2));
18184 }
18185
18186 if (LHS.getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
18187 getVectorCompareInfo(LHS, CompareOpc, isDot, Subtarget)) {
18188 assert(isDot && "Can't compare against a vector result!");
18189
18190 if (SDValue Impossible = isImpossibleCompare())
18191 return Impossible;
18192
18193 bool BranchOnWhenPredTrue = (CC == ISD::SETEQ) ^ (Val == 0);
18194 // Create the PPCISD altivec 'dot' comparison node.
18195 SDValue Ops[] = {
18196 LHS.getOperand(2), // LHS of compare
18197 LHS.getOperand(3), // RHS of compare
18198 DAG.getConstant(CompareOpc, dl, MVT::i32)
18199 };
18200 EVT VTs[] = { LHS.getOperand(2).getValueType(), MVT::Glue };
18201 SDValue CompNode = DAG.getNode(PPCISD::VCMP_rec, dl, VTs, Ops);
18202
18203 // Unpack the result based on how the target uses it.
18204 PPC::Predicate CompOpc;
18205 switch (LHS.getConstantOperandVal(1)) {
18206 default: // Can't happen, don't crash on invalid number though.
18207 case 0: // Branch on the value of the EQ bit of CR6.
18208 CompOpc = BranchOnWhenPredTrue ? PPC::PRED_EQ : PPC::PRED_NE;
18209 break;
18210 case 1: // Branch on the inverted value of the EQ bit of CR6.
18211 CompOpc = BranchOnWhenPredTrue ? PPC::PRED_NE : PPC::PRED_EQ;
18212 break;
18213 case 2: // Branch on the value of the LT bit of CR6.
18214 CompOpc = BranchOnWhenPredTrue ? PPC::PRED_LT : PPC::PRED_GE;
18215 break;
18216 case 3: // Branch on the inverted value of the LT bit of CR6.
18217 CompOpc = BranchOnWhenPredTrue ? PPC::PRED_GE : PPC::PRED_LT;
18218 break;
18219 }
18220
18221 return DAG.getNode(PPCISD::COND_BRANCH, dl, MVT::Other, N->getOperand(0),
18222 DAG.getConstant(CompOpc, dl, MVT::i32),
18223 DAG.getRegister(PPC::CR6, MVT::i32),
18224 N->getOperand(4), CompNode.getValue(1));
18225 }
18226 break;
18227 }
18228 case ISD::BUILD_VECTOR:
18229 return DAGCombineBuildVector(N, DCI);
18230 case PPCISD::ADDC:
18231 return DAGCombineAddc(N, DCI);
18232
18233 case ISD::BITCAST:
18234 return DAGCombineBitcast(N, DCI);
18235 }
18236
18237 return SDValue();
18238}
18239
// BuildSDIVPow2 - Custom expansion of sdiv-by-(+/-)power-of-2: emit the
// PPC-specific sra+addze idiom (PPCISD::SRA_ADDZE) and, for a negated
// power of 2, negate the quotient afterwards.
// NOTE(review): the line naming this function (its signature first line)
// is missing from this extract — compare against upstream before editing.
18240SDValue
18242                                  SelectionDAG &DAG,
18243                                  SmallVectorImpl<SDNode *> &Created) const {
18244  // fold (sdiv X, pow2)
18245  EVT VT = N->getValueType(0);
  // i64 is only handled here on 64-bit subtargets.
18246  if (VT == MVT::i64 && !Subtarget.isPPC64())
18247    return SDValue();
  // Only i32/i64 with a divisor that is a power of 2 or its negation.
18248  if ((VT != MVT::i32 && VT != MVT::i64) ||
18249      !(Divisor.isPowerOf2() || Divisor.isNegatedPowerOf2()))
18250    return SDValue();
18251
18252  SDLoc DL(N);
18253  SDValue N0 = N->getOperand(0);
18254
18255  bool IsNegPow2 = Divisor.isNegatedPowerOf2();
  // Lg2 = log2(|Divisor|): the arithmetic-shift amount.
18256  unsigned Lg2 = (IsNegPow2 ? -Divisor : Divisor).countr_zero();
18257  SDValue ShiftAmt = DAG.getConstant(Lg2, DL, VT);
18258
18259  SDValue Op = DAG.getNode(PPCISD::SRA_ADDZE, DL, VT, N0, ShiftAmt);
18260  Created.push_back(Op.getNode());
18261
  // X / -2^k == -(X / 2^k): negate via (0 - Op).
18262  if (IsNegPow2) {
18263    Op = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Op);
18264    Created.push_back(Op.getNode());
18265  }
18266
18267  return Op;
18268}
18269
18270//===----------------------------------------------------------------------===//
18271// Inline Assembly Support
18272//===----------------------------------------------------------------------===//
18273
// computeKnownBitsForTargetNode - Report which result bits of PPC-specific
// nodes/intrinsics are provably zero, so generic DAG combines can use them.
// NOTE(review): the signature line and the two `case` lines that open the
// intrinsic sub-switches are missing from this extract (presumably the
// INTRINSIC_WO_CHAIN / INTRINSIC_W_CHAIN cases — confirm upstream).
18275                                                      KnownBits &Known,
18276                                                      const APInt &DemandedElts,
18277                                                      const SelectionDAG &DAG,
18278                                                      unsigned Depth) const {
18279  Known.resetAll();
18280  switch (Op.getOpcode()) {
18281  default: break;
18282  case PPCISD::LBRX: {
18283    // lhbrx is known to have the top bits cleared out.
18284    if (cast<VTSDNode>(Op.getOperand(2))->getVT() == MVT::i16)
18285      Known.Zero = 0xFFFF0000;
18286    break;
18287  }
18288  case PPCISD::ADDE: {
18289    if (Op.getResNo() == 0) {
18290      // (0|1), _ = ADDE 0, 0, CARRY
18291      SDValue LHS = Op.getOperand(0);
18292      SDValue RHS = Op.getOperand(1);
      // ADDE 0, 0, carry produces exactly the carry bit: all bits but
      // bit 0 are zero.
18293      if (isNullConstant(LHS) && isNullConstant(RHS))
18294        Known.Zero = ~1ULL;
18295    }
18296    break;
18297  }
  // NOTE(review): a `case` line is missing here in this extract.
18299    switch (Op.getConstantOperandVal(0)) {
18300    default: break;
    // All AltiVec/VSX predicate-compare intrinsics return 0 or 1.
18301    case Intrinsic::ppc_altivec_vcmpbfp_p:
18302    case Intrinsic::ppc_altivec_vcmpeqfp_p:
18303    case Intrinsic::ppc_altivec_vcmpequb_p:
18304    case Intrinsic::ppc_altivec_vcmpequh_p:
18305    case Intrinsic::ppc_altivec_vcmpequw_p:
18306    case Intrinsic::ppc_altivec_vcmpequd_p:
18307    case Intrinsic::ppc_altivec_vcmpequq_p:
18308    case Intrinsic::ppc_altivec_vcmpgefp_p:
18309    case Intrinsic::ppc_altivec_vcmpgtfp_p:
18310    case Intrinsic::ppc_altivec_vcmpgtsb_p:
18311    case Intrinsic::ppc_altivec_vcmpgtsh_p:
18312    case Intrinsic::ppc_altivec_vcmpgtsw_p:
18313    case Intrinsic::ppc_altivec_vcmpgtsd_p:
18314    case Intrinsic::ppc_altivec_vcmpgtsq_p:
18315    case Intrinsic::ppc_altivec_vcmpgtub_p:
18316    case Intrinsic::ppc_altivec_vcmpgtuh_p:
18317    case Intrinsic::ppc_altivec_vcmpgtuw_p:
18318    case Intrinsic::ppc_altivec_vcmpgtud_p:
18319    case Intrinsic::ppc_altivec_vcmpgtuq_p:
18320      Known.Zero = ~1U;  // All bits but the low one are known to be zero.
18321      break;
18322    }
18323    break;
18324  }
  // NOTE(review): a second `case` line is missing here in this extract.
18326    switch (Op.getConstantOperandVal(1)) {
18327    default:
18328      break;
18329    case Intrinsic::ppc_load2r:
18330      // Top bits are cleared for load2r (which is the same as lhbrx).
18331      Known.Zero = 0xFFFF0000;
18332      break;
18333    }
18334    break;
18335  }
18336  }
18337}
18338
// getPrefLoopAlignment - Preferred loop alignment for the current CPU:
// on the listed 970/PWR4..PWR11/FUTURE directives, prefer 32-byte alignment
// for (a) nested innermost loops and (b) loops of 17..32 bytes, so the loop
// body fits in one 32-byte fetch group / I-cache sector.
// NOTE(review): this extract is missing the function signature, a guard line
// before the innermost-loop block, and the final fallback return to
// TargetLowering::getPrefLoopAlignment — compare upstream before editing.
18340  switch (Subtarget.getCPUDirective()) {
18341  default: break;
18342  case PPC::DIR_970:
18343  case PPC::DIR_PWR4:
18344  case PPC::DIR_PWR5:
18345  case PPC::DIR_PWR5X:
18346  case PPC::DIR_PWR6:
18347  case PPC::DIR_PWR6X:
18348  case PPC::DIR_PWR7:
18349  case PPC::DIR_PWR8:
18350  case PPC::DIR_PWR9:
18351  case PPC::DIR_PWR10:
18352  case PPC::DIR_PWR11:
18353  case PPC::DIR_PWR_FUTURE: {
    // With no MachineLoop info there is nothing to base a decision on.
18354    if (!ML)
18355      break;
18356
18358    // If the nested loop is an innermost loop, prefer to a 32-byte alignment,
18359    // so that we can decrease cache misses and branch-prediction misses.
18360    // Actual alignment of the loop will depend on the hotness check and other
18361    // logic in alignBlocks.
18362    if (ML->getLoopDepth() > 1 && ML->getSubLoops().empty())
18363      return Align(32);
18364  }
18365
18366    const PPCInstrInfo *TII = Subtarget.getInstrInfo();
18367
18368    // For small loops (between 5 and 8 instructions), align to a 32-byte
18369    // boundary so that the entire loop fits in one instruction-cache line.
18370    uint64_t LoopSize = 0;
18371    for (auto I = ML->block_begin(), IE = ML->block_end(); I != IE; ++I)
18372      for (const MachineInstr &J : **I) {
18373        LoopSize += TII->getInstSizeInBytes(J);
        // Stop summing once the loop is known not to fit in 32 bytes.
18374        if (LoopSize > 32)
18375          break;
18376      }
18377
18378    if (LoopSize > 16 && LoopSize <= 32)
18379      return Align(32);
18380
18381    break;
18382  }
18383  }
18384
18386}
18387
18388/// getConstraintType - Given a constraint, return the type of
18389/// constraint it is for this target.
// Classify inline-asm constraints: single letters b/r/f/d/v/y are register
// classes, 'Z' is memory (r+r form), and the two-letter "wc"/"wa"/"wd"/
// "wf"/"ws"/"wi"/"ww" constraints are register classes (CR bit / VSX);
// everything else defers to the base class.
// NOTE(review): the return-type/signature lines are missing from this extract.
18392  if (Constraint.size() == 1) {
18393    switch (Constraint[0]) {
18394    default: break;
18395    case 'b':
18396    case 'r':
18397    case 'f':
18398    case 'd':
18399    case 'v':
18400    case 'y':
18401      return C_RegisterClass;
18402    case 'Z':
18403      // FIXME: While Z does indicate a memory constraint, it specifically
18404      // indicates an r+r address (used in conjunction with the 'y' modifier
18405      // in the replacement string). Currently, we're forcing the base
18406      // register to be r0 in the asm printer (which is interpreted as zero)
18407      // and forming the complete address in the second register. This is
18408      // suboptimal.
18409      return C_Memory;
18410    }
18411  } else if (Constraint == "wc") { // individual CR bits.
18412    return C_RegisterClass;
18413  } else if (Constraint == "wa" || Constraint == "wd" ||
18414             Constraint == "wf" || Constraint == "ws" ||
18415             Constraint == "wi" || Constraint == "ww") {
18416    return C_RegisterClass; // VSX registers.
18417  }
18418  return TargetLowering::getConstraintType(Constraint);
18419}
18420
18421/// Examine constraint type and operand type and determine a weight value.
18422/// This object must already have been set up with the operand type
18423/// and the current alternative constraint selected.
// getSingleConstraintMatchWeight - Score how well the current operand type
// matches a constraint: CW_Register when the operand's IR type fits the
// constraint's register class, CW_Memory for 'Z', CW_Default when no value.
// NOTE(review): the signature lines and the `weight` initializer line are
// missing from this extract — compare upstream before editing.
18426    AsmOperandInfo &info, const char *constraint) const {
18428  Value *CallOperandVal = info.CallOperandVal;
18429  // If we don't have a value, we can't do a match,
18430  // but allow it at the lowest weight.
18431  if (!CallOperandVal)
18432    return CW_Default;
18433  Type *type = CallOperandVal->getType();
18434
18435  // Look at the constraint type.
  // Two-letter constraints first: CR bit for i1, VSX classes for vectors,
  // 64-bit ints, doubles and floats respectively.
18436  if (StringRef(constraint) == "wc" && type->isIntegerTy(1))
18437    return CW_Register; // an individual CR bit.
18438  else if ((StringRef(constraint) == "wa" ||
18439            StringRef(constraint) == "wd" ||
18440            StringRef(constraint) == "wf") &&
18441           type->isVectorTy())
18442    return CW_Register;
18443  else if (StringRef(constraint) == "wi" && type->isIntegerTy(64))
18444    return CW_Register; // just hold 64-bit integers data.
18445  else if (StringRef(constraint) == "ws" && type->isDoubleTy())
18446    return CW_Register;
18447  else if (StringRef(constraint) == "ww" && type->isFloatTy())
18448    return CW_Register;
18449
  // Single-letter constraints.
18450  switch (*constraint) {
18451  default:
18453    break;
18454  case 'b':
18455    if (type->isIntegerTy())
18456      weight = CW_Register;
18457    break;
18458  case 'f':
18459    if (type->isFloatTy())
18460      weight = CW_Register;
18461    break;
18462  case 'd':
18463    if (type->isDoubleTy())
18464      weight = CW_Register;
18465    break;
18466  case 'v':
18467    if (type->isVectorTy())
18468      weight = CW_Register;
18469    break;
18470  case 'y':
18471    weight = CW_Register;
18472    break;
18473  case 'Z':
18474    weight = CW_Memory;
18475    break;
18476  }
18477  return weight;
18478}
18479
// getRegForInlineAsmConstraint - Map an inline-asm constraint (letter,
// two-letter VSX/CR form, or an explicit "{reg}" name) plus a value type to
// a (physical register, register class) pair.  Handles PPC-specific cases
// (VSX "vs" names, FP registers, 32->64-bit GPR upgrade, "cc" alias) before
// and after deferring to the base class.
// NOTE(review): this extract is missing the signature line and the line that
// calls TargetLowering::getRegForInlineAsmConstraint to initialize R.
18480std::pair<unsigned, const TargetRegisterClass *>
18482                                                  StringRef Constraint,
18483                                                  MVT VT) const {
18484  if (Constraint.size() == 1) {
18485    // GCC RS6000 Constraint Letters
18486    switch (Constraint[0]) {
18487    case 'b': // R1-R31
18488      if (VT == MVT::i64 && Subtarget.isPPC64())
18489        return std::make_pair(0U, &PPC::G8RC_NOX0RegClass);
18490      return std::make_pair(0U, &PPC::GPRC_NOR0RegClass);
18491    case 'r': // R0-R31
18492      if (VT == MVT::i64 && Subtarget.isPPC64())
18493        return std::make_pair(0U, &PPC::G8RCRegClass);
18494      return std::make_pair(0U, &PPC::GPRCRegClass);
18495    // 'd' and 'f' constraints are both defined to be "the floating point
18496    // registers", where one is for 32-bit and the other for 64-bit. We don't
18497    // really care overly much here so just give them all the same reg classes.
18498    case 'd':
18499    case 'f':
18500      if (Subtarget.hasSPE()) {
18501        if (VT == MVT::f32 || VT == MVT::i32)
18502          return std::make_pair(0U, &PPC::GPRCRegClass);
18503        if (VT == MVT::f64 || VT == MVT::i64)
18504          return std::make_pair(0U, &PPC::SPERCRegClass);
18505      } else {
18506        if (VT == MVT::f32 || VT == MVT::i32)
18507          return std::make_pair(0U, &PPC::F4RCRegClass);
18508        if (VT == MVT::f64 || VT == MVT::i64)
18509          return std::make_pair(0U, &PPC::F8RCRegClass);
18510      }
18511      break;
18512    case 'v':
18513      if (Subtarget.hasAltivec() && VT.isVector())
18514        return std::make_pair(0U, &PPC::VRRCRegClass);
18515      else if (Subtarget.hasVSX())
18516        // Scalars in Altivec registers only make sense with VSX.
18517        return std::make_pair(0U, &PPC::VFRCRegClass);
18518      break;
18519    case 'y': // crrc
18520      return std::make_pair(0U, &PPC::CRRCRegClass);
18521    }
18522  } else if (Constraint == "wc" && Subtarget.useCRBits()) {
18523    // An individual CR bit.
18524    return std::make_pair(0U, &PPC::CRBITRCRegClass);
18525  } else if ((Constraint == "wa" || Constraint == "wd" ||
18526             Constraint == "wf" || Constraint == "wi") &&
18527             Subtarget.hasVSX()) {
18528    // A VSX register for either a scalar (FP) or vector. There is no
18529    // support for single precision scalars on subtargets prior to Power8.
18530    if (VT.isVector())
18531      return std::make_pair(0U, &PPC::VSRCRegClass);
18532    if (VT == MVT::f32 && Subtarget.hasP8Vector())
18533      return std::make_pair(0U, &PPC::VSSRCRegClass);
18534    return std::make_pair(0U, &PPC::VSFRCRegClass);
18535  } else if ((Constraint == "ws" || Constraint == "ww") && Subtarget.hasVSX()) {
18536    if (VT == MVT::f32 && Subtarget.hasP8Vector())
18537      return std::make_pair(0U, &PPC::VSSRCRegClass);
18538    else
18539      return std::make_pair(0U, &PPC::VSFRCRegClass);
18540  } else if (Constraint == "lr") {
18541    if (VT == MVT::i64)
18542      return std::make_pair(0U, &PPC::LR8RCRegClass);
18543    else
18544      return std::make_pair(0U, &PPC::LRRCRegClass);
18545  }
18546
18547  // Handle special cases of physical registers that are not properly handled
18548  // by the base class.
18549  if (Constraint[0] == '{' && Constraint[Constraint.size() - 1] == '}') {
18550    // If we name a VSX register, we can't defer to the base class because it
18551    // will not recognize the correct register (their names will be VSL{0-31}
18552    // and V{0-31} so they won't match). So we match them here.
18553    if (Constraint.size() > 3 && Constraint[1] == 'v' && Constraint[2] == 's') {
18554      int VSNum = atoi(Constraint.data() + 3);
18555      assert(VSNum >= 0 && VSNum <= 63 &&
18556             "Attempted to access a vsr out of range");
      // vs0..vs31 alias the VSL registers; vs32..vs63 alias V0..V31.
18557      if (VSNum < 32)
18558        return std::make_pair(PPC::VSL0 + VSNum, &PPC::VSRCRegClass);
18559      return std::make_pair(PPC::V0 + VSNum - 32, &PPC::VSRCRegClass);
18560    }
18561
18562    // For float registers, we can't defer to the base class as it will match
18563    // the SPILLTOVSRRC class.
18564    if (Constraint.size() > 3 && Constraint[1] == 'f') {
18565      int RegNum = atoi(Constraint.data() + 2);
18566      if (RegNum > 31 || RegNum < 0)
18567        report_fatal_error("Invalid floating point register number");
18568      if (VT == MVT::f32 || VT == MVT::i32)
18569        return Subtarget.hasSPE()
18570                   ? std::make_pair(PPC::R0 + RegNum, &PPC::GPRCRegClass)
18571                   : std::make_pair(PPC::F0 + RegNum, &PPC::F4RCRegClass);
18572      if (VT == MVT::f64 || VT == MVT::i64)
18573        return Subtarget.hasSPE()
18574                   ? std::make_pair(PPC::S0 + RegNum, &PPC::SPERCRegClass)
18575                   : std::make_pair(PPC::F0 + RegNum, &PPC::F8RCRegClass);
18576    }
18577  }
18578
18579  std::pair<unsigned, const TargetRegisterClass *> R =
18581
18582  // r[0-9]+ are used, on PPC64, to refer to the corresponding 64-bit registers
18583  // (which we call X[0-9]+). If a 64-bit value has been requested, and a
18584  // 32-bit GPR has been selected, then 'upgrade' it to the 64-bit parent
18585  // register.
18586  // FIXME: If TargetLowering::getRegForInlineAsmConstraint could somehow use
18587  // the AsmName field from *RegisterInfo.td, then this would not be necessary.
18588  if (R.first && VT == MVT::i64 && Subtarget.isPPC64() &&
18589      PPC::GPRCRegClass.contains(R.first))
18590    return std::make_pair(TRI->getMatchingSuperReg(R.first,
18591                            PPC::sub_32, &PPC::G8RCRegClass),
18592                          &PPC::G8RCRegClass);
18593
18594  // GCC accepts 'cc' as an alias for 'cr0', and we need to do the same.
18595  if (!R.second && StringRef("{cc}").equals_insensitive(Constraint)) {
18596    R.first = PPC::CR0;
18597    R.second = &PPC::CRRCRegClass;
18598  }
18599  // FIXME: This warning should ideally be emitted in the front end.
18600  const auto &TM = getTargetMachine();
18601  if (Subtarget.isAIXABI() && !TM.getAIXExtendedAltivecABI()) {
    // v20-v31 (and their VSX aliases) are reserved under the default AIX
    // AltiVec ABI; warn when inline asm names one of them.
18602    if (((R.first >= PPC::V20 && R.first <= PPC::V31) ||
18603         (R.first >= PPC::VF20 && R.first <= PPC::VF31)) &&
18604        (R.second == &PPC::VSRCRegClass || R.second == &PPC::VSFRCRegClass))
18605      errs() << "warning: vector registers 20 to 32 are reserved in the "
18606                "default AIX AltiVec ABI and cannot be used\n";
18607  }
18608
18609  return R;
18610}
18611
18612/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
18613/// vector. If it is invalid, don't add anything to Ops.
// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
// vector for a single-letter immediate constraint (I/J/K/L/M/N/O/P); if the
// constant does not satisfy the constraint, nothing is added.  Non-immediate
// constraints fall through to the base-class handler at the end.
// NOTE(review): this extract is missing the signature line, the dyn_cast
// line that initializes CST, the `if` guards for cases 'J' and 'L', and the
// final call to TargetLowering::LowerAsmOperandForConstraint.
18615                                                     StringRef Constraint,
18616                                                     std::vector<SDValue> &Ops,
18617                                                     SelectionDAG &DAG) const {
18618  SDValue Result;
18619
18620  // Only support length 1 constraints.
18621  if (Constraint.size() > 1)
18622    return;
18623
18624  char Letter = Constraint[0];
18625  switch (Letter) {
18626  default: break;
18627  case 'I':
18628  case 'J':
18629  case 'K':
18630  case 'L':
18631  case 'M':
18632  case 'N':
18633  case 'O':
18634  case 'P': {
18636    if (!CST) return; // Must be an immediate to match.
18637    SDLoc dl(Op);
18638    int64_t Value = CST->getSExtValue();
18639    EVT TCVT = MVT::i64; // All constants taken to be 64 bits so that negative
                         // numbers are printed as such.
18641    switch (Letter) {
18642    default: llvm_unreachable("Unknown constraint letter!");
18643    case 'I': // "I" is a signed 16-bit constant.
18644      if (isInt<16>(Value))
18645        Result = DAG.getTargetConstant(Value, dl, TCVT);
18646      break;
18647    case 'J': // "J" is a constant with only the high-order 16 bits nonzero.
18649      Result = DAG.getTargetConstant(Value, dl, TCVT);
18650      break;
18651    case 'L': // "L" is a signed 16-bit constant shifted left 16 bits.
18653      Result = DAG.getTargetConstant(Value, dl, TCVT);
18654      break;
18655    case 'K': // "K" is a constant with only the low-order 16 bits nonzero.
18656      if (isUInt<16>(Value))
18657        Result = DAG.getTargetConstant(Value, dl, TCVT);
18658      break;
18659    case 'M': // "M" is a constant that is greater than 31.
18660      if (Value > 31)
18661        Result = DAG.getTargetConstant(Value, dl, TCVT);
18662      break;
18663    case 'N': // "N" is a positive constant that is an exact power of two.
18664      if (Value > 0 && isPowerOf2_64(Value))
18665        Result = DAG.getTargetConstant(Value, dl, TCVT);
18666      break;
18667    case 'O': // "O" is the constant zero.
18668      if (Value == 0)
18669        Result = DAG.getTargetConstant(Value, dl, TCVT);
18670      break;
18671    case 'P': // "P" is a constant whose negation is a signed 16-bit constant.
18672      if (isInt<16>(-Value))
18673        Result = DAG.getTargetConstant(Value, dl, TCVT);
18674      break;
18675    }
18676    break;
18677  }
18678  }
18679
18680  if (Result.getNode()) {
18681    Ops.push_back(Result);
18682    return;
18683  }
18684
18685  // Handle standard constraint letters.
18687}
18688
// For the PPC trap intrinsics (tdw/tw/trapd/trap), append any !annotation
// metadata from the IR call as an extra SDNode operand so it survives into
// the DAG; all other intrinsics are left untouched.
// NOTE(review): the signature lines are missing from this extract.
18691                                              SelectionDAG &DAG) const {
18692  if (I.getNumOperands() <= 1)
18693    return;
  // Ops[1] holds the intrinsic ID; bail out if it isn't a constant.
18694  if (!isa<ConstantSDNode>(Ops[1].getNode()))
18695    return;
18696  auto IntrinsicID = Ops[1].getNode()->getAsZExtVal();
18697  if (IntrinsicID != Intrinsic::ppc_tdw && IntrinsicID != Intrinsic::ppc_tw &&
18698      IntrinsicID != Intrinsic::ppc_trapd && IntrinsicID != Intrinsic::ppc_trap)
18699    return;
18700
18701  if (MDNode *MDN = I.getMetadata(LLVMContext::MD_annotation))
18702    Ops.push_back(DAG.getMDNode(MDN));
18703}
18704
18705// isLegalAddressingMode - Return true if the addressing mode represented
18706// by AM is legal for this target, for a load/store of the specified type.
// isLegalAddressingMode - Return true if the addressing mode represented by
// AM is legal for this target for a load/store of type Ty: r+i with a signed
// 16-bit offset, r+r (scale 1 with no offset), or 2*r treated as r+r; no
// global base, and no r+i for vectors before Power9.
// NOTE(review): the signature line is missing from this extract.
18708                                               const AddrMode &AM, Type *Ty,
18709                                               unsigned AS,
18710                                               Instruction *I) const {
18711  // Vector type r+i form is supported since power9 as DQ form. We don't check
18712  // the offset matching DQ form requirement(off % 16 == 0), because on PowerPC,
18713  // imm form is preferred and the offset can be adjusted to use imm form later
18714  // in pass PPCLoopInstrFormPrep. Also in LSR, for one LSRUse, it uses min and
18715  // max offset to check legal addressing mode, we should be a little aggressive
18716  // to contain other offsets for that LSRUse.
18717  if (Ty->isVectorTy() && AM.BaseOffs != 0 && !Subtarget.hasP9Vector())
18718    return false;
18719
18720  // PPC allows a sign-extended 16-bit immediate field.
18721  if (AM.BaseOffs <= -(1LL << 16) || AM.BaseOffs >= (1LL << 16)-1)
18722    return false;
18723
18724  // No global is ever allowed as a base.
18725  if (AM.BaseGV)
18726    return false;
18727
18728  // PPC only support r+r,
18729  switch (AM.Scale) {
18730  case 0: // "r+i" or just "i", depending on HasBaseReg.
18731    break;
18732  case 1:
18733    if (AM.HasBaseReg && AM.BaseOffs) // "r+r+i" is not allowed.
18734      return false;
18735    // Otherwise we have r+r or r+i.
18736    break;
18737  case 2:
18738    if (AM.HasBaseReg || AM.BaseOffs) // 2*r+r or 2*r+i is not allowed.
18739      return false;
18740    // Allow 2*r as r+r.
18741    break;
18742  default:
18743    // No other scales are supported.
18744    return false;
18745  }
18746
18747  return true;
18748}
18749
// LowerRETURNADDR - Lower @llvm.returnaddress: depth 0 loads the saved LR
// slot from this function's frame; depth > 0 first loads the caller's frame
// address and then loads LR at the ABI return-save offset from it.
// NOTE(review): this extract is missing the `MF` initialization line and the
// MachinePointerInfo argument/closing lines of the getLoad calls — compare
// upstream before editing.
18750SDValue PPCTargetLowering::LowerRETURNADDR(SDValue Op,
18751                                           SelectionDAG &DAG) const {
18753  MachineFrameInfo &MFI = MF.getFrameInfo();
18754  MFI.setReturnAddressIsTaken(true);
18755
18756  SDLoc dl(Op);
18757  unsigned Depth = Op.getConstantOperandVal(0);
18758
18759  // Make sure the function does not optimize away the store of the RA to
18760  // the stack.
18761  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
18762  FuncInfo->setLRStoreRequired();
18763  auto PtrVT = getPointerTy(MF.getDataLayout());
18764
18765  if (Depth > 0) {
18766    // The link register (return address) is saved in the caller's frame
18767    // not the callee's stack frame. So we must get the caller's frame
18768    // address and load the return address at the LR offset from there.
18769    SDValue FrameAddr =
18770        DAG.getLoad(Op.getValueType(), dl, DAG.getEntryNode(),
18772    SDValue Offset =
18773        DAG.getConstant(Subtarget.getFrameLowering()->getReturnSaveOffset(), dl,
18774                        Subtarget.getScalarIntVT());
18775    return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(),
18776                       DAG.getNode(ISD::ADD, dl, PtrVT, FrameAddr, Offset),
18778  }
18779
18780  // Just load the return address off the stack.
18781  SDValue RetAddrFI = getReturnAddrFrameIndex(DAG);
18782  return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), RetAddrFI,
18784}
18785
18786SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op,
18787 SelectionDAG &DAG) const {
18788 SDLoc dl(Op);
18789 unsigned Depth = Op.getConstantOperandVal(0);
18790
18791 MachineFunction &MF = DAG.getMachineFunction();
18792 MachineFrameInfo &MFI = MF.getFrameInfo();
18793 MFI.setFrameAddressIsTaken(true);
18794
18795 EVT PtrVT = getPointerTy(MF.getDataLayout());
18796 bool isPPC64 = PtrVT == MVT::i64;
18797
18798 // Naked functions never have a frame pointer, and so we use r1. For all
18799 // other functions, this decision must be delayed until during PEI.
18800 unsigned FrameReg;
18801 if (MF.getFunction().hasFnAttribute(Attribute::Naked))
18802 FrameReg = isPPC64 ? PPC::X1 : PPC::R1;
18803 else
18804 FrameReg = isPPC64 ? PPC::FP8 : PPC::FP;
18805
18806 SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg,
18807 PtrVT);
18808 while (Depth--)
18809 FrameAddr = DAG.getLoad(Op.getValueType(), dl, DAG.getEntryNode(),
18810 FrameAddr, MachinePointerInfo());
18811 return FrameAddr;
18812}
18813
18814#define GET_REGISTER_MATCHER
18815#include "PPCGenAsmMatcher.inc"
18816
// getRegisterByName - Resolve a named register for
// @llvm.read_register/@llvm.write_register.  Rejects invalid value types
// and the reserved r0 / r2(PPC64) registers, and rewrites rN -> xN when a
// 64-bit GPR is requested.
// NOTE(review): this extract is missing the signature line and the line that
// initializes `Reg` from RegName — compare upstream before editing.
18818                                     const MachineFunction &MF) const {
18819  bool IsPPC64 = Subtarget.isPPC64();
18820
  // Only 64-bit scalars on PPC64 and 32-bit scalars are valid types.
18821  bool Is64Bit = IsPPC64 && VT == LLT::scalar(64);
18822  if (!Is64Bit && VT != LLT::scalar(32))
18823    report_fatal_error("Invalid register global variable type");
18824
18826  if (!Reg)
18827    return Reg;
18828
18829  // FIXME: Unable to generate code for `-O2` but okay for `-O0`.
18830  // Need followup investigation as to why.
18831  if ((IsPPC64 && Reg == PPC::R2) || Reg == PPC::R0)
18832    report_fatal_error(Twine("Trying to reserve an invalid register \"" +
18833                             StringRef(RegName) + "\"."));
18834
18835  // Convert GPR to GP8R register for 64bit.
18836  if (Is64Bit && StringRef(RegName).starts_with_insensitive("r"))
18837    Reg = Reg.id() - PPC::R0 + PPC::X0;
18838
18839  return Reg;
18840}
18841
// Return true if the given address is reached through a GOT/TOC load rather
// than directly: always on 32-bit ELF and AIX, under small/large code models,
// for jump tables / block addresses, and for globals the subtarget says are
// indirect.
// NOTE(review): this extract is missing the signature line, the CModel
// initialization, and the `if` guards before the last two returns — compare
// upstream before editing.
18843  // 32-bit SVR4 ABI access everything as got-indirect.
18844  if (Subtarget.is32BitELFABI())
18845    return true;
18846
18847  // AIX accesses everything indirectly through the TOC, which is similar to
18848  // the GOT.
18849  if (Subtarget.isAIXABI())
18850    return true;
18851
18853  // If it is small or large code model, module locals are accessed
18854  // indirectly by loading their address from .toc/.got.
18855  if (CModel == CodeModel::Small || CModel == CodeModel::Large)
18856    return true;
18857
18858  // JumpTable and BlockAddress are accessed as got-indirect.
18860    return true;
18861
18863    return Subtarget.isGVIndirectSymbol(G->getGlobal());
18864
18865  return false;
18866}
18867
// Always false: PPC does not fold constant offsets into global addresses.
// NOTE(review): the signature line is missing from this extract.
18868bool
18870  // The PowerPC target isn't yet aware of offsets.
18871  return false;
18872}
18873
// Describe the memory behavior of PPC target intrinsics (i128 atomics,
// AltiVec/VSX loads and stores, and the st[bhwd]cx. store-conditionals) by
// filling an IntrinsicInfo record and appending it to Infos.
// NOTE(review): this extract is missing the signature lines and every
// `Info.flags = ...` assignment line — compare upstream before editing.
18876    MachineFunction &MF, unsigned Intrinsic) const {
18877  IntrinsicInfo Info;
18878  switch (Intrinsic) {
18879  case Intrinsic::ppc_atomicrmw_xchg_i128:
18880  case Intrinsic::ppc_atomicrmw_add_i128:
18881  case Intrinsic::ppc_atomicrmw_sub_i128:
18882  case Intrinsic::ppc_atomicrmw_nand_i128:
18883  case Intrinsic::ppc_atomicrmw_and_i128:
18884  case Intrinsic::ppc_atomicrmw_or_i128:
18885  case Intrinsic::ppc_atomicrmw_xor_i128:
18886  case Intrinsic::ppc_cmpxchg_i128:
    // Quadword atomic RMW/cmpxchg: i128 access through the pointer operand.
18887    Info.opc = ISD::INTRINSIC_W_CHAIN;
18888    Info.memVT = MVT::i128;
18889    Info.ptrVal = I.getArgOperand(0);
18890    Info.offset = 0;
18891    Info.align = Align(16);
18894    Infos.push_back(Info);
18895    return;
18896  case Intrinsic::ppc_atomic_load_i128:
18897    Info.opc = ISD::INTRINSIC_W_CHAIN;
18898    Info.memVT = MVT::i128;
18899    Info.ptrVal = I.getArgOperand(0);
18900    Info.offset = 0;
18901    Info.align = Align(16);
18903    Infos.push_back(Info);
18904    return;
18905  case Intrinsic::ppc_atomic_store_i128:
18906    Info.opc = ISD::INTRINSIC_VOID;
18907    Info.memVT = MVT::i128;
    // For the store, the pointer is the third argument (value halves first).
18908    Info.ptrVal = I.getArgOperand(2);
18909    Info.offset = 0;
18910    Info.align = Align(16);
18912    Infos.push_back(Info);
18913    return;
18914  case Intrinsic::ppc_altivec_lvx:
18915  case Intrinsic::ppc_altivec_lvxl:
18916  case Intrinsic::ppc_altivec_lvebx:
18917  case Intrinsic::ppc_altivec_lvehx:
18918  case Intrinsic::ppc_altivec_lvewx:
18919  case Intrinsic::ppc_vsx_lxvd2x:
18920  case Intrinsic::ppc_vsx_lxvw4x:
18921  case Intrinsic::ppc_vsx_lxvd2x_be:
18922  case Intrinsic::ppc_vsx_lxvw4x_be:
18923  case Intrinsic::ppc_vsx_lxvl:
18924  case Intrinsic::ppc_vsx_lxvll: {
18925    EVT VT;
18926    switch (Intrinsic) {
18927    case Intrinsic::ppc_altivec_lvebx:
18928      VT = MVT::i8;
18929      break;
18930    case Intrinsic::ppc_altivec_lvehx:
18931      VT = MVT::i16;
18932      break;
18933    case Intrinsic::ppc_altivec_lvewx:
18934      VT = MVT::i32;
18935      break;
18936    case Intrinsic::ppc_vsx_lxvd2x:
18937    case Intrinsic::ppc_vsx_lxvd2x_be:
18938      VT = MVT::v2f64;
18939      break;
18940    default:
18941      VT = MVT::v4i32;
18942      break;
18943    }
18944
18945    Info.opc = ISD::INTRINSIC_W_CHAIN;
18946    Info.memVT = VT;
18947    Info.ptrVal = I.getArgOperand(0);
    // The lv* instructions ignore the low address bits, so conservatively
    // cover the whole naturally-aligned window around the pointer.
18948    Info.offset = -VT.getStoreSize()+1;
18949    Info.size = 2*VT.getStoreSize()-1;
18950    Info.align = Align(1);
18951    Info.flags = MachineMemOperand::MOLoad;
18952    Infos.push_back(Info);
18953    return;
18954  }
18955  case Intrinsic::ppc_altivec_stvx:
18956  case Intrinsic::ppc_altivec_stvxl:
18957  case Intrinsic::ppc_altivec_stvebx:
18958  case Intrinsic::ppc_altivec_stvehx:
18959  case Intrinsic::ppc_altivec_stvewx:
18960  case Intrinsic::ppc_vsx_stxvd2x:
18961  case Intrinsic::ppc_vsx_stxvw4x:
18962  case Intrinsic::ppc_vsx_stxvd2x_be:
18963  case Intrinsic::ppc_vsx_stxvw4x_be:
18964  case Intrinsic::ppc_vsx_stxvl:
18965  case Intrinsic::ppc_vsx_stxvll: {
18966    EVT VT;
18967    switch (Intrinsic) {
18968    case Intrinsic::ppc_altivec_stvebx:
18969      VT = MVT::i8;
18970      break;
18971    case Intrinsic::ppc_altivec_stvehx:
18972      VT = MVT::i16;
18973      break;
18974    case Intrinsic::ppc_altivec_stvewx:
18975      VT = MVT::i32;
18976      break;
18977    case Intrinsic::ppc_vsx_stxvd2x:
18978    case Intrinsic::ppc_vsx_stxvd2x_be:
18979      VT = MVT::v2f64;
18980      break;
18981    default:
18982      VT = MVT::v4i32;
18983      break;
18984    }
18985
18986    Info.opc = ISD::INTRINSIC_VOID;
18987    Info.memVT = VT;
    // Stores take (value, pointer): the pointer is operand 1.
18988    Info.ptrVal = I.getArgOperand(1);
18989    Info.offset = -VT.getStoreSize()+1;
18990    Info.size = 2*VT.getStoreSize()-1;
18991    Info.align = Align(1);
18992    Info.flags = MachineMemOperand::MOStore;
18993    Infos.push_back(Info);
18994    return;
18995  }
18996  case Intrinsic::ppc_stdcx:
18997  case Intrinsic::ppc_stwcx:
18998  case Intrinsic::ppc_sthcx:
18999  case Intrinsic::ppc_stbcx: {
19000    EVT VT;
19001    auto Alignment = Align(8);
19002    switch (Intrinsic) {
19003    case Intrinsic::ppc_stdcx:
19004      VT = MVT::i64;
19005      break;
19006    case Intrinsic::ppc_stwcx:
19007      VT = MVT::i32;
19008      Alignment = Align(4);
19009      break;
19010    case Intrinsic::ppc_sthcx:
19011      VT = MVT::i16;
19012      Alignment = Align(2);
19013      break;
19014    case Intrinsic::ppc_stbcx:
19015      VT = MVT::i8;
19016      Alignment = Align(1);
19017      break;
19018    }
19019    Info.opc = ISD::INTRINSIC_W_CHAIN;
19020    Info.memVT = VT;
19021    Info.ptrVal = I.getArgOperand(0);
19022    Info.offset = 0;
19023    Info.align = Alignment;
19025    Infos.push_back(Info);
19026    return;
19027  }
19028  default:
19029    break;
19030  }
19031}
19032
19033/// It returns EVT::Other if the type should be determined using generic
19034/// target-independent logic.
// getOptimalMemOpType - Pick the widest profitable type for memcpy/memset
// expansion: a 16-byte vector type when Altivec/VSX applies (v8i16 for
// memsets with a 3-4 byte tail so the tail store type is legal), otherwise
// the native GPR width.
// NOTE(review): the signature line is missing from this extract.
19036    LLVMContext &Context, const MemOp &Op,
19037    const AttributeList &FuncAttributes) const {
19038  if (getTargetMachine().getOptLevel() != CodeGenOptLevel::None) {
19039    // We should use Altivec/VSX loads and stores when available. For unaligned
19040    // addresses, unaligned VSX loads are only fast starting with the P8.
19041    if (Subtarget.hasAltivec() && Op.size() >= 16) {
19042      if (Op.isMemset() && Subtarget.hasVSX()) {
19043        uint64_t TailSize = Op.size() % 16;
19044        // For memset lowering, EXTRACT_VECTOR_ELT tries to return constant
19045        // element if vector element type matches tail store. For tail size
19046        // 3/4, the tail store is i32, v4i32 cannot be used, need a legal one.
19047        if (TailSize > 2 && TailSize <= 4) {
19048          return MVT::v8i16;
19049        }
19050        return MVT::v4i32;
19051      }
19052      if (Op.isAligned(Align(16)) || Subtarget.hasP8Vector())
19053        return MVT::v4i32;
19054    }
19055  }
19056
19057  if (Subtarget.isPPC64()) {
19058    return MVT::i64;
19059  }
19060
19061  return MVT::i32;
19062}
19063
19064/// Returns true if it is beneficial to convert a load of a constant
19065/// to just the constant itself.
// Materializing any integer constant of 1..64 bits is preferable to loading
// it from memory.
// NOTE(review): the signature line is missing from this extract.
19067                                                      Type *Ty) const {
19068  assert(Ty->isIntegerTy());
19069
19070  unsigned BitSize = Ty->getPrimitiveSizeInBits();
19071  return !(BitSize == 0 || BitSize > 64);
19072}
19073
// Truncation is free only for integer i64 -> i32 (IR-type overload).
// NOTE(review): the signature line is missing from this extract.
19075  if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
19076    return false;
19077  unsigned NumBits1 = Ty1->getPrimitiveSizeInBits();
19078  unsigned NumBits2 = Ty2->getPrimitiveSizeInBits();
19079  return NumBits1 == 64 && NumBits2 == 32;
19081
// Truncation is free only for integer i64 -> i32 (EVT overload).
// NOTE(review): the signature line is missing from this extract.
19083  if (!VT1.isInteger() || !VT2.isInteger())
19084    return false;
19085  unsigned NumBits1 = VT1.getSizeInBits();
19086  unsigned NumBits2 = VT2.getSizeInBits();
19087  return NumBits1 == 64 && NumBits2 == 32;
19089
// isZExtFree - A zext is free when it folds into a small (i1/i8/i16, or i32
// on PPC64) non-extending or zero-extending load; otherwise defer to the
// base class.
// NOTE(review): the signature line is missing from this extract.
19091  // Generally speaking, zexts are not free, but they are free when they can be
19092  // folded with other operations.
19093  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Val)) {
19094    EVT MemVT = LD->getMemoryVT();
19095    if ((MemVT == MVT::i1 || MemVT == MVT::i8 || MemVT == MVT::i16 ||
19096         (Subtarget.isPPC64() && MemVT == MVT::i32)) &&
19097        (LD->getExtensionType() == ISD::NON_EXTLOAD ||
19098         LD->getExtensionType() == ISD::ZEXTLOAD))
19099      return true;
19100  }
19101
19102  // FIXME: Add other cases...
19103  //  - 32-bit shifts with a zext to i64
19104  //  - zext after ctlz, bswap, etc.
19105  //  - zext after and by a constant mask
19106
19107  return TargetLowering::isZExtFree(Val, VT2);
19108}
19109
19110bool PPCTargetLowering::isFPExtFree(EVT DestVT, EVT SrcVT) const {
19111 assert(DestVT.isFloatingPoint() && SrcVT.isFloatingPoint() &&
19112 "invalid fpext types");
19113 // Extending to float128 is not free.
19114 if (DestVT == MVT::f128)
19115 return false;
19116 return true;
19117}
19118
// Legal compare immediates fit a 16-bit signed or unsigned field.
// NOTE(review): the signature line is missing from this extract (by position
// this is presumably isLegalICmpImmediate — confirm upstream).
19120  return isInt<16>(Imm) || isUInt<16>(Imm);
19121}
19122
// Legal add immediates fit a 16-bit signed or unsigned field.
// NOTE(review): the signature line is missing from this extract (by position
// this is presumably isLegalAddImmediate — confirm upstream).
19124  return isInt<16>(Imm) || isUInt<16>(Imm);
19125}
19126
// allowsMisalignedMemoryAccesses - Unaligned access is allowed (and reported
// fast) for simple scalar types; FP scalars additionally require subtarget
// support, vectors require VSX and one of the four 128-bit types, and
// ppcf128 is never allowed.
// NOTE(review): this extract is missing the signature lines and the guard
// condition above the first `return false` — compare upstream before editing.
19129                                                       unsigned *Fast) const {
19131    return false;
19132
19133  // PowerPC supports unaligned memory access for simple non-vector types.
19134  // Although accessing unaligned addresses is not as efficient as accessing
19135  // aligned addresses, it is generally more efficient than manual expansion,
19136  // and generally only traps for software emulation when crossing page
19137  // boundaries.
19138
19139  if (!VT.isSimple())
19140    return false;
19141
19142  if (VT.isFloatingPoint() && !VT.isVector() &&
19143      !Subtarget.allowsUnalignedFPAccess())
19144    return false;
19145
19146  if (VT.getSimpleVT().isVector()) {
19147    if (Subtarget.hasVSX()) {
      // Only the 128-bit VSX-native vector types qualify.
19148      if (VT != MVT::v2f64 && VT != MVT::v2i64 &&
19149          VT != MVT::v4f32 && VT != MVT::v4i32)
19150        return false;
19151    } else {
19152      return false;
19153    }
19154  }
19155
19156  if (VT == MVT::ppcf128)
19157    return false;
19158
19159  if (Fast)
19160    *Fast = 1;
19161
19162  return true;
19163}
19164
19166 SDValue C) const {
19167 // Check integral scalar types.
19168 if (!VT.isScalarInteger())
19169 return false;
19170 if (auto *ConstNode = dyn_cast<ConstantSDNode>(C.getNode())) {
19171 if (!ConstNode->getAPIntValue().isSignedIntN(64))
19172 return false;
19173 // This transformation will generate >= 2 operations. But the following
19174 // cases will generate <= 2 instructions during ISEL. So exclude them.
19175 // 1. If the constant multiplier fits 16 bits, it can be handled by one
19176 // HW instruction, ie. MULLI
19177 // 2. If the multiplier after shifted fits 16 bits, an extra shift
19178 // instruction is needed than case 1, ie. MULLI and RLDICR
19179 int64_t Imm = ConstNode->getSExtValue();
19180 unsigned Shift = llvm::countr_zero<uint64_t>(Imm);
19181 Imm >>= Shift;
19182 if (isInt<16>(Imm))
19183 return false;
19184 uint64_t UImm = static_cast<uint64_t>(Imm);
19185 if (isPowerOf2_64(UImm + 1) || isPowerOf2_64(UImm - 1) ||
19186 isPowerOf2_64(1 - UImm) || isPowerOf2_64(-1 - UImm))
19187 return true;
19188 }
19189 return false;
19190}
19191
19197
19199 Type *Ty) const {
19200 if (Subtarget.hasSPE() || Subtarget.useSoftFloat())
19201 return false;
19202 switch (Ty->getScalarType()->getTypeID()) {
19203 case Type::FloatTyID:
19204 case Type::DoubleTyID:
19205 return true;
19206 case Type::FP128TyID:
19207 return Subtarget.hasP9Vector();
19208 default:
19209 return false;
19210 }
19211}
19212
19213// FIXME: add more patterns which are not profitable to hoist.
19215 if (!I->hasOneUse())
19216 return true;
19217
19218 Instruction *User = I->user_back();
19219 assert(User && "A single use instruction with no uses.");
19220
19221 switch (I->getOpcode()) {
19222 case Instruction::FMul: {
19223 // Don't break FMA, PowerPC prefers FMA.
19224 if (User->getOpcode() != Instruction::FSub &&
19225 User->getOpcode() != Instruction::FAdd)
19226 return true;
19227
19229 const Function *F = I->getFunction();
19230 const DataLayout &DL = F->getDataLayout();
19231 Type *Ty = User->getOperand(0)->getType();
19232 bool AllowContract = I->getFastMathFlags().allowContract() &&
19233 User->getFastMathFlags().allowContract();
19234
19235 return !(isFMAFasterThanFMulAndFAdd(*F, Ty) &&
19237 (AllowContract || Options.AllowFPOpFusion == FPOpFusion::Fast));
19238 }
19239 case Instruction::Load: {
19240 // Don't break "store (load float*)" pattern, this pattern will be combined
19241 // to "store (load int32)" in later InstCombine pass. See function
19242 // combineLoadToOperationType. On PowerPC, loading a float point takes more
19243 // cycles than loading a 32 bit integer.
19244 LoadInst *LI = cast<LoadInst>(I);
19245 // For the loads that combineLoadToOperationType does nothing, like
19246 // ordered load, it should be profitable to hoist them.
19247 // For swifterror load, it can only be used for pointer to pointer type, so
19248 // later type check should get rid of this case.
19249 if (!LI->isUnordered())
19250 return true;
19251
19252 if (User->getOpcode() != Instruction::Store)
19253 return true;
19254
19255 if (I->getType()->getTypeID() != Type::FloatTyID)
19256 return true;
19257
19258 return false;
19259 }
19260 default:
19261 return true;
19262 }
19263 return true;
19264}
19265
19266const MCPhysReg *
19268 // LR is a callee-save register, but we must treat it as clobbered by any call
19269 // site. Hence we include LR in the scratch registers, which are in turn added
19270 // as implicit-defs for stackmaps and patchpoints. The same reasoning applies
19271 // to CTR, which is used by any indirect call.
19272 static const MCPhysReg ScratchRegs[] = {
19273 PPC::X12, PPC::LR8, PPC::CTR8, 0
19274 };
19275
19276 return ScratchRegs;
19277}
19278
19280 const Constant *PersonalityFn) const {
19281 return Subtarget.isPPC64() ? PPC::X3 : PPC::R3;
19282}
19283
19285 const Constant *PersonalityFn) const {
19286 return Subtarget.isPPC64() ? PPC::X4 : PPC::R4;
19287}
19288
19289bool
19291 EVT VT , unsigned DefinedValues) const {
19292 if (VT == MVT::v2i64)
19293 return Subtarget.hasDirectMove(); // Don't need stack ops with direct moves
19294
19295 if (Subtarget.hasVSX())
19296 return true;
19297
19299}
19300
19302 if (DisableILPPref || Subtarget.enableMachineScheduler())
19304
19305 return Sched::ILP;
19306}
19307
19308// Create a fast isel object.
19310 FunctionLoweringInfo &FuncInfo, const TargetLibraryInfo *LibInfo,
19311 const LibcallLoweringInfo *LibcallLowering) const {
19312 return PPC::createFastISel(FuncInfo, LibInfo, LibcallLowering);
19313}
19314
19315// 'Inverted' means the FMA opcode after negating one multiplicand.
19316// For example, (fma -a b c) = (fnmsub a b c)
19317static unsigned invertFMAOpcode(unsigned Opc) {
19318 switch (Opc) {
19319 default:
19320 llvm_unreachable("Invalid FMA opcode for PowerPC!");
19321 case ISD::FMA:
19322 return PPCISD::FNMSUB;
19323 case PPCISD::FNMSUB:
19324 return ISD::FMA;
19325 }
19326}
19327
19329 bool LegalOps, bool OptForSize,
19331 unsigned Depth) const {
19333 return SDValue();
19334
19335 unsigned Opc = Op.getOpcode();
19336 EVT VT = Op.getValueType();
19337 SDNodeFlags Flags = Op.getNode()->getFlags();
19338
19339 switch (Opc) {
19340 case PPCISD::FNMSUB:
19341 if (!Op.hasOneUse() || !isTypeLegal(VT))
19342 break;
19343
19344 SDValue N0 = Op.getOperand(0);
19345 SDValue N1 = Op.getOperand(1);
19346 SDValue N2 = Op.getOperand(2);
19347 SDLoc Loc(Op);
19348
19350 SDValue NegN2 =
19351 getNegatedExpression(N2, DAG, LegalOps, OptForSize, N2Cost, Depth + 1);
19352
19353 if (!NegN2)
19354 return SDValue();
19355
19356 // (fneg (fnmsub a b c)) => (fnmsub (fneg a) b (fneg c))
19357 // (fneg (fnmsub a b c)) => (fnmsub a (fneg b) (fneg c))
19358 // These transformations may change sign of zeroes. For example,
19359 // -(-ab-(-c))=-0 while -(-(ab-c))=+0 when a=b=c=1.
19360 if (Flags.hasNoSignedZeros()) {
19361 // Try and choose the cheaper one to negate.
19363 SDValue NegN0 = getNegatedExpression(N0, DAG, LegalOps, OptForSize,
19364 N0Cost, Depth + 1);
19365
19367 SDValue NegN1 = getNegatedExpression(N1, DAG, LegalOps, OptForSize,
19368 N1Cost, Depth + 1);
19369
19370 if (NegN0 && N0Cost <= N1Cost) {
19371 Cost = std::min(N0Cost, N2Cost);
19372 return DAG.getNode(Opc, Loc, VT, NegN0, N1, NegN2, Flags);
19373 } else if (NegN1) {
19374 Cost = std::min(N1Cost, N2Cost);
19375 return DAG.getNode(Opc, Loc, VT, N0, NegN1, NegN2, Flags);
19376 }
19377 }
19378
19379 // (fneg (fnmsub a b c)) => (fma a b (fneg c))
19380 if (isOperationLegal(ISD::FMA, VT)) {
19381 Cost = N2Cost;
19382 return DAG.getNode(ISD::FMA, Loc, VT, N0, N1, NegN2, Flags);
19383 }
19384
19385 break;
19386 }
19387
19388 return TargetLowering::getNegatedExpression(Op, DAG, LegalOps, OptForSize,
19389 Cost, Depth);
19390}
19391
19392// Override to enable LOAD_STACK_GUARD lowering on Linux.
19394 if (M.getStackProtectorGuard() == "tls" || Subtarget.isTargetLinux())
19395 return true;
19397}
19398
19400 bool ForCodeSize) const {
19401 if (!VT.isSimple() || !Subtarget.hasVSX())
19402 return false;
19403
19404 switch(VT.getSimpleVT().SimpleTy) {
19405 default:
19406 // For FP types that are currently not supported by PPC backend, return
19407 // false. Examples: f16, f80.
19408 return false;
19409 case MVT::f32:
19410 case MVT::f64: {
19411 if (Subtarget.hasPrefixInstrs() && Subtarget.hasP10Vector()) {
19412 // we can materialize all immediatess via XXSPLTI32DX and XXSPLTIDP.
19413 return true;
19414 }
19415 bool IsExact;
19416 APSInt IntResult(16, false);
19417 // The rounding mode doesn't really matter because we only care about floats
19418 // that can be converted to integers exactly.
19419 Imm.convertToInteger(IntResult, APFloat::rmTowardZero, &IsExact);
19420 // For exact values in the range [-16, 15] we can materialize the float.
19421 if (IsExact && IntResult <= 15 && IntResult >= -16)
19422 return true;
19423 return Imm.isZero();
19424 }
19425 case MVT::ppcf128:
19426 return Imm.isPosZero();
19427 }
19428}
19429
19430// For vector shift operation op, fold
19431// (op x, (and y, ((1 << numbits(x)) - 1))) -> (target op x, y)
19433 SelectionDAG &DAG) {
19434 SDValue N0 = N->getOperand(0);
19435 SDValue N1 = N->getOperand(1);
19436 EVT VT = N0.getValueType();
19437 unsigned OpSizeInBits = VT.getScalarSizeInBits();
19438 unsigned Opcode = N->getOpcode();
19439 unsigned TargetOpcode;
19440
19441 switch (Opcode) {
19442 default:
19443 llvm_unreachable("Unexpected shift operation");
19444 case ISD::SHL:
19445 TargetOpcode = PPCISD::SHL;
19446 break;
19447 case ISD::SRL:
19448 TargetOpcode = PPCISD::SRL;
19449 break;
19450 case ISD::SRA:
19451 TargetOpcode = PPCISD::SRA;
19452 break;
19453 }
19454
19455 if (VT.isVector() && TLI.isOperationLegal(Opcode, VT) &&
19456 N1->getOpcode() == ISD::AND)
19457 if (ConstantSDNode *Mask = isConstOrConstSplat(N1->getOperand(1)))
19458 if (Mask->getZExtValue() == OpSizeInBits - 1)
19459 return DAG.getNode(TargetOpcode, SDLoc(N), VT, N0, N1->getOperand(0));
19460
19461 return SDValue();
19462}
19463
/// Try to replace a legal vector shift (shl/srl/sra of v4i32 or v2i64) whose
/// shift amount is a constant splat with a cheaper form:
///  - a splat of (element-size - 1) becomes the PPCISD shift node fed by an
///    all-ones splat, since the hardware only reads the low 5/6 bits,
///  - a v2i64 shl by a splat of 1 becomes (add x, x) before the splat
///    constant would otherwise be materialized as a load.
/// Returns SDValue() when no rewrite applies.
SDValue PPCTargetLowering::combineVectorShift(SDNode *N,
                                              DAGCombinerInfo &DCI) const {
  EVT VT = N->getValueType(0);
  assert(VT.isVector() && "Vector type expected.");

  unsigned Opc = N->getOpcode();
  assert((Opc == ISD::SHL || Opc == ISD::SRL || Opc == ISD::SRA) &&
         "Unexpected opcode.");

  if (!isOperationLegal(Opc, VT))
    return SDValue();

  // Only word and doubleword element types are handled below.
  EVT EltTy = VT.getScalarType();
  unsigned EltBits = EltTy.getSizeInBits();
  if (EltTy != MVT::i64 && EltTy != MVT::i32)
    return SDValue();

  // Extract the splatted shift amount, either from a VADD_SPLAT node or
  // from a constant-splat BUILD_VECTOR.
  SDValue N1 = N->getOperand(1);
  uint64_t SplatBits = 0;
  bool AddSplatCase = false;
  unsigned OpcN1 = N1.getOpcode();
  if (OpcN1 == PPCISD::VADD_SPLAT &&
    AddSplatCase = true;
    SplatBits = N1.getConstantOperandVal(0);
  }

  if (!AddSplatCase) {
    if (OpcN1 != ISD::BUILD_VECTOR)
      return SDValue();

    unsigned SplatBitSize;
    bool HasAnyUndefs;
    APInt APSplatBits, APSplatUndef;
    BuildVectorSDNode *BVN = cast<BuildVectorSDNode>(N1);
    bool BVNIsConstantSplat =
        BVN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
                             HasAnyUndefs, 0, !Subtarget.isLittleEndian());
    // The splat must cover exactly one element's worth of bits.
    if (!BVNIsConstantSplat || SplatBitSize != EltBits)
      return SDValue();
    SplatBits = APSplatBits.getZExtValue();
  }

  SDLoc DL(N);
  SDValue N0 = N->getOperand(0);
  // PPC vector shifts by word/double look at only the low 5/6 bits of the
  // shift vector, which means the max value is 31/63. A shift vector of all
  // 1s will be truncated to 31/63, which is useful as vspltiw is limited to
  // -16 to 15 range.
  if (SplatBits == (EltBits - 1)) {
    unsigned NewOpc;
    switch (Opc) {
    case ISD::SHL:
      NewOpc = PPCISD::SHL;
      break;
    case ISD::SRL:
      NewOpc = PPCISD::SRL;
      break;
    case ISD::SRA:
      NewOpc = PPCISD::SRA;
      break;
    }
    // 255 truncates to an all-ones splat in each byte; the shift hardware
    // then sees the maximal in-range amount.
    SDValue SplatOnes = getCanonicalConstSplat(255, 1, VT, DCI.DAG, DL);
    return DCI.DAG.getNode(NewOpc, DL, VT, N0, SplatOnes);
  }

  if (Opc != ISD::SHL || !isOperationLegal(ISD::ADD, VT))
    return SDValue();

  // For 64-bit there is no splat immediate so we want to catch shift by 1 here
  // before the BUILD_VECTOR is replaced by a load.
  if (EltTy != MVT::i64 || SplatBits != 1)
    return SDValue();

  // x << 1 == x + x.
  return DCI.DAG.getNode(ISD::ADD, SDLoc(N), VT, N0, N0);
}
19540
19541SDValue PPCTargetLowering::combineSHL(SDNode *N, DAGCombinerInfo &DCI) const {
19542 if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
19543 return Value;
19544
19545 if (N->getValueType(0).isVector())
19546 return combineVectorShift(N, DCI);
19547
19548 SDValue N0 = N->getOperand(0);
19549 ConstantSDNode *CN1 = dyn_cast<ConstantSDNode>(N->getOperand(1));
19550 if (!Subtarget.isISA3_0() || !Subtarget.isPPC64() ||
19551 N0.getOpcode() != ISD::SIGN_EXTEND ||
19552 N0.getOperand(0).getValueType() != MVT::i32 || CN1 == nullptr ||
19553 N->getValueType(0) != MVT::i64)
19554 return SDValue();
19555
19556 // We can't save an operation here if the value is already extended, and
19557 // the existing shift is easier to combine.
19558 SDValue ExtsSrc = N0.getOperand(0);
19559 if (ExtsSrc.getOpcode() == ISD::TRUNCATE &&
19560 ExtsSrc.getOperand(0).getOpcode() == ISD::AssertSext)
19561 return SDValue();
19562
19563 SDLoc DL(N0);
19564 SDValue ShiftBy = SDValue(CN1, 0);
19565 // We want the shift amount to be i32 on the extswli, but the shift could
19566 // have an i64.
19567 if (ShiftBy.getValueType() == MVT::i64)
19568 ShiftBy = DCI.DAG.getConstant(CN1->getZExtValue(), DL, MVT::i32);
19569
19570 return DCI.DAG.getNode(PPCISD::EXTSWSLI, DL, MVT::i64, N0->getOperand(0),
19571 ShiftBy);
19572}
19573
19574SDValue PPCTargetLowering::combineSRA(SDNode *N, DAGCombinerInfo &DCI) const {
19575 if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
19576 return Value;
19577
19578 if (N->getValueType(0).isVector())
19579 return combineVectorShift(N, DCI);
19580
19581 return SDValue();
19582}
19583
19584SDValue PPCTargetLowering::combineSRL(SDNode *N, DAGCombinerInfo &DCI) const {
19585 if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
19586 return Value;
19587
19588 if (N->getValueType(0).isVector())
19589 return combineVectorShift(N, DCI);
19590
19591 return SDValue();
19592}
19593
19594// Transform (add X, (zext(setne Z, C))) -> (addze X, (addic (addi Z, -C), -1))
19595// Transform (add X, (zext(sete Z, C))) -> (addze X, (subfic (addi Z, -C), 0))
19596// When C is zero, the equation (addi Z, -C) can be simplified to Z
19597// Requirement: -C in [-32768, 32767], X and Z are MVT::i64 types
19599 const PPCSubtarget &Subtarget) {
19600 if (!Subtarget.isPPC64())
19601 return SDValue();
19602
19603 SDValue LHS = N->getOperand(0);
19604 SDValue RHS = N->getOperand(1);
19605
19606 auto isZextOfCompareWithConstant = [](SDValue Op) {
19607 if (Op.getOpcode() != ISD::ZERO_EXTEND || !Op.hasOneUse() ||
19608 Op.getValueType() != MVT::i64)
19609 return false;
19610
19611 SDValue Cmp = Op.getOperand(0);
19612 if (Cmp.getOpcode() != ISD::SETCC || !Cmp.hasOneUse() ||
19613 Cmp.getOperand(0).getValueType() != MVT::i64)
19614 return false;
19615
19616 if (auto *Constant = dyn_cast<ConstantSDNode>(Cmp.getOperand(1))) {
19617 int64_t NegConstant = 0 - Constant->getSExtValue();
19618 // Due to the limitations of the addi instruction,
19619 // -C is required to be [-32768, 32767].
19620 return isInt<16>(NegConstant);
19621 }
19622
19623 return false;
19624 };
19625
19626 bool LHSHasPattern = isZextOfCompareWithConstant(LHS);
19627 bool RHSHasPattern = isZextOfCompareWithConstant(RHS);
19628
19629 // If there is a pattern, canonicalize a zext operand to the RHS.
19630 if (LHSHasPattern && !RHSHasPattern)
19631 std::swap(LHS, RHS);
19632 else if (!LHSHasPattern && !RHSHasPattern)
19633 return SDValue();
19634
19635 SDLoc DL(N);
19636 EVT CarryType = Subtarget.useCRBits() ? MVT::i1 : MVT::i32;
19637 SDVTList VTs = DAG.getVTList(MVT::i64, CarryType);
19638 SDValue Cmp = RHS.getOperand(0);
19639 SDValue Z = Cmp.getOperand(0);
19640 auto *Constant = cast<ConstantSDNode>(Cmp.getOperand(1));
19641 int64_t NegConstant = 0 - Constant->getSExtValue();
19642
19643 switch(cast<CondCodeSDNode>(Cmp.getOperand(2))->get()) {
19644 default: break;
19645 case ISD::SETNE: {
19646 // when C == 0
19647 // --> addze X, (addic Z, -1).carry
19648 // /
19649 // add X, (zext(setne Z, C))--
19650 // \ when -32768 <= -C <= 32767 && C != 0
19651 // --> addze X, (addic (addi Z, -C), -1).carry
19652 SDValue Add = DAG.getNode(ISD::ADD, DL, MVT::i64, Z,
19653 DAG.getConstant(NegConstant, DL, MVT::i64));
19654 SDValue AddOrZ = NegConstant != 0 ? Add : Z;
19655 SDValue Addc =
19656 DAG.getNode(ISD::UADDO_CARRY, DL, DAG.getVTList(MVT::i64, CarryType),
19657 AddOrZ, DAG.getAllOnesConstant(DL, MVT::i64),
19658 DAG.getConstant(0, DL, CarryType));
19659 return DAG.getNode(ISD::UADDO_CARRY, DL, VTs, LHS,
19660 DAG.getConstant(0, DL, MVT::i64),
19661 SDValue(Addc.getNode(), 1));
19662 }
19663 case ISD::SETEQ: {
19664 // when C == 0
19665 // --> addze X, (subfic Z, 0).carry
19666 // /
19667 // add X, (zext(sete Z, C))--
19668 // \ when -32768 <= -C <= 32767 && C != 0
19669 // --> addze X, (subfic (addi Z, -C), 0).carry
19670 SDValue Add = DAG.getNode(ISD::ADD, DL, MVT::i64, Z,
19671 DAG.getConstant(NegConstant, DL, MVT::i64));
19672 SDValue AddOrZ = NegConstant != 0 ? Add : Z;
19673 SDValue Subc =
19674 DAG.getNode(ISD::USUBO_CARRY, DL, DAG.getVTList(MVT::i64, CarryType),
19675 DAG.getConstant(0, DL, MVT::i64), AddOrZ,
19676 DAG.getConstant(0, DL, CarryType));
19677 SDValue Invert = DAG.getNode(ISD::XOR, DL, CarryType, Subc.getValue(1),
19678 DAG.getConstant(1UL, DL, CarryType));
19679 return DAG.getNode(ISD::UADDO_CARRY, DL, VTs, LHS,
19680 DAG.getConstant(0, DL, MVT::i64), Invert);
19681 }
19682 }
19683
19684 return SDValue();
19685}
19686
19687// Transform
19688// (add C1, (MAT_PCREL_ADDR GlobalAddr+C2)) to
19689// (MAT_PCREL_ADDR GlobalAddr+(C1+C2))
19690// In this case both C1 and C2 must be known constants.
19691// C1+C2 must fit into a 34 bit signed integer.
19693 const PPCSubtarget &Subtarget) {
19694 if (!Subtarget.isUsingPCRelativeCalls())
19695 return SDValue();
19696
19697 // Check both Operand 0 and Operand 1 of the ADD node for the PCRel node.
19698 // If we find that node try to cast the Global Address and the Constant.
19699 SDValue LHS = N->getOperand(0);
19700 SDValue RHS = N->getOperand(1);
19701
19702 if (LHS.getOpcode() != PPCISD::MAT_PCREL_ADDR)
19703 std::swap(LHS, RHS);
19704
19705 if (LHS.getOpcode() != PPCISD::MAT_PCREL_ADDR)
19706 return SDValue();
19707
19708 // Operand zero of PPCISD::MAT_PCREL_ADDR is the GA node.
19711
19712 // Check that both casts succeeded.
19713 if (!GSDN || !ConstNode)
19714 return SDValue();
19715
19716 int64_t NewOffset = GSDN->getOffset() + ConstNode->getSExtValue();
19717 SDLoc DL(GSDN);
19718
19719 // The signed int offset needs to fit in 34 bits.
19720 if (!isInt<34>(NewOffset))
19721 return SDValue();
19722
19723 // The new global address is a copy of the old global address except
19724 // that it has the updated Offset.
19725 SDValue GA =
19726 DAG.getTargetGlobalAddress(GSDN->getGlobal(), DL, GSDN->getValueType(0),
19727 NewOffset, GSDN->getTargetFlags());
19728 SDValue MatPCRel =
19729 DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, GSDN->getValueType(0), GA);
19730 return MatPCRel;
19731}
19732
19733// Transform (add X, (build_vector (T 1), (T 1), ...)) -> (sub X, (XXLEQVOnes))
19734// XXLEQVOnes creates an all-1s vector (0xFFFFFFFF...) efficiently via xxleqv
19735// Mathematical identity: X + 1 = X - (-1)
19736// Applies to v4i32, v2i64, v8i16, v16i8 where all elements are constant 1
19737// Requirement: VSX feature for efficient xxleqv generation
19739 const PPCSubtarget &Subtarget) {
19740
19741 EVT VT = N->getValueType(0);
19742 if (!Subtarget.hasVSX())
19743 return SDValue();
19744
19745 // Handle v2i64, v4i32, v8i16 and v16i8 types
19746 if (!(VT == MVT::v8i16 || VT == MVT::v16i8 || VT == MVT::v4i32 ||
19747 VT == MVT::v2i64))
19748 return SDValue();
19749
19750 SDValue LHS = N->getOperand(0);
19751 SDValue RHS = N->getOperand(1);
19752
19753 // Check if RHS is BUILD_VECTOR
19754 if (RHS.getOpcode() != ISD::BUILD_VECTOR)
19755 return SDValue();
19756
19757 // Check if all the elements are 1
19758 unsigned NumOfEles = RHS.getNumOperands();
19759 for (unsigned i = 0; i < NumOfEles; ++i) {
19760 auto *CN = dyn_cast<ConstantSDNode>(RHS.getOperand(i));
19761 if (!CN || CN->getSExtValue() != 1)
19762 return SDValue();
19763 }
19764 SDLoc DL(N);
19765
19766 SDValue MinusOne = DAG.getConstant(APInt::getAllOnes(32), DL, MVT::i32);
19767 SmallVector<SDValue, 4> Ops(4, MinusOne);
19768 SDValue AllOnesVec = DAG.getBuildVector(MVT::v4i32, DL, Ops);
19769
19770 // Bitcast to the target vector type
19771 SDValue Bitcast = DAG.getNode(ISD::BITCAST, DL, VT, AllOnesVec);
19772
19773 return DAG.getNode(ISD::SUB, DL, VT, LHS, Bitcast);
19774}
19775
19776SDValue PPCTargetLowering::combineADD(SDNode *N, DAGCombinerInfo &DCI) const {
19777 if (auto Value = combineADDToADDZE(N, DCI.DAG, Subtarget))
19778 return Value;
19779
19780 if (auto Value = combineADDToMAT_PCREL_ADDR(N, DCI.DAG, Subtarget))
19781 return Value;
19782
19783 if (auto Value = combineADDToSUB(N, DCI.DAG, Subtarget))
19784 return Value;
19785 return SDValue();
19786}
19787
19788// Detect TRUNCATE operations on bitcasts of float128 values.
19789// What we are looking for here is the situtation where we extract a subset
19790// of bits from a 128 bit float.
19791// This can be of two forms:
19792// 1) BITCAST of f128 feeding TRUNCATE
19793// 2) BITCAST of f128 feeding SRL (a shift) feeding TRUNCATE
19794// The reason this is required is because we do not have a legal i128 type
19795// and so we want to prevent having to store the f128 and then reload part
19796// of it.
19797SDValue PPCTargetLowering::combineTRUNCATE(SDNode *N,
19798 DAGCombinerInfo &DCI) const {
19799 // If we are using CRBits then try that first.
19800 if (Subtarget.useCRBits()) {
19801 // Check if CRBits did anything and return that if it did.
19802 if (SDValue CRTruncValue = DAGCombineTruncBoolExt(N, DCI))
19803 return CRTruncValue;
19804 }
19805
19806 SDLoc dl(N);
19807 SDValue Op0 = N->getOperand(0);
19808
19809 // Looking for a truncate of i128 to i64.
19810 if (Op0.getValueType() != MVT::i128 || N->getValueType(0) != MVT::i64)
19811 return SDValue();
19812
19813 int EltToExtract = DCI.DAG.getDataLayout().isBigEndian() ? 1 : 0;
19814
19815 // SRL feeding TRUNCATE.
19816 if (Op0.getOpcode() == ISD::SRL) {
19817 ConstantSDNode *ConstNode = dyn_cast<ConstantSDNode>(Op0.getOperand(1));
19818 // The right shift has to be by 64 bits.
19819 if (!ConstNode || ConstNode->getZExtValue() != 64)
19820 return SDValue();
19821
19822 // Switch the element number to extract.
19823 EltToExtract = EltToExtract ? 0 : 1;
19824 // Update Op0 past the SRL.
19825 Op0 = Op0.getOperand(0);
19826 }
19827
19828 // BITCAST feeding a TRUNCATE possibly via SRL.
19829 if (Op0.getOpcode() == ISD::BITCAST &&
19830 Op0.getValueType() == MVT::i128 &&
19831 Op0.getOperand(0).getValueType() == MVT::f128) {
19832 SDValue Bitcast = DCI.DAG.getBitcast(MVT::v2i64, Op0.getOperand(0));
19833 return DCI.DAG.getNode(
19834 ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, Bitcast,
19835 DCI.DAG.getTargetConstant(EltToExtract, dl, MVT::i32));
19836 }
19837 return SDValue();
19838}
19839
/// Rewrite a multiply by constant into shift/add/sub sequences when that is
/// faster than the hardware multiply on the current CPU directive. Handles
/// constants of the form +/-(2^N + 1) and +/-(2^N - 1); all other constants
/// (and non-constant multipliers) are left alone.
SDValue PPCTargetLowering::combineMUL(SDNode *N, DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;

  // Only multiplications by a constant (or constant splat) are handled.
  ConstantSDNode *ConstOpOrElement = isConstOrConstSplat(N->getOperand(1));
  if (!ConstOpOrElement)
    return SDValue();

  // An imul is usually smaller than the alternative sequence for legal type.
      isOperationLegal(ISD::MUL, N->getValueType(0)))
    return SDValue();

  // Decide, per CPU, whether the expanded sequence beats the multiply.
  // IsAddOne: the constant is of the form +/-(2^N + 1) (needs an extra
  // negate when IsNeg, i.e. a 3-instruction pattern).
  auto IsProfitable = [this](bool IsNeg, bool IsAddOne, EVT VT) -> bool {
    switch (this->Subtarget.getCPUDirective()) {
    default:
      // TODO: enhance the condition for subtarget before pwr8
      return false;
    case PPC::DIR_PWR8:
      // type        mul     add    shl
      // scalar        4      1      1
      // vector        7      2      2
      return true;
    case PPC::DIR_PWR9:
    case PPC::DIR_PWR10:
    case PPC::DIR_PWR11:
      // type        mul     add    shl
      // scalar        5      2      2
      // vector        7      2      2

      // The cycle RATIO of related operations are showed as a table above.
      // Because mul is 5(scalar)/7(vector), add/sub/shl are all 2 for both
      // scalar and vector type. For 2 instrs patterns, add/sub + shl
      // are 4, it is always profitable; but for 3 instrs patterns
      // (mul x, -(2^N + 1)) => -(add (shl x, N), x), sub + add + shl are 6.
      // So we should only do it for vector type.
      return IsAddOne && IsNeg ? VT.isVector() : true;
    }
  };

  EVT VT = N->getValueType(0);
  SDLoc DL(N);

  const APInt &MulAmt = ConstOpOrElement->getAPIntValue();
  bool IsNeg = MulAmt.isNegative();
  APInt MulAmtAbs = MulAmt.abs();

  if ((MulAmtAbs - 1).isPowerOf2()) {
    // (mul x, 2^N + 1) => (add (shl x, N), x)
    // (mul x, -(2^N + 1)) => -(add (shl x, N), x)

    if (!IsProfitable(IsNeg, true, VT))
      return SDValue();

    SDValue Op0 = N->getOperand(0);
    SDValue Op1 =
        DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
                    DAG.getConstant((MulAmtAbs - 1).logBase2(), DL, VT));
    SDValue Res = DAG.getNode(ISD::ADD, DL, VT, Op0, Op1);

    if (!IsNeg)
      return Res;

    // Negative multiplier: negate the result as (0 - Res).
    return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Res);
  } else if ((MulAmtAbs + 1).isPowerOf2()) {
    // (mul x, 2^N - 1) => (sub (shl x, N), x)
    // (mul x, -(2^N - 1)) => (sub x, (shl x, N))

    if (!IsProfitable(IsNeg, false, VT))
      return SDValue();

    SDValue Op0 = N->getOperand(0);
    SDValue Op1 =
        DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
                    DAG.getConstant((MulAmtAbs + 1).logBase2(), DL, VT));

    // The subtraction order encodes the sign, so no explicit negate needed.
    if (!IsNeg)
      return DAG.getNode(ISD::SUB, DL, VT, Op1, Op0);
    else
      return DAG.getNode(ISD::SUB, DL, VT, Op0, Op1);

  } else {
    return SDValue();
  }
}
19925
// Combine fma-like op (like fnmsub) with fnegs to appropriate op. Do this
// in combiner since we need to check SD flags and other subtarget features.
//   (fma (fneg a) b c)    => (fnmsub a b c)
//   (fnmsub (fneg a) b c) => (fma a b c)
// and the same with the negation on the second multiplicand. Only performed
// when FMA is legal for the type and the node carries the no-signed-zeros
// flag, since the rewrite may change the sign of a zero result.
SDValue PPCTargetLowering::combineFMALike(SDNode *N,
                                          DAGCombinerInfo &DCI) const {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue N2 = N->getOperand(2);
  SDNodeFlags Flags = N->getFlags();
  EVT VT = N->getValueType(0);
  SelectionDAG &DAG = DCI.DAG;
  unsigned Opc = N->getOpcode();
  bool LegalOps = !DCI.isBeforeLegalizeOps();
  SDLoc Loc(N);

  if (!isOperationLegal(ISD::FMA, VT))
    return SDValue();

  // Allowing transformation to FNMSUB may change sign of zeroes when ab-c=0
  // since (fnmsub a b c)=-0 while c-ab=+0.
  if (!Flags.hasNoSignedZeros())
    return SDValue();

  // (fma (fneg a) b c) => (fnmsub a b c)
  // (fnmsub (fneg a) b c) => (fma a b c)
  if (SDValue NegN0 = getCheaperNegatedExpression(N0, DAG, LegalOps, CodeSize))
    return DAG.getNode(invertFMAOpcode(Opc), Loc, VT, NegN0, N1, N2, Flags);

  // (fma a (fneg b) c) => (fnmsub a b c)
  // (fnmsub a (fneg b) c) => (fma a b c)
  if (SDValue NegN1 = getCheaperNegatedExpression(N1, DAG, LegalOps, CodeSize))
    return DAG.getNode(invertFMAOpcode(Opc), Loc, VT, N0, NegN1, N2, Flags);

  return SDValue();
}
19961
19962bool PPCTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
19963 // Only duplicate to increase tail-calls for the 64bit SysV ABIs.
19964 if (!Subtarget.is64BitELFABI())
19965 return false;
19966
19967 // If not a tail call then no need to proceed.
19968 if (!CI->isTailCall())
19969 return false;
19970
19971 // If sibling calls have been disabled and tail-calls aren't guaranteed
19972 // there is no reason to duplicate.
19973 auto &TM = getTargetMachine();
19974 if (!TM.Options.GuaranteedTailCallOpt && DisableSCO)
19975 return false;
19976
19977 // Can't tail call a function called indirectly, or if it has variadic args.
19978 const Function *Callee = CI->getCalledFunction();
19979 if (!Callee || Callee->isVarArg())
19980 return false;
19981
19982 // Make sure the callee and caller calling conventions are eligible for tco.
19983 const Function *Caller = CI->getParent()->getParent();
19984 if (!areCallingConvEligibleForTCO_64SVR4(Caller->getCallingConv(),
19985 CI->getCallingConv()))
19986 return false;
19987
19988 // If the function is local then we have a good chance at tail-calling it
19989 return getTargetMachine().shouldAssumeDSOLocal(Callee);
19990}
19991
19992bool PPCTargetLowering::
19993isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const {
19994 const Value *Mask = AndI.getOperand(1);
19995 // If the mask is suitable for andi. or andis. we should sink the and.
19996 if (const ConstantInt *CI = dyn_cast<ConstantInt>(Mask)) {
19997 // Can't handle constants wider than 64-bits.
19998 if (CI->getBitWidth() > 64)
19999 return false;
20000 int64_t ConstVal = CI->getZExtValue();
20001 return isUInt<16>(ConstVal) ||
20002 (isUInt<16>(ConstVal >> 16) && !(ConstVal & 0xFFFF));
20003 }
20004
20005 // For non-constant masks, we can always use the record-form and.
20006 return true;
20007}
20008
20009/// getAddrModeForFlags - Based on the set of address flags, select the most
20010/// optimal instruction format to match by.
20011PPC::AddrMode PPCTargetLowering::getAddrModeForFlags(unsigned Flags) const {
20012 // This is not a node we should be handling here.
20013 if (Flags == PPC::MOF_None)
20014 return PPC::AM_None;
20015 // Unaligned D-Forms are tried first, followed by the aligned D-Forms.
20016 for (auto FlagSet : AddrModesMap.at(PPC::AM_DForm))
20017 if ((Flags & FlagSet) == FlagSet)
20018 return PPC::AM_DForm;
20019 for (auto FlagSet : AddrModesMap.at(PPC::AM_DSForm))
20020 if ((Flags & FlagSet) == FlagSet)
20021 return PPC::AM_DSForm;
20022 for (auto FlagSet : AddrModesMap.at(PPC::AM_DQForm))
20023 if ((Flags & FlagSet) == FlagSet)
20024 return PPC::AM_DQForm;
20025 for (auto FlagSet : AddrModesMap.at(PPC::AM_PrefixDForm))
20026 if ((Flags & FlagSet) == FlagSet)
20027 return PPC::AM_PrefixDForm;
20028 // If no other forms are selected, return an X-Form as it is the most
20029 // general addressing mode.
20030 return PPC::AM_XForm;
20031}
20032
/// Set alignment flags based on whether or not the Frame Index is aligned.
/// Utilized when computing flags for address computation when selecting
/// load and store instructions.
/// \param N        address node: either a plain FrameIndex, or an ADD/OR
///                 whose first operand is the FrameIndex.
/// \param FlagSet  in/out set of PPC::MOF_* flags to adjust.
static void setAlignFlagsForFI(SDValue N, unsigned &FlagSet,
                               SelectionDAG &DAG) {
  bool IsAdd = ((N.getOpcode() == ISD::ADD) || (N.getOpcode() == ISD::OR));
  FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(IsAdd ? N.getOperand(0) : N);
  if (!FI)
    return;
  unsigned FrameIndexAlign = MFI.getObjectAlign(FI->getIndex()).value();
  // If this is (add $FI, $S16Imm), the alignment flags are already set
  // based on the immediate. We just need to clear the alignment flags
  // if the FI alignment is weaker.
  if ((FrameIndexAlign % 4) != 0)
    FlagSet &= ~PPC::MOF_RPlusSImm16Mult4;
  if ((FrameIndexAlign % 16) != 0)
    FlagSet &= ~PPC::MOF_RPlusSImm16Mult16;
  // If the address is a plain FrameIndex, set alignment flags based on
  // FI alignment.
  if (!IsAdd) {
    if ((FrameIndexAlign % 4) == 0)
      FlagSet |= PPC::MOF_RPlusSImm16Mult4;
    if ((FrameIndexAlign % 16) == 0)
      FlagSet |= PPC::MOF_RPlusSImm16Mult16;
  }
}
20060
/// Given a node, compute flags that are used for address computation when
/// selecting load and store instructions. The flags computed are stored in
/// FlagSet. This function takes into account whether the node is a constant,
/// an ADD, OR, or a constant, and computes the address flags accordingly.
static void computeFlagsForAddressComputation(SDValue N, unsigned &FlagSet,
                                              SelectionDAG &DAG) {
  // Set the alignment flags for the node depending on if the node is
  // 4-byte or 16-byte aligned.
  auto SetAlignFlagsForImm = [&](uint64_t Imm) {
    if ((Imm & 0x3) == 0)
      FlagSet |= PPC::MOF_RPlusSImm16Mult4;
    if ((Imm & 0xf) == 0)
      FlagSet |= PPC::MOF_RPlusSImm16Mult16;
  };

    // All 32-bit constants can be computed as LIS + Disp.
    const APInt &ConstImm = CN->getAPIntValue();
    if (ConstImm.isSignedIntN(32)) { // Flag to handle 32-bit constants.
      FlagSet |= PPC::MOF_AddrIsSImm32;
      SetAlignFlagsForImm(ConstImm.getZExtValue());
      setAlignFlagsForFI(N, FlagSet, DAG);
    }
    if (ConstImm.isSignedIntN(34)) // Flag to handle 34-bit constants.
      FlagSet |= PPC::MOF_RPlusSImm34;
    else // Let constant materialization handle large constants.
      FlagSet |= PPC::MOF_NotAddNorCst;
  } else if (N.getOpcode() == ISD::ADD || provablyDisjointOr(DAG, N)) {
    // This address can be represented as an addition of:
    //  - Register + Imm16 (possibly a multiple of 4/16)
    //  - Register + Imm34
    //  - Register + PPCISD::Lo
    //  - Register + Register
    // In any case, we won't have to match this as Base + Zero.
    SDValue RHS = N.getOperand(1);
      const APInt &ConstImm = CN->getAPIntValue();
      if (ConstImm.isSignedIntN(16)) {
        FlagSet |= PPC::MOF_RPlusSImm16; // Signed 16-bit immediates.
        SetAlignFlagsForImm(ConstImm.getZExtValue());
        setAlignFlagsForFI(N, FlagSet, DAG);
      }
      if (ConstImm.isSignedIntN(34))
        FlagSet |= PPC::MOF_RPlusSImm34; // Signed 34-bit immediates.
      else
        FlagSet |= PPC::MOF_RPlusR; // Register.
    } else if (RHS.getOpcode() == PPCISD::Lo && !RHS.getConstantOperandVal(1))
      FlagSet |= PPC::MOF_RPlusLo; // PPCISD::Lo.
    else
      FlagSet |= PPC::MOF_RPlusR; // Non-constant, non-Lo RHS: reg + reg.
  } else { // The address computation is not a constant or an addition.
    setAlignFlagsForFI(N, FlagSet, DAG);
    FlagSet |= PPC::MOF_NotAddNorCst;
  }
}
20116
20117static bool isPCRelNode(SDValue N) {
20118 return (N.getOpcode() == PPCISD::MAT_PCREL_ADDR ||
20123}
20124
20125/// computeMOFlags - Given a node N and it's Parent (a MemSDNode), compute
20126/// the address flags of the load/store instruction that is to be matched.
20127unsigned PPCTargetLowering::computeMOFlags(const SDNode *Parent, SDValue N,
20128 SelectionDAG &DAG) const {
20129 unsigned FlagSet = PPC::MOF_None;
20130
20131 // Compute subtarget flags.
20132 if (!Subtarget.hasP9Vector())
20133 FlagSet |= PPC::MOF_SubtargetBeforeP9;
20134 else
20135 FlagSet |= PPC::MOF_SubtargetP9;
20136
20137 if (Subtarget.hasPrefixInstrs())
20138 FlagSet |= PPC::MOF_SubtargetP10;
20139
20140 if (Subtarget.hasSPE())
20141 FlagSet |= PPC::MOF_SubtargetSPE;
20142
20143 // Check if we have a PCRel node and return early.
20144 if ((FlagSet & PPC::MOF_SubtargetP10) && isPCRelNode(N))
20145 return FlagSet;
20146
20147 // If the node is the paired load/store intrinsics, compute flags for
20148 // address computation and return early.
20149 unsigned ParentOp = Parent->getOpcode();
20150 if (Subtarget.isISA3_1() && ((ParentOp == ISD::INTRINSIC_W_CHAIN) ||
20151 (ParentOp == ISD::INTRINSIC_VOID))) {
20152 unsigned ID = Parent->getConstantOperandVal(1);
20153 if ((ID == Intrinsic::ppc_vsx_lxvp) || (ID == Intrinsic::ppc_vsx_stxvp)) {
20154 SDValue IntrinOp = (ID == Intrinsic::ppc_vsx_lxvp)
20155 ? Parent->getOperand(2)
20156 : Parent->getOperand(3);
20157 computeFlagsForAddressComputation(IntrinOp, FlagSet, DAG);
20158 FlagSet |= PPC::MOF_Vector;
20159 return FlagSet;
20160 }
20161 }
20162
20163 // Mark this as something we don't want to handle here if it is atomic
20164 // or pre-increment instruction.
20165 if (const LSBaseSDNode *LSB = dyn_cast<LSBaseSDNode>(Parent))
20166 if (LSB->isIndexed())
20167 return PPC::MOF_None;
20168
20169 // Compute in-memory type flags. This is based on if there are scalars,
20170 // floats or vectors.
20171 const MemSDNode *MN = dyn_cast<MemSDNode>(Parent);
20172 assert(MN && "Parent should be a MemSDNode!");
20173 EVT MemVT = MN->getMemoryVT();
20174 unsigned Size = MemVT.getSizeInBits();
20175 if (MemVT.isScalarInteger()) {
20176 assert(Size <= 128 &&
20177 "Not expecting scalar integers larger than 16 bytes!");
20178 if (Size < 32)
20179 FlagSet |= PPC::MOF_SubWordInt;
20180 else if (Size == 32)
20181 FlagSet |= PPC::MOF_WordInt;
20182 else
20183 FlagSet |= PPC::MOF_DoubleWordInt;
20184 } else if (MemVT.isVector() && !MemVT.isFloatingPoint()) { // Integer vectors.
20185 if (Size == 128)
20186 FlagSet |= PPC::MOF_Vector;
20187 else if (Size == 256) {
20188 assert(Subtarget.pairedVectorMemops() &&
20189 "256-bit vectors are only available when paired vector memops is "
20190 "enabled!");
20191 FlagSet |= PPC::MOF_Vector;
20192 } else
20193 llvm_unreachable("Not expecting illegal vectors!");
20194 } else { // Floating point type: can be scalar, f128 or vector types.
20195 if (Size == 32 || Size == 64)
20196 FlagSet |= PPC::MOF_ScalarFloat;
20197 else if (MemVT == MVT::f128 || MemVT.isVector())
20198 FlagSet |= PPC::MOF_Vector;
20199 else
20200 llvm_unreachable("Not expecting illegal scalar floats!");
20201 }
20202
20203 // Compute flags for address computation.
20204 computeFlagsForAddressComputation(N, FlagSet, DAG);
20205
20206 // Compute type extension flags.
20207 if (const LoadSDNode *LN = dyn_cast<LoadSDNode>(Parent)) {
20208 switch (LN->getExtensionType()) {
20209 case ISD::SEXTLOAD:
20210 FlagSet |= PPC::MOF_SExt;
20211 break;
20212 case ISD::EXTLOAD:
20213 case ISD::ZEXTLOAD:
20214 FlagSet |= PPC::MOF_ZExt;
20215 break;
20216 case ISD::NON_EXTLOAD:
20217 FlagSet |= PPC::MOF_NoExt;
20218 break;
20219 }
20220 } else
20221 FlagSet |= PPC::MOF_NoExt;
20222
20223 // For integers, no extension is the same as zero extension.
20224 // We set the extension mode to zero extension so we don't have
20225 // to add separate entries in AddrModesMap for loads and stores.
20226 if (MemVT.isScalarInteger() && (FlagSet & PPC::MOF_NoExt)) {
20227 FlagSet |= PPC::MOF_ZExt;
20228 FlagSet &= ~PPC::MOF_NoExt;
20229 }
20230
20231 // If we don't have prefixed instructions, 34-bit constants should be
20232 // treated as PPC::MOF_NotAddNorCst so they can match D-Forms.
20233 bool IsNonP1034BitConst =
20235 FlagSet) == PPC::MOF_RPlusSImm34;
20236 if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::OR &&
20237 IsNonP1034BitConst)
20238 FlagSet |= PPC::MOF_NotAddNorCst;
20239
20240 return FlagSet;
20241}
20242
20243/// SelectForceXFormMode - Given the specified address, force it to be
20244/// represented as an indexed [r+r] operation (an XForm instruction).
20246 SDValue &Base,
20247 SelectionDAG &DAG) const {
20248
20250 int16_t ForceXFormImm = 0;
20251 if (provablyDisjointOr(DAG, N) &&
20252 !isIntS16Immediate(N.getOperand(1), ForceXFormImm)) {
20253 Disp = N.getOperand(0);
20254 Base = N.getOperand(1);
20255 return Mode;
20256 }
20257
20258 // If the address is the result of an add, we will utilize the fact that the
20259 // address calculation includes an implicit add. However, we can reduce
20260 // register pressure if we do not materialize a constant just for use as the
20261 // index register. We only get rid of the add if it is not an add of a
20262 // value and a 16-bit signed constant and both have a single use.
20263 if (N.getOpcode() == ISD::ADD &&
20264 (!isIntS16Immediate(N.getOperand(1), ForceXFormImm) ||
20265 !N.getOperand(1).hasOneUse() || !N.getOperand(0).hasOneUse())) {
20266 Disp = N.getOperand(0);
20267 Base = N.getOperand(1);
20268 return Mode;
20269 }
20270
20271 // Otherwise, use R0 as the base register.
20272 Disp = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
20273 N.getValueType());
20274 Base = N;
20275
20276 return Mode;
20277}
20278
20280 SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
20281 unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const {
20282 EVT ValVT = Val.getValueType();
20283 // If we are splitting a scalar integer into f64 parts (i.e. so they
20284 // can be placed into VFRC registers), we need to zero extend and
20285 // bitcast the values. This will ensure the value is placed into a
20286 // VSR using direct moves or stack operations as needed.
20287 if (PartVT == MVT::f64 &&
20288 (ValVT == MVT::i32 || ValVT == MVT::i16 || ValVT == MVT::i8)) {
20289 Val = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, Val);
20290 Val = DAG.getNode(ISD::BITCAST, DL, MVT::f64, Val);
20291 Parts[0] = Val;
20292 return true;
20293 }
20294 return false;
20295}
20296
20297SDValue PPCTargetLowering::lowerToLibCall(const char *LibCallName, SDValue Op,
20298 SelectionDAG &DAG) const {
20299 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
20301 EVT RetVT = Op.getValueType();
20302 Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
20303 SDValue Callee =
20304 DAG.getExternalSymbol(LibCallName, TLI.getPointerTy(DAG.getDataLayout()));
20305 bool SignExtend = TLI.shouldSignExtendTypeInLibCall(RetTy, false);
20307 for (const SDValue &N : Op->op_values()) {
20308 EVT ArgVT = N.getValueType();
20309 Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
20310 TargetLowering::ArgListEntry Entry(N, ArgTy);
20311 Entry.IsSExt = TLI.shouldSignExtendTypeInLibCall(ArgTy, SignExtend);
20312 Entry.IsZExt = !Entry.IsSExt;
20313 Args.push_back(Entry);
20314 }
20315
20316 SDValue InChain = DAG.getEntryNode();
20317 SDValue TCChain = InChain;
20318 const Function &F = DAG.getMachineFunction().getFunction();
20319 bool isTailCall =
20320 TLI.isInTailCallPosition(DAG, Op.getNode(), TCChain) &&
20321 (RetTy == F.getReturnType() || F.getReturnType()->isVoidTy());
20322 if (isTailCall)
20323 InChain = TCChain;
20324 CLI.setDebugLoc(SDLoc(Op))
20325 .setChain(InChain)
20326 .setLibCallee(CallingConv::C, RetTy, Callee, std::move(Args))
20327 .setTailCall(isTailCall)
20328 .setSExtResult(SignExtend)
20329 .setZExtResult(!SignExtend)
20331 return TLI.LowerCallTo(CLI).first;
20332}
20333
20334SDValue PPCTargetLowering::lowerLibCallBasedOnType(
20335 const char *LibCallFloatName, const char *LibCallDoubleName, SDValue Op,
20336 SelectionDAG &DAG) const {
20337 if (Op.getValueType() == MVT::f32)
20338 return lowerToLibCall(LibCallFloatName, Op, DAG);
20339
20340 if (Op.getValueType() == MVT::f64)
20341 return lowerToLibCall(LibCallDoubleName, Op, DAG);
20342
20343 return SDValue();
20344}
20345
20346bool PPCTargetLowering::isLowringToMASSFiniteSafe(SDValue Op) const {
20347 SDNodeFlags Flags = Op.getNode()->getFlags();
20348 return isLowringToMASSSafe(Op) && Flags.hasNoSignedZeros() &&
20349 Flags.hasNoNaNs() && Flags.hasNoInfs();
20350}
20351
20352bool PPCTargetLowering::isLowringToMASSSafe(SDValue Op) const {
20353 return Op.getNode()->getFlags().hasApproximateFuncs();
20354}
20355
20356bool PPCTargetLowering::isScalarMASSConversionEnabled() const {
20358}
20359
20360SDValue PPCTargetLowering::lowerLibCallBase(const char *LibCallDoubleName,
20361 const char *LibCallFloatName,
20362 const char *LibCallDoubleNameFinite,
20363 const char *LibCallFloatNameFinite,
20364 SDValue Op,
20365 SelectionDAG &DAG) const {
20366 if (!isScalarMASSConversionEnabled() || !isLowringToMASSSafe(Op))
20367 return SDValue();
20368
20369 if (!isLowringToMASSFiniteSafe(Op))
20370 return lowerLibCallBasedOnType(LibCallFloatName, LibCallDoubleName, Op,
20371 DAG);
20372
20373 return lowerLibCallBasedOnType(LibCallFloatNameFinite,
20374 LibCallDoubleNameFinite, Op, DAG);
20375}
20376
20377SDValue PPCTargetLowering::lowerPow(SDValue Op, SelectionDAG &DAG) const {
20378 return lowerLibCallBase("__xl_pow", "__xl_powf", "__xl_pow_finite",
20379 "__xl_powf_finite", Op, DAG);
20380}
20381
20382SDValue PPCTargetLowering::lowerSin(SDValue Op, SelectionDAG &DAG) const {
20383 return lowerLibCallBase("__xl_sin", "__xl_sinf", "__xl_sin_finite",
20384 "__xl_sinf_finite", Op, DAG);
20385}
20386
20387SDValue PPCTargetLowering::lowerCos(SDValue Op, SelectionDAG &DAG) const {
20388 return lowerLibCallBase("__xl_cos", "__xl_cosf", "__xl_cos_finite",
20389 "__xl_cosf_finite", Op, DAG);
20390}
20391
20392SDValue PPCTargetLowering::lowerLog(SDValue Op, SelectionDAG &DAG) const {
20393 return lowerLibCallBase("__xl_log", "__xl_logf", "__xl_log_finite",
20394 "__xl_logf_finite", Op, DAG);
20395}
20396
20397SDValue PPCTargetLowering::lowerLog10(SDValue Op, SelectionDAG &DAG) const {
20398 return lowerLibCallBase("__xl_log10", "__xl_log10f", "__xl_log10_finite",
20399 "__xl_log10f_finite", Op, DAG);
20400}
20401
20402SDValue PPCTargetLowering::lowerExp(SDValue Op, SelectionDAG &DAG) const {
20403 return lowerLibCallBase("__xl_exp", "__xl_expf", "__xl_exp_finite",
20404 "__xl_expf_finite", Op, DAG);
20405}
20406
20407// If we happen to match to an aligned D-Form, check if the Frame Index is
20408// adequately aligned. If it is not, reset the mode to match to X-Form.
20409static void setXFormForUnalignedFI(SDValue N, unsigned Flags,
20412 return;
20413 if ((Mode == PPC::AM_DSForm && !(Flags & PPC::MOF_RPlusSImm16Mult4)) ||
20416}
20417
20418/// SelectOptimalAddrMode - Based on a node N and it's Parent (a MemSDNode),
20419/// compute the address flags of the node, get the optimal address mode based
20420/// on the flags, and set the Base and Disp based on the address mode.
20422 SDValue N, SDValue &Disp,
20423 SDValue &Base,
20424 SelectionDAG &DAG,
20425 MaybeAlign Align) const {
20426 SDLoc DL(Parent);
20427
20428 // Compute the address flags.
20429 unsigned Flags = computeMOFlags(Parent, N, DAG);
20430
20431 // Get the optimal address mode based on the Flags.
20432 PPC::AddrMode Mode = getAddrModeForFlags(Flags);
20433
20434 // If the address mode is DS-Form or DQ-Form, check if the FI is aligned.
20435 // Select an X-Form load if it is not.
20436 setXFormForUnalignedFI(N, Flags, Mode);
20437
20438 // Set the mode to PC-Relative addressing mode if we have a valid PC-Rel node.
20439 if ((Mode == PPC::AM_XForm) && isPCRelNode(N)) {
20440 assert(Subtarget.isUsingPCRelativeCalls() &&
20441 "Must be using PC-Relative calls when a valid PC-Relative node is "
20442 "present!");
20443 Mode = PPC::AM_PCRel;
20444 }
20445
20446 // Set Base and Disp accordingly depending on the address mode.
20447 switch (Mode) {
20448 case PPC::AM_DForm:
20449 case PPC::AM_DSForm:
20450 case PPC::AM_DQForm: {
20451 // This is a register plus a 16-bit immediate. The base will be the
20452 // register and the displacement will be the immediate unless it
20453 // isn't sufficiently aligned.
20454 if (Flags & PPC::MOF_RPlusSImm16) {
20455 SDValue Op0 = N.getOperand(0);
20456 SDValue Op1 = N.getOperand(1);
20457 int16_t Imm = Op1->getAsZExtVal();
20458 if (!Align || isAligned(*Align, Imm)) {
20459 Disp = DAG.getSignedTargetConstant(Imm, DL, N.getValueType());
20460 Base = Op0;
20462 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
20463 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
20464 }
20465 break;
20466 }
20467 }
20468 // This is a register plus the @lo relocation. The base is the register
20469 // and the displacement is the global address.
20470 else if (Flags & PPC::MOF_RPlusLo) {
20471 Disp = N.getOperand(1).getOperand(0); // The global address.
20476 Base = N.getOperand(0);
20477 break;
20478 }
20479 // This is a constant address at most 32 bits. The base will be
20480 // zero or load-immediate-shifted and the displacement will be
20481 // the low 16 bits of the address.
20482 else if (Flags & PPC::MOF_AddrIsSImm32) {
20483 auto *CN = cast<ConstantSDNode>(N);
20484 EVT CNType = CN->getValueType(0);
20485 uint64_t CNImm = CN->getZExtValue();
20486 // If this address fits entirely in a 16-bit sext immediate field, codegen
20487 // this as "d, 0".
20488 int16_t Imm;
20489 if (isIntS16Immediate(CN, Imm) && (!Align || isAligned(*Align, Imm))) {
20490 Disp = DAG.getSignedTargetConstant(Imm, DL, CNType);
20491 Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
20492 CNType);
20493 break;
20494 }
20495 // Handle 32-bit sext immediate with LIS + Addr mode.
20496 if ((CNType == MVT::i32 || isInt<32>(CNImm)) &&
20497 (!Align || isAligned(*Align, CNImm))) {
20498 int32_t Addr = (int32_t)CNImm;
20499 // Otherwise, break this down into LIS + Disp.
20500 Disp = DAG.getSignedTargetConstant((int16_t)Addr, DL, MVT::i32);
20501 Base = DAG.getSignedTargetConstant((Addr - (int16_t)Addr) >> 16, DL,
20502 MVT::i32);
20503 uint32_t LIS = CNType == MVT::i32 ? PPC::LIS : PPC::LIS8;
20504 Base = SDValue(DAG.getMachineNode(LIS, DL, CNType, Base), 0);
20505 break;
20506 }
20507 }
20508 // Otherwise, the PPC:MOF_NotAdd flag is set. Load/Store is Non-foldable.
20509 Disp = DAG.getTargetConstant(0, DL, getPointerTy(DAG.getDataLayout()));
20511 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
20512 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
20513 } else
20514 Base = N;
20515 break;
20516 }
20517 case PPC::AM_PrefixDForm: {
20518 int64_t Imm34 = 0;
20519 unsigned Opcode = N.getOpcode();
20520 if (((Opcode == ISD::ADD) || (Opcode == ISD::OR)) &&
20521 (isIntS34Immediate(N.getOperand(1), Imm34))) {
20522 // N is an Add/OR Node, and it's operand is a 34-bit signed immediate.
20523 Disp = DAG.getSignedTargetConstant(Imm34, DL, N.getValueType());
20524 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0)))
20525 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
20526 else
20527 Base = N.getOperand(0);
20528 } else if (isIntS34Immediate(N, Imm34)) {
20529 // The address is a 34-bit signed immediate.
20530 Disp = DAG.getSignedTargetConstant(Imm34, DL, N.getValueType());
20531 Base = DAG.getRegister(PPC::ZERO8, N.getValueType());
20532 }
20533 break;
20534 }
20535 case PPC::AM_PCRel: {
20536 // When selecting PC-Relative instructions, "Base" is not utilized as
20537 // we select the address as [PC+imm].
20538 Disp = N;
20539 break;
20540 }
20541 case PPC::AM_None:
20542 break;
20543 default: { // By default, X-Form is always available to be selected.
20544 // When a frame index is not aligned, we also match by XForm.
20546 Base = FI ? N : N.getOperand(1);
20547 Disp = FI ? DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
20548 N.getValueType())
20549 : N.getOperand(0);
20550 break;
20551 }
20552 }
20553 return Mode;
20554}
20555
20557 bool Return,
20558 bool IsVarArg) const {
20559 switch (CC) {
20560 case CallingConv::Cold:
20561 return (Return ? RetCC_PPC_Cold : CC_PPC64_ELF);
20562 default:
20563 return CC_PPC64_ELF;
20564 }
20565}
20566
20568 return Subtarget.isPPC64() && Subtarget.hasQuadwordAtomics();
20569}
20570
20573 unsigned Size = AI->getType()->getPrimitiveSizeInBits();
20574 if (shouldInlineQuadwordAtomics() && Size == 128)
20576
20577 switch (AI->getOperation()) {
20583 default:
20585 }
20586
20587 llvm_unreachable("unreachable atomicrmw operation");
20588}
20589
20598
20599static Intrinsic::ID
20601 switch (BinOp) {
20602 default:
20603 llvm_unreachable("Unexpected AtomicRMW BinOp");
20605 return Intrinsic::ppc_atomicrmw_xchg_i128;
20606 case AtomicRMWInst::Add:
20607 return Intrinsic::ppc_atomicrmw_add_i128;
20608 case AtomicRMWInst::Sub:
20609 return Intrinsic::ppc_atomicrmw_sub_i128;
20610 case AtomicRMWInst::And:
20611 return Intrinsic::ppc_atomicrmw_and_i128;
20612 case AtomicRMWInst::Or:
20613 return Intrinsic::ppc_atomicrmw_or_i128;
20614 case AtomicRMWInst::Xor:
20615 return Intrinsic::ppc_atomicrmw_xor_i128;
20617 return Intrinsic::ppc_atomicrmw_nand_i128;
20618 }
20619}
20620
20622 IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
20623 Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
20624 assert(shouldInlineQuadwordAtomics() && "Only support quadword now");
20625 Module *M = Builder.GetInsertBlock()->getParent()->getParent();
20626 Type *ValTy = Incr->getType();
20627 assert(ValTy->getPrimitiveSizeInBits() == 128);
20628 Type *Int64Ty = Type::getInt64Ty(M->getContext());
20629 Value *IncrLo = Builder.CreateTrunc(Incr, Int64Ty, "incr_lo");
20630 Value *IncrHi =
20631 Builder.CreateTrunc(Builder.CreateLShr(Incr, 64), Int64Ty, "incr_hi");
20632 Value *LoHi = Builder.CreateIntrinsic(
20634 {AlignedAddr, IncrLo, IncrHi});
20635 Value *Lo = Builder.CreateExtractValue(LoHi, 0, "lo");
20636 Value *Hi = Builder.CreateExtractValue(LoHi, 1, "hi");
20637 Lo = Builder.CreateZExt(Lo, ValTy, "lo64");
20638 Hi = Builder.CreateZExt(Hi, ValTy, "hi64");
20639 return Builder.CreateOr(
20640 Lo, Builder.CreateShl(Hi, ConstantInt::get(ValTy, 64)), "val64");
20641}
20642
20644 IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
20645 Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
20646 assert(shouldInlineQuadwordAtomics() && "Only support quadword now");
20647 Module *M = Builder.GetInsertBlock()->getParent()->getParent();
20648 Type *ValTy = CmpVal->getType();
20649 assert(ValTy->getPrimitiveSizeInBits() == 128);
20650 Function *IntCmpXchg =
20651 Intrinsic::getOrInsertDeclaration(M, Intrinsic::ppc_cmpxchg_i128);
20652 Type *Int64Ty = Type::getInt64Ty(M->getContext());
20653 Value *CmpLo = Builder.CreateTrunc(CmpVal, Int64Ty, "cmp_lo");
20654 Value *CmpHi =
20655 Builder.CreateTrunc(Builder.CreateLShr(CmpVal, 64), Int64Ty, "cmp_hi");
20656 Value *NewLo = Builder.CreateTrunc(NewVal, Int64Ty, "new_lo");
20657 Value *NewHi =
20658 Builder.CreateTrunc(Builder.CreateLShr(NewVal, 64), Int64Ty, "new_hi");
20659 emitLeadingFence(Builder, CI, Ord);
20660 Value *LoHi =
20661 Builder.CreateCall(IntCmpXchg, {AlignedAddr, CmpLo, CmpHi, NewLo, NewHi});
20662 emitTrailingFence(Builder, CI, Ord);
20663 Value *Lo = Builder.CreateExtractValue(LoHi, 0, "lo");
20664 Value *Hi = Builder.CreateExtractValue(LoHi, 1, "hi");
20665 Lo = Builder.CreateZExt(Lo, ValTy, "lo64");
20666 Hi = Builder.CreateZExt(Hi, ValTy, "hi64");
20667 return Builder.CreateOr(
20668 Lo, Builder.CreateShl(Hi, ConstantInt::get(ValTy, 64)), "val64");
20669}
20670
20672 return Subtarget.useCRBits();
20673}
20674
20675/// Shuffle masks for vectors of bits are not legal as such vectors are
20676/// reserved for MMA/DM.
20677bool PPCTargetLowering::isShuffleMaskLegal(ArrayRef<int> Mask, EVT VT) const {
20678 if (VT.getScalarType() == MVT::i1)
20679 return false;
20680 return TargetLowering::isShuffleMaskLegal(Mask, VT);
20681}
20682
20683// Optimize the following patterns using vbpermq/vbpermd:
20684// i16 = bitcast(v16i1 truncate(v16i8))
20685// i8 = bitcast(v8i1 truncate(v8i16))
20686// i8 = bitcast(v8i1 truncate(v8i8))
20687SDValue PPCTargetLowering::DAGCombineBitcast(SDNode *N,
20688 DAGCombinerInfo &DCI) const {
20689 SDValue Op0 = N->getOperand(0);
20690 if (Op0.getOpcode() != ISD::TRUNCATE)
20691 return SDValue();
20692 SDValue Src = Op0.getOperand(0);
20693 EVT ResVT = N->getValueType(0);
20694 EVT TruncResVT = Op0.getValueType();
20695 EVT SrcVT = Src.getValueType();
20696 SDLoc dl(N);
20697 SelectionDAG &DAG = DCI.DAG;
20698 bool IsLittleEndian = Subtarget.isLittleEndian();
20699
20700 if (ResVT != MVT::i16 && ResVT != MVT::i8)
20701 return SDValue();
20702 SDValue VBPerm =
20703 GenerateVBPERM(DAG, dl, Src, SrcVT, TruncResVT, IsLittleEndian);
20704 if (!VBPerm)
20705 return SDValue();
20706 SDValue ForExtract = DAG.getBitcast(MVT::v4i32, VBPerm);
20707 SDValue Extracted =
20708 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, ForExtract,
20709 DAG.getIntPtrConstant(IsLittleEndian ? 2 : 1, dl));
20710 return DAG.getNode(ISD::TRUNCATE, dl, ResVT, Extracted);
20711}
20712
20713SDValue PPCTargetLowering::GenerateVBPERM(SelectionDAG &DAG, SDLoc dl,
20714 SDValue Src, EVT SrcVT, EVT ResVT,
20715 bool IsLE) const {
20716 bool IsV16i8 = (ResVT == MVT::v16i1 && SrcVT == MVT::v16i8);
20717 bool IsV8i16 = (ResVT == MVT::v8i1 && SrcVT == MVT::v8i16);
20718 bool IsV8i8 = (ResVT == MVT::v8i1 && SrcVT == MVT::v8i8);
20719
20720 if (!IsV16i8 && !IsV8i16 && !IsV8i8)
20721 return SDValue();
20722
20723 if (IsV8i8) {
20724 Src = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, MVT::v16i8,
20725 DAG.getUNDEF(MVT::v16i8), Src,
20726 DAG.getIntPtrConstant(0, dl));
20727 }
20728 SmallVector<int, 16> BitIndices(16, 128);
20729 unsigned NumElts = SrcVT.getVectorNumElements();
20730 unsigned EltSize = SrcVT.getScalarType().getSizeInBits();
20731 for (int Idx = 0, End = SrcVT.getVectorNumElements(); Idx < End; Idx++) {
20732 BitIndices[Idx] = EltSize * (NumElts - Idx) - 1;
20733 if (IsV8i8 && IsLE)
20734 BitIndices[Idx] += 64;
20735 }
20736 if (!IsLE)
20737 std::reverse(BitIndices.begin(), BitIndices.end());
20739 for (auto Idx : BitIndices)
20740 BVOps.push_back(DAG.getConstant(Idx, dl, MVT::i8));
20741 SDValue VRB = DAG.getBuildVector(MVT::v16i8, dl, BVOps);
20742 return DAG.getNode(
20743 ISD::INTRINSIC_WO_CHAIN, dl, MVT::v16i8,
20744 DAG.getConstant(Intrinsic::ppc_altivec_vbpermq, dl, MVT::i32),
20745 DAG.getBitcast(MVT::v16i8, Src), VRB);
20746}
static MCRegister MatchRegisterName(StringRef Name)
static unsigned getCallOpcode(const MachineFunction &CallerF, bool IsIndirect, bool IsTailCall, std::optional< CallLowering::PtrAuthInfo > &PAI, MachineRegisterInfo &MRI)
return SDValue()
static SDValue GeneratePerfectShuffle(unsigned ID, SDValue V1, SDValue V2, unsigned PFEntry, SDValue LHS, SDValue RHS, SelectionDAG &DAG, const SDLoc &DL)
GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit the specified operations t...
static bool isSignExtended(SDValue N, SelectionDAG &DAG)
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static msgpack::DocNode getNode(msgpack::DocNode DN, msgpack::Type Type, MCValue Val)
static std::pair< Register, unsigned > getBaseWithConstantOffset(MachineRegisterInfo &MRI, Register Reg)
This file declares a class to represent arbitrary precision floating point values and provide a varie...
This file implements a class to represent arbitrary precision integral constant values and operations...
This file implements the APSInt class, which is a simple class that represents an arbitrary sized int...
static bool isLoad(int Opcode)
static bool isFloatingPointZero(SDValue Op)
isFloatingPointZero - Return true if this is +0.0.
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Function Alias Analysis Results
Atomic ordering constants.
#define X(NUM, ENUM, NAME)
Definition ELF.h:851
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
static RegisterPass< DebugifyModulePass > DM("debugify", "Attach debug info to everything")
This file defines the DenseMap class.
const HexagonInstrInfo * TII
static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain, ISD::ArgFlagsTy Flags, SelectionDAG &DAG, const SDLoc &dl)
CreateCopyOfByValArgument - Make a copy of an aggregate at address specified by "Src" to address "Dst...
IRTranslator LLVM IR MI
Module.h This file contains the declarations for the Module class.
This defines the Use class.
iv users
Definition IVUsers.cpp:48
const size_t AbstractManglingParser< Derived, Alloc >::NumOps
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
#define RegName(no)
static LVOptions Options
Definition LVOptions.cpp:25
lazy value info
This file implements the LivePhysRegs utility for tracking liveness of physical registers.
static int getEstimateRefinementSteps(EVT VT, const LoongArchSubtarget &Subtarget)
static bool isSplat(Value *V)
Return true if V is a splat of a value (which is used when multiplying a matrix with a scalar).
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
#define G(x, y, z)
Definition MD5.cpp:55
Machine Check Debug Module
Register Reg
Register const TargetRegisterInfo * TRI
Promote Memory to Register
Definition Mem2Reg.cpp:110
#define T
static bool isConstantOrUndef(const SDValue Op)
#define P(N)
static CodeModel::Model getCodeModel(const PPCSubtarget &S, const TargetMachine &TM, const MachineOperand &MO)
cl::opt< bool > ANDIGlueBug("expose-ppc-andi-glue-bug", cl::desc("expose the ANDI glue bug on PPC"), cl::Hidden)
static SDValue getCanonicalConstSplat(uint64_t Val, unsigned SplatSize, EVT VT, SelectionDAG &DAG, const SDLoc &dl)
getCanonicalConstSplat - Build a canonical splat immediate of Val with an element size of SplatSize.
static bool IsSelectCC(MachineInstr &MI)
static bool CC_AIX(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
static const TargetRegisterClass * getRegClassForSVT(MVT::SimpleValueType SVT, bool IsPPC64, bool HasP8Vector, bool HasVSX)
static bool isGPRShadowAligned(MCPhysReg Reg, Align RequiredAlign)
static SDValue DAGCombineAddc(SDNode *N, llvm::PPCTargetLowering::DAGCombinerInfo &DCI)
static bool needStackSlotPassParameters(const PPCSubtarget &Subtarget, const SmallVectorImpl< ISD::OutputArg > &Outs)
std::tuple< uint32_t, uint8_t > LXVKQPattern
static bool isAlternatingShuffMask(const ArrayRef< int > &Mask, int NumElts)
static bool isShuffleMaskInRange(const SmallVectorImpl< int > &ShuffV, int HalfVec, int LHSLastElementDefined, int RHSLastElementDefined)
static SDValue addShuffleForVecExtend(SDNode *N, SelectionDAG &DAG, SDValue Input, uint64_t Elems, uint64_t CorrectElems)
static cl::opt< bool > DisablePPCUnaligned("disable-ppc-unaligned", cl::desc("disable unaligned load/store generation on PPC"), cl::Hidden)
static SDValue combineADDToADDZE(SDNode *N, SelectionDAG &DAG, const PPCSubtarget &Subtarget)
static bool findConsecutiveLoad(LoadSDNode *LD, SelectionDAG &DAG)
static SDValue generateEquivalentSub(SDNode *N, int Size, bool Complement, bool Swap, SDLoc &DL, SelectionDAG &DAG)
This function is called when we have proved that a SETCC node can be replaced by subtraction (and oth...
static unsigned mapArgRegToOffsetAIX(unsigned Reg, const PPCFrameLowering *FL)
static void CalculateTailCallArgDest(SelectionDAG &DAG, MachineFunction &MF, bool IsPPC64, SDValue Arg, int SPDiff, unsigned ArgOffset, SmallVectorImpl< TailCallArgumentInfo > &TailCallArguments)
CalculateTailCallArgDest - Remember Argument for later processing.
static SDValue combineADDToMAT_PCREL_ADDR(SDNode *N, SelectionDAG &DAG, const PPCSubtarget &Subtarget)
static void setAlignFlagsForFI(SDValue N, unsigned &FlagSet, SelectionDAG &DAG)
Set alignment flags based on whether or not the Frame Index is aligned.
static bool isTOCSaveRestoreRequired(const PPCSubtarget &Subtarget)
static void updateForAIXShLibTLSModelOpt(TLSModel::Model &Model, SelectionDAG &DAG, const TargetMachine &TM)
updateForAIXShLibTLSModelOpt - Helper to initialize TLS model opt settings, and then apply the update...
static bool provablyDisjointOr(SelectionDAG &DAG, const SDValue &N)
Used when computing address flags for selecting loads and stores.
static bool callsShareTOCBase(const Function *Caller, const GlobalValue *CalleeGV, const TargetMachine &TM)
static SDValue generateSToVPermutedForVecShuffle(int ScalarSize, uint64_t ShuffleEltWidth, unsigned &NumValidElts, int FirstElt, int &LastElt, SDValue VecShuffOperand, SDValue SToVNode, SelectionDAG &DAG, const PPCSubtarget &Subtarget)
constexpr uint64_t AIXSmallTlsPolicySizeLimit
static bool canConvertToVcmpequb(SDValue &LHS, SDValue &RHS)
static bool isPCRelNode(SDValue N)
static void LowerMemOpCallTo(SelectionDAG &DAG, MachineFunction &MF, SDValue Chain, SDValue Arg, SDValue PtrOff, int SPDiff, unsigned ArgOffset, bool isPPC64, bool isTailCall, bool isVector, SmallVectorImpl< SDValue > &MemOpChains, SmallVectorImpl< TailCallArgumentInfo > &TailCallArguments, const SDLoc &dl)
LowerMemOpCallTo - Store the argument to the stack or remember it in case of tail calls.
static cl::opt< unsigned > PPCGatherAllAliasesMaxDepth("ppc-gather-alias-max-depth", cl::init(18), cl::Hidden, cl::desc("max depth when checking alias info in GatherAllAliases()"))
static bool areCallingConvEligibleForTCO_64SVR4(CallingConv::ID CallerCC, CallingConv::ID CalleeCC)
static const MCPhysReg FPR[]
FPR - The set of FP registers that should be allocated for arguments on Darwin and AIX.
static SDNode * isBLACompatibleAddress(SDValue Op, SelectionDAG &DAG)
isCallCompatibleAddress - Return the immediate to use if the specified 32-bit value is representable ...
static Align CalculateStackSlotAlignment(EVT ArgVT, EVT OrigVT, ISD::ArgFlagsTy Flags, unsigned PtrByteSize)
CalculateStackSlotAlignment - Calculates the alignment of this argument on the stack.
static bool IsSelect(MachineInstr &MI)
static SDValue ConvertCarryFlagToCarryValue(EVT SumType, SDValue Flag, EVT CarryType, SelectionDAG &DAG, const PPCSubtarget &STI)
static bool haveEfficientBuildVectorPattern(BuildVectorSDNode *V, bool HasDirectMove, bool HasP8Vector)
Do we have an efficient pattern in a .td file for this node?
static SDValue getSToVPermuted(SDValue OrigSToV, SelectionDAG &DAG, const PPCSubtarget &Subtarget)
static void setUsesTOCBasePtr(MachineFunction &MF)
static SDValue combineXorSelectCC(SDNode *N, SelectionDAG &DAG)
static SDValue transformCallee(const SDValue &Callee, SelectionDAG &DAG, const SDLoc &dl, const PPCSubtarget &Subtarget)
static unsigned EnsureStackAlignment(const PPCFrameLowering *Lowering, unsigned NumBytes)
EnsureStackAlignment - Round stack frame size up from NumBytes to ensure minimum alignment required f...
static SDValue stripModuloOnShift(const TargetLowering &TLI, SDNode *N, SelectionDAG &DAG)
static bool isStoreConditional(SDValue Intrin, unsigned &StoreWidth)
static bool hasSameArgumentList(const Function *CallerFn, const CallBase &CB)
static bool isFPExtLoad(SDValue Op)
static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op, SelectionDAG &DAG, const SDLoc &dl, EVT DestVT=MVT::Other)
BuildIntrinsicOp - Return a unary operator intrinsic node with the specified intrinsic ID.
static bool isConsecutiveLSLoc(SDValue Loc, EVT VT, LSBaseSDNode *Base, unsigned Bytes, int Dist, SelectionDAG &DAG)
static void StoreTailCallArgumentsToStackSlot(SelectionDAG &DAG, SDValue Chain, const SmallVectorImpl< TailCallArgumentInfo > &TailCallArgs, SmallVectorImpl< SDValue > &MemOpChains, const SDLoc &dl)
StoreTailCallArgumentsToStackSlot - Stores arguments to their stack slot.
static cl::opt< bool > UseAbsoluteJumpTables("ppc-use-absolute-jumptables", cl::desc("use absolute jump tables on ppc"), cl::Hidden)
static void setXFormForUnalignedFI(SDValue N, unsigned Flags, PPC::AddrMode &Mode)
static cl::opt< unsigned > PPCMinimumBitTestCmps("ppc-min-bit-test-cmps", cl::init(3), cl::Hidden, cl::desc("Set minimum of largest number of comparisons to use bit test for " "switch on PPC."))
static void getMaxByValAlign(Type *Ty, Align &MaxAlign, Align MaxMaxAlign)
getMaxByValAlign - Helper for getByValTypeAlignment to determine the desired ByVal argument alignment...
static bool isConsecutiveLS(SDNode *N, LSBaseSDNode *Base, unsigned Bytes, int Dist, SelectionDAG &DAG)
static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize, unsigned LHSStart, unsigned RHSStart)
isVMerge - Common function, used to match vmrg* shuffles.
static void getLabelAccessInfo(bool IsPIC, const PPCSubtarget &Subtarget, unsigned &HiOpFlags, unsigned &LoOpFlags, const GlobalValue *GV=nullptr)
Return true if we should reference labels using a PICBase, set the HiOpFlags and LoOpFlags to the tar...
cl::opt< bool > DisableAutoPairedVecSt("disable-auto-paired-vec-st", cl::desc("disable automatically generated 32byte paired vector stores"), cl::init(true), cl::Hidden)
static void buildCallOperands(SmallVectorImpl< SDValue > &Ops, PPCTargetLowering::CallFlags CFlags, const SDLoc &dl, SelectionDAG &DAG, SmallVector< std::pair< unsigned, SDValue >, 8 > &RegsToPass, SDValue Glue, SDValue Chain, SDValue &Callee, int SPDiff, const PPCSubtarget &Subtarget)
static cl::opt< bool > DisableInnermostLoopAlign32("disable-ppc-innermost-loop-align32", cl::desc("don't always align innermost loop to 32 bytes on ppc"), cl::Hidden)
static bool usePartialVectorLoads(SDNode *N, const PPCSubtarget &ST)
Returns true if we should use a direct load into vector instruction (such as lxsd or lfd),...
static SDValue getDataClassTest(SDValue Op, FPClassTest Mask, const SDLoc &Dl, SelectionDAG &DAG, const PPCSubtarget &Subtarget)
static void fixupShuffleMaskForPermutedSToV(SmallVectorImpl< int > &ShuffV, int LHSFirstElt, int LHSLastElt, int RHSFirstElt, int RHSLastElt, int HalfVec, unsigned LHSNumValidElts, unsigned RHSNumValidElts, const PPCSubtarget &Subtarget)
static SDValue AdjustLength(SDValue Val, unsigned Bits, bool Left, SelectionDAG &DAG)
static cl::opt< bool > DisableSCO("disable-ppc-sco", cl::desc("disable sibling call optimization on ppc"), cl::Hidden)
static std::optional< LXVKQPattern > getPatternInfo(const APInt &FullVal)
static void fixupFuncForFI(SelectionDAG &DAG, int FrameIdx, EVT VT)
static cl::opt< bool > DisablePPCPreinc("disable-ppc-preinc", cl::desc("disable preincrement load/store generation on PPC"), cl::Hidden)
static SDValue ConvertSETCCToXori(SDNode *N, SelectionDAG &DAG)
static Intrinsic::ID getIntrinsicForAtomicRMWBinOp128(AtomicRMWInst::BinOp BinOp)
static SDValue convertFPToInt(SDValue Op, SelectionDAG &DAG, const PPCSubtarget &Subtarget)
static unsigned CalculateStackSlotSize(EVT ArgVT, ISD::ArgFlagsTy Flags, unsigned PtrByteSize)
CalculateStackSlotSize - Calculates the size reserved for this argument on the stack.
static int CalculateTailCallSPDiff(SelectionDAG &DAG, bool isTailCall, unsigned ParamSize)
CalculateTailCallSPDiff - Get the amount the stack pointer has to be adjusted to accommodate the argu...
static Instruction * callIntrinsic(IRBuilderBase &Builder, Intrinsic::ID Id)
static void prepareIndirectCall(SelectionDAG &DAG, SDValue &Callee, SDValue &Glue, SDValue &Chain, const SDLoc &dl)
static SDValue LowerLabelRef(SDValue HiPart, SDValue LoPart, bool isPIC, SelectionDAG &DAG)
static SDValue isScalarToVec(SDValue Op)
static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl)
static cl::opt< bool > DisablePerfectShuffle("ppc-disable-perfect-shuffle", cl::desc("disable vector permute decomposition"), cl::init(true), cl::Hidden)
bool isValidMtVsrBmi(APInt &BitMask, BuildVectorSDNode &BVN, bool IsLittleEndian)
static bool getVectorCompareInfo(SDValue Intrin, int &CompareOpc, bool &isDot, const PPCSubtarget &Subtarget)
getVectorCompareInfo - Given an intrinsic, return false if it is not a vector comparison.
static unsigned invertFMAOpcode(unsigned Opc)
static SDValue combineADDToSUB(SDNode *N, SelectionDAG &DAG, const PPCSubtarget &Subtarget)
static const SDValue * getNormalLoadInput(const SDValue &Op, bool &IsPermuted)
static bool canConvertSETCCToXori(SDNode *N)
static cl::opt< unsigned > PPCMinimumJumpTableEntries("ppc-min-jump-table-entries", cl::init(64), cl::Hidden, cl::desc("Set minimum number of entries to use a jump table on PPC"))
static bool isValidSplatLoad(const PPCSubtarget &Subtarget, const SDValue &Op, unsigned &Opcode)
static SDValue ConvertCarryValueToCarryFlag(EVT SumType, SDValue Value, SelectionDAG &DAG, const PPCSubtarget &STI)
static SDValue convertIntToFP(SDValue Op, SDValue Src, SelectionDAG &DAG, const PPCSubtarget &Subtarget, SDValue Chain=SDValue())
static void PrepareTailCall(SelectionDAG &DAG, SDValue &InGlue, SDValue &Chain, const SDLoc &dl, int SPDiff, unsigned NumBytes, SDValue LROp, SDValue FPOp, SmallVectorImpl< TailCallArgumentInfo > &TailCallArguments)
static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG, SDValue Chain, SDValue OldRetAddr, SDValue OldFP, int SPDiff, const SDLoc &dl)
EmitTailCallStoreFPAndRetAddr - Move the frame pointer and return address to the appropriate stack sl...
static SDValue BuildVSLDOI(SDValue LHS, SDValue RHS, unsigned Amt, EVT VT, SelectionDAG &DAG, const SDLoc &dl)
BuildVSLDOI - Return a VECTOR_SHUFFLE that is a vsldoi of the specified amount.
static SDValue combineBVZEXTLOAD(SDNode *N, SelectionDAG &DAG)
static SDValue combineZextSetccWithZero(SDNode *N, SelectionDAG &DAG)
static SDValue truncateScalarIntegerArg(ISD::ArgFlagsTy Flags, EVT ValVT, SelectionDAG &DAG, SDValue ArgValue, MVT LocVT, const SDLoc &dl)
static void computeFlagsForAddressComputation(SDValue N, unsigned &FlagSet, SelectionDAG &DAG)
Given a node, compute flags that are used for address computation when selecting load and store instr...
SDValue convertTwoLoadsAndCmpToVCMPEQUB(SelectionDAG &DAG, SDNode *N, const SDLoc &DL)
static SDValue getOutputChainFromCallSeq(SDValue CallSeqStart)
static bool CalculateStackSlotUsed(EVT ArgVT, EVT OrigVT, ISD::ArgFlagsTy Flags, unsigned PtrByteSize, unsigned LinkageSize, unsigned ParamAreaSize, unsigned &ArgOffset, unsigned &AvailableFPRs, unsigned &AvailableVRs)
CalculateStackSlotUsed - Return whether this argument will use its stack slot (instead of being passe...
static cl::opt< unsigned > PPCAIXTLSModelOptUseIEForLDLimit("ppc-aix-shared-lib-tls-model-opt-limit", cl::init(1), cl::Hidden, cl::desc("Set inclusive limit count of TLS local-dynamic access(es) in a " "function to use initial-exec"))
static unsigned getPPCStrictOpcode(unsigned Opc)
static void prepareDescriptorIndirectCall(SelectionDAG &DAG, SDValue &Callee, SDValue &Glue, SDValue &Chain, SDValue CallSeqStart, const CallBase *CB, const SDLoc &dl, bool hasNest, const PPCSubtarget &Subtarget)
static cl::opt< bool > DisableP10StoreForward("disable-p10-store-forward", cl::desc("disable P10 store forward-friendly conversion"), cl::Hidden, cl::init(false))
static bool isXXBRShuffleMaskHelper(ShuffleVectorSDNode *N, int Width)
static bool isFunctionGlobalAddress(const GlobalValue *CalleeGV)
static bool isSplatBV(SDValue Op)
static SDValue combineBVOfVecSExt(SDNode *N, SelectionDAG &DAG)
static cl::opt< bool > DisableILPPref("disable-ppc-ilp-pref", cl::desc("disable setting the node scheduling preference to ILP on PPC"), cl::Hidden)
static bool isNByteElemShuffleMask(ShuffleVectorSDNode *, unsigned, int)
Check that the mask is shuffling N byte elements.
static SDValue combineBVOfConsecutiveLoads(SDNode *N, SelectionDAG &DAG)
Reduce the number of loads when building a vector.
static bool isValidPCRelNode(SDValue N)
if(auto Err=PB.parsePassPipeline(MPM, Passes)) return wrap(std MPM run * Mod
if(PassOpts->AAPipeline)
pre isel intrinsic Pre ISel Intrinsic Lowering
static constexpr MCPhysReg SPReg
const SmallVectorImpl< MachineOperand > & Cond
static cl::opt< RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode > Mode("regalloc-enable-advisor", cl::Hidden, cl::init(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Default), cl::desc("Enable regalloc advisor mode"), cl::values(clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Default, "default", "Default"), clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Release, "release", "precompiled"), clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Development, "development", "for training")))
SI optimize exec mask operations pre RA
static const MCExpr * MaskShift(const MCExpr *Val, uint32_t Mask, uint32_t Shift, MCContext &Ctx)
This file contains some templates that are useful if you are working with the STL at all.
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
Definition Value.cpp:483
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
static SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG, const SparcSubtarget *Subtarget)
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
#define LLVM_DEBUG(...)
Definition Debug.h:114
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
This file describes how to lower LLVM code to machine code.
static std::optional< unsigned > getOpcode(ArrayRef< VPValue * > Values)
Returns the opcode of Values or ~0 if they do not all agree.
Definition VPlanSLP.cpp:247
Value * RHS
Value * LHS
The Input class is used to parse a yaml document into in-memory structs and vectors.
static const fltSemantics & IEEEsingle()
Definition APFloat.h:296
static constexpr roundingMode rmTowardZero
Definition APFloat.h:348
static constexpr roundingMode rmNearestTiesToEven
Definition APFloat.h:344
static const fltSemantics & PPCDoubleDouble()
Definition APFloat.h:299
LLVM_ABI opStatus convert(const fltSemantics &ToSemantics, roundingMode RM, bool *losesInfo)
Definition APFloat.cpp:5890
bool isDenormal() const
Definition APFloat.h:1517
APInt bitcastToAPInt() const
Definition APFloat.h:1408
Class for arbitrary precision integers.
Definition APInt.h:78
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition APInt.h:235
void clearBit(unsigned BitPosition)
Set a given bit to 0.
Definition APInt.h:1421
bool isNegatedPowerOf2() const
Check if this APInt's negated value is a power of two greater than zero.
Definition APInt.h:450
uint64_t getZExtValue() const
Get zero extended value.
Definition APInt.h:1555
void setBit(unsigned BitPosition)
Set the given bit to 1 whose position is given as "bitPosition".
Definition APInt.h:1345
APInt abs() const
Get the absolute value.
Definition APInt.h:1810
bool isAllOnes() const
Determine if all bits are set. This is true for zero-width values.
Definition APInt.h:372
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition APInt.h:381
bool isNegative() const
Determine sign of this APInt.
Definition APInt.h:330
void clearAllBits()
Set every bit to 0.
Definition APInt.h:1411
bool isSignedIntN(unsigned N) const
Check if this APInt has an N-bits signed integer value.
Definition APInt.h:436
LLVM_ABI void insertBits(const APInt &SubBits, unsigned bitPosition)
Insert the bits from a smaller APInt starting at bitPosition.
Definition APInt.cpp:397
bool getBoolValue() const
Convert APInt to a boolean value.
Definition APInt.h:472
double bitsToDouble() const
Converts APInt bits to a double.
Definition APInt.h:1737
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
Definition APInt.h:441
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
Definition APInt.h:307
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
Definition APInt.h:297
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition APInt.h:201
LLVM_ABI APInt extractBits(unsigned numBits, unsigned bitPosition) const
Return an APInt with the extracted bits [bitPosition,bitPosition+numBits).
Definition APInt.cpp:482
An arbitrary precision integer that knows its signedness.
Definition APSInt.h:24
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
size_t size() const
size - Get the array size.
Definition ArrayRef.h:142
An instruction that atomically checks whether a specified value is in a memory location,...
an instruction that atomically reads a memory location, combines it with another value,...
BinOp
This enumeration lists the possible modifications atomicrmw can make.
@ Add
*p = old + v
@ USubCond
Subtract only if no unsigned overflow.
@ Sub
*p = old - v
@ And
*p = old & v
@ Xor
*p = old ^ v
@ USubSat
*p = usub.sat(old, v) usub.sat matches the behavior of llvm.usub.sat.
@ UIncWrap
Increment one up to a maximum value.
@ UDecWrap
Decrement one until a minimum value or zero.
@ Nand
*p = ~(old & v)
BinOp getOperation() const
LLVM_ABI StringRef getValueAsString() const
Return the attribute's value as a string.
LLVM Basic Block Representation.
Definition BasicBlock.h:62
const BlockAddress * getBlockAddress() const
static BranchProbability getOne()
static BranchProbability getZero()
A "pseudo-class" with methods for operating on BUILD_VECTORs.
LLVM_ABI bool isConstantSplat(APInt &SplatValue, APInt &SplatUndef, unsigned &SplatBitSize, bool &HasAnyUndefs, unsigned MinSplatBits=0, bool isBigEndian=false) const
Check if this is a constant splat, and if so, find the smallest element size that splats the vector.
CCState - This class holds information needed while lowering arguments and return values.
Register getLocReg() const
LocInfo getLocInfo() const
static CCValAssign getReg(unsigned ValNo, MVT ValVT, MCRegister Reg, MVT LocVT, LocInfo HTP, bool IsCustom=false)
static CCValAssign getCustomReg(unsigned ValNo, MVT ValVT, MCRegister Reg, MVT LocVT, LocInfo HTP)
static CCValAssign getMem(unsigned ValNo, MVT ValVT, int64_t Offset, MVT LocVT, LocInfo HTP, bool IsCustom=false)
bool needsCustom() const
int64_t getLocMemOffset() const
unsigned getValNo() const
static CCValAssign getCustomMem(unsigned ValNo, MVT ValVT, int64_t Offset, MVT LocVT, LocInfo HTP)
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
bool isStrictFP() const
Determine if the call requires strict floating point semantics.
CallingConv::ID getCallingConv() const
User::op_iterator arg_begin()
Return the iterator pointing to the beginning of the argument list.
LLVM_ABI bool isMustTailCall() const
Tests if this call site must be tail call optimized.
Value * getCalledOperand() const
User::op_iterator arg_end()
Return the iterator pointing to the end of the argument list.
unsigned arg_size() const
LLVM_ABI Function * getCaller()
Helper to get the caller (the parent function).
This class represents a function call, abstracting a target machine's calling convention.
bool isTailCall() const
ConstantFP - Floating Point Values [float, double].
Definition Constants.h:420
const Constant * getConstVal() const
uint64_t getZExtValue() const
const APInt & getAPIntValue() const
int64_t getSExtValue() const
This is an important base class in LLVM.
Definition Constant.h:43
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:64
bool isLittleEndian() const
Layout endianness...
Definition DataLayout.h:215
LLVM_ABI unsigned getLargestLegalIntTypeSizeInBits() const
Returns the size of largest legal integer type size, or 0 if none are set.
LLVM_ABI IntegerType * getIntPtrType(LLVMContext &C, unsigned AddressSpace=0) const
Returns an integer type with size at least as big as that of a pointer in the given address space.
LLVM_ABI Align getABITypeAlign(Type *Ty) const
Returns the minimum ABI-required alignment for the specified type.
LLVM_ABI TypeSize getTypeAllocSize(Type *Ty) const
Returns the offset in bytes between successive objects of the specified type, including alignment pad...
A debug info location.
Definition DebugLoc.h:123
iterator find(const_arg_type_t< KeyT > Val)
Definition DenseMap.h:178
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition DenseMap.h:241
This is a fast-path instruction selection class that generates poor code and doesn't support illegal ...
Definition FastISel.h:66
FunctionLoweringInfo - This contains information that is global to a function that is used when lower...
bool hasOptSize() const
Optimize this function for size (-Os) or minimum size (-Oz).
Definition Function.h:714
const DataLayout & getDataLayout() const
Get the data layout of the module this function belongs to.
Definition Function.cpp:362
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
Definition Function.cpp:763
uint64_t getFnAttributeAsParsedInteger(StringRef Kind, uint64_t Default=0) const
For a string attribute Kind, parse attribute as an integer.
Definition Function.cpp:775
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition Function.h:711
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition Function.h:272
AttributeList getAttributes() const
Return the attribute list for this Function.
Definition Function.h:354
arg_iterator arg_begin()
Definition Function.h:868
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition Function.cpp:358
size_t arg_size() const
Definition Function.h:901
Type * getReturnType() const
Returns the type of the ret val.
Definition Function.h:216
const Argument * const_arg_iterator
Definition Function.h:74
bool isVarArg() const
isVarArg - Return true if this function takes a variable number of arguments.
Definition Function.h:229
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition Function.cpp:728
const GlobalValue * getGlobal() const
LLVM_ABI const GlobalObject * getAliaseeObject() const
Definition Globals.cpp:659
bool isThreadLocal() const
If the value is "Thread Local", its value isn't shared by the threads.
void setThreadLocalMode(ThreadLocalMode Val)
bool hasHiddenVisibility() const
LLVM_ABI StringRef getSection() const
Definition Globals.cpp:200
Module * getParent()
Get the module that this global value is contained inside of...
bool isStrongDefinitionForLinker() const
Returns true if this global's definition will be the one chosen by the linker.
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this global belongs to.
Definition Globals.cpp:141
bool hasComdat() const
Type * getValueType() const
bool hasProtectedVisibility() const
Common base class shared among various IRBuilders.
Definition IRBuilder.h:114
LLVM_ABI bool hasAtomicLoad() const LLVM_READONLY
Return true if this atomic instruction loads from memory.
static constexpr LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
Base class for LoadSDNode and StoreSDNode.
Tracks which library functions to use for a particular subtarget.
An instruction for reading from memory.
bool isUnordered() const
This class is used to represent ISD::LOAD nodes.
const SDValue & getBasePtr() const
ISD::LoadExtType getExtensionType() const
Return whether this is a plain node, or one of the varieties of value-extending loads.
bool hasValue() const
TypeSize getValue() const
Context object for machine code objects.
Definition MCContext.h:83
Base class for the full range of assembler expressions which are needed for parsing.
Definition MCExpr.h:34
Wrapper class representing physical registers. Should be passed by value.
Definition MCRegister.h:41
MCSymbolXCOFF * getQualNameSymbol() const
static const MCSymbolRefExpr * create(const MCSymbol *Symbol, MCContext &Ctx, SMLoc Loc=SMLoc())
Definition MCExpr.h:214
Metadata node.
Definition Metadata.h:1080
Machine Value Type.
@ INVALID_SIMPLE_VALUE_TYPE
SimpleValueType SimpleTy
uint64_t getScalarSizeInBits() const
unsigned getVectorNumElements() const
bool isVector() const
Return true if this is a vector value type.
bool isInteger() const
Return true if this is an integer or a vector integer type.
static auto integer_valuetypes()
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
static auto fixedlen_vector_valuetypes()
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
bool isScalarInteger() const
Return true if this is an integer, not including vectors.
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
static MVT getIntegerVT(unsigned BitWidth)
static auto fp_valuetypes()
LLVM_ABI void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
void setCallFrameSize(unsigned N)
Set the call frame size on entry to this basic block.
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
LLVM_ABI void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
void addLiveIn(MCRegister PhysReg, LaneBitmask LaneMask=LaneBitmask::getAll())
Adds the specified register as a live in.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
MachineInstrBundleIterator< MachineInstr > iterator
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
LLVM_ABI int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool IsImmutable, bool isAliased=false)
Create a new object at a fixed location on the stack.
LLVM_ABI int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
void setFrameAddressIsTaken(bool T)
void setHasTailCall(bool V=true)
void setReturnAddressIsTaken(bool s)
Align getObjectAlign(int ObjectIdx) const
Return the alignment of the specified stack object.
int64_t getObjectSize(int ObjectIdx) const
Return the size of the specified object.
bool hasVAStart() const
Returns true if the function calls the llvm.va_start intrinsic.
int64_t getObjectOffset(int ObjectIdx) const
Return the assigned stack offset of the specified object from the incoming stack pointer.
MCSymbol * getPICBaseSymbol() const
getPICBaseSymbol - Return a function-local symbol to represent the PIC base.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MCContext & getContext() const
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
BasicBlockListType::iterator iterator
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Register addLiveIn(MCRegister PReg, const TargetRegisterClass *RC)
addLiveIn - Add the specified physical register as a live-in value and create a corresponding virtual...
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineInstr - Allocate a new MachineInstr.
void insert(iterator MBBI, MachineBasicBlock *MBB)
const MachineInstrBuilder & addUse(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a virtual register use operand.
const MachineInstrBuilder & addReg(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & setMIFlag(MachineInstr::MIFlag Flag) const
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addRegMask(const uint32_t *Mask) const
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & addDef(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a virtual register definition operand.
const MachineInstrBuilder & cloneMemRefs(const MachineInstr &OtherMI) const
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
Representation of each machine instruction.
@ EK_LabelDifference32
EK_LabelDifference32 - Each entry is the address of the block minus the address of the jump table.
A description of a memory reference used in the backend.
LocationSize getSize() const
Return the size in bytes of the memory reference.
Flags
Flags values. These may be or'd together.
@ MOVolatile
The memory access is volatile.
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
@ MOLoad
The memory access reads data.
@ MOInvariant
The memory access always returns the same value (or traps).
@ MOStore
The memory access writes data.
Flags getFlags() const
Return the raw flags of the source value,.
MachineOperand class - Representation of each machine instruction operand.
static MachineOperand CreateImm(int64_t Val)
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
LLVM_ABI bool hasOneNonDBGUse(Register RegNo) const
hasOneNonDBGUse - Return true if there is exactly one non-Debug use of the specified register.
const TargetRegisterClass * getRegClass(Register Reg) const
Return the register class of the specified virtual register.
LLVM_ABI Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
LLVM_ABI Register getLiveInVirtReg(MCRegister PReg) const
getLiveInVirtReg - If PReg is a live-in physical register, return the corresponding live-in virtual r...
bool use_empty(Register RegNo) const
use_empty - Return true if there are no instructions using the specified register.
This SDNode is used for target intrinsics that touch memory and need an associated MachineMemOperand.
This is an abstract virtual class for memory operations.
Align getAlign() const
AAMDNodes getAAInfo() const
Returns the AA info that describes the dereference.
MachineMemOperand * getMemOperand() const
Return the unique MachineMemOperand object describing the memory reference performed by operation.
const SDValue & getBasePtr() const
const MachinePointerInfo & getPointerInfo() const
const SDValue & getChain() const
EVT getMemoryVT() const
Return the type of the in-memory value.
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:67
uint64_t getReturnSaveOffset() const
getReturnSaveOffset - Return the previous frame offset to save the return address.
unsigned getLinkageSize() const
getLinkageSize - Return the size of the PowerPC ABI linkage area.
uint64_t getTOCSaveOffset() const
getTOCSaveOffset - Return the previous frame offset to save the TOC register – 64-bit SVR4 ABI only.
PPCFunctionInfo - This class is derived from MachineFunction private PowerPC target-specific informat...
void setVarArgsNumFPR(unsigned Num)
void setVarArgsNumGPR(unsigned Num)
void appendParameterType(ParamType Type)
void setMinReservedArea(unsigned size)
unsigned getMinReservedArea() const
void setVarArgsStackOffset(int Offset)
void addLiveInAttr(Register VReg, ISD::ArgFlagsTy Flags)
This function associates attributes for each live-in virtual register.
static bool hasPCRelFlag(unsigned TF)
bool is32BitELFABI() const
unsigned descriptorTOCAnchorOffset() const
MVT getScalarIntVT() const
bool isAIXABI() const
const PPCFrameLowering * getFrameLowering() const override
bool isUsingPCRelativeCalls() const
bool usesFunctionDescriptors() const
True if the ABI is descriptor based.
MCRegister getEnvironmentPointerRegister() const
bool isSVR4ABI() const
bool isLittleEndian() const
MCRegister getTOCPointerRegister() const
MCRegister getStackPointerRegister() const
bool is64BitELFABI() const
bool isELFv2ABI() const
const PPCTargetMachine & getTargetMachine() const
const PPCRegisterInfo * getRegisterInfo() const override
unsigned descriptorEnvironmentPointerOffset() const
MachineBasicBlock * emitEHSjLjLongJmp(MachineInstr &MI, MachineBasicBlock *MBB) const
CCAssignFn * ccAssignFnForCall(CallingConv::ID CC, bool Return, bool IsVarArg) const
bool isTruncateFree(Type *Ty1, Type *Ty2) const override
isTruncateFree - Return true if it's free to truncate a value of type Ty1 to type Ty2.
Value * emitMaskedAtomicRMWIntrinsic(IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr, Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const override
Perform a masked atomicrmw using a target-specific intrinsic.
MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *MBB) const override
This method should be implemented by targets that mark instructions with the 'usesCustomInserter' fla...
bool isFPExtFree(EVT DestVT, EVT SrcVT) const override
Return true if an fpext operation is free (for instance, because single-precision floating-point numb...
PPC::AddrMode SelectForceXFormMode(SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG) const
SelectForceXFormMode - Given the specified address, force it to be represented as an indexed [r+r] op...
Instruction * emitTrailingFence(IRBuilderBase &Builder, Instruction *Inst, AtomicOrdering Ord) const override
TargetLowering::AtomicExpansionKind shouldExpandAtomicRMWInIR(const AtomicRMWInst *AI) const override
Returns how the IR-level AtomicExpand pass should expand the given AtomicRMW, if at all.
bool hasInlineStackProbe(const MachineFunction &MF) const override
MachineBasicBlock * emitEHSjLjSetJmp(MachineInstr &MI, MachineBasicBlock *MBB) const
bool supportsTailCallFor(const CallBase *CB) const
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override
Return true if folding a constant offset with the given GlobalAddress is legal.
MachineBasicBlock * emitProbedAlloca(MachineInstr &MI, MachineBasicBlock *MBB) const
bool isZExtFree(SDValue Val, EVT VT2) const override
Return true if zero-extending the specific node Val to type VT2 is free (either because it's implicit...
MachineBasicBlock * EmitPartwordAtomicBinary(MachineInstr &MI, MachineBasicBlock *MBB, bool is8bit, unsigned Opcode, unsigned CmpOpcode=0, unsigned CmpPred=0) const
SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, NegatibleCost &Cost, unsigned Depth=0) const override
Return the newly negated expression if the cost is not expensive and set the cost in Cost to indicate...
bool SelectAddressRegImm(SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG, MaybeAlign EncodingAlignment) const
SelectAddressRegImm - Returns true if the address N can be represented by a base register plus a sign...
SDValue expandVSXLoadForLE(SDNode *N, DAGCombinerInfo &DCI) const
bool splitValueIntoRegisterParts(SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts, unsigned NumParts, MVT PartVT, std::optional< CallingConv::ID > CC) const override
Target-specific splitting of values into parts that fit a register storing a legal type.
void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const override
LowerAsmOperandForConstraint - Lower the specified operand into the Ops vector.
void ReplaceNodeResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG) const override
ReplaceNodeResults - Replace the results of node with an illegal result type with new values built ou...
bool hasMultipleConditionRegisters(EVT VT) const override
Does the target have multiple (allocatable) condition registers that can be used to store the results...
Align getByValTypeAlignment(Type *Ty, const DataLayout &DL) const override
getByValTypeAlignment - Return the desired alignment for ByVal aggregate function arguments in the ca...
bool SelectAddressRegReg(SDValue N, SDValue &Base, SDValue &Index, SelectionDAG &DAG, MaybeAlign EncodingAlignment=std::nullopt) const
SelectAddressRegReg - Given the specified address, check to see if it can be more efficiently repre...
MachineBasicBlock * EmitAtomicBinary(MachineInstr &MI, MachineBasicBlock *MBB, unsigned AtomicSize, unsigned BinOpcode, unsigned CmpOpcode=0, unsigned CmpPred=0) const
SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created) const override
Targets may override this function to provide custom SDIV lowering for power-of-2 denominators.
Value * emitStoreConditional(IRBuilderBase &Builder, Value *Val, Value *Addr, AtomicOrdering Ord) const override
Perform a store-conditional operation to Addr.
void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth=0) const override
Determine which of the bits specified in Mask are known to be either zero or one and return them in t...
bool SelectAddressRegRegOnly(SDValue N, SDValue &Base, SDValue &Index, SelectionDAG &DAG) const
SelectAddressRegRegOnly - Given the specified address, force it to be represented as an indexed [r+...
bool useSoftFloat() const override
SDValue getPICJumpTableRelocBase(SDValue Table, SelectionDAG &DAG) const override
Returns relocation base for the given PIC jumptable.
TargetLowering::AtomicExpansionKind shouldExpandAtomicCmpXchgInIR(const AtomicCmpXchgInst *AI) const override
Returns how the given atomic cmpxchg should be expanded by the IR-level AtomicExpand pass.
Value * emitMaskedAtomicCmpXchgIntrinsic(IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr, Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const override
Perform a masked cmpxchg using a target-specific intrinsic.
ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &info, const char *constraint) const override
Examine constraint string and operand type and determine a weight value.
bool enableAggressiveFMAFusion(EVT VT) const override
Return true if target always benefits from combining into FMA for a given value type.
Register getRegisterByName(const char *RegName, LLT VT, const MachineFunction &MF) const override
Return the register ID of the name passed in.
bool decomposeMulByConstant(LLVMContext &Context, EVT VT, SDValue C) const override
Return true if it is profitable to transform an integer multiplication-by-constant into simpler opera...
void getTgtMemIntrinsic(SmallVectorImpl< IntrinsicInfo > &Infos, const CallBase &I, MachineFunction &MF, unsigned Intrinsic) const override
Given an intrinsic, checks if on the target the intrinsic will need to map to a MemIntrinsicNode (tou...
unsigned getJumpTableEncoding() const override
Return the entry encoding for a jump table in the current function.
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I=nullptr) const override
isLegalAddressingMode - Return true if the addressing mode represented by AM is legal for this target...
bool preferIncOfAddToSubOfNot(EVT VT) const override
These two forms are equivalent: sub y, (xor x, -1) add (add x, 1), y The variant with two add's is IR...
bool shouldConvertConstantLoadToIntImm(const APInt &Imm, Type *Ty) const override
Returns true if it is beneficial to convert a load of a constant to just the constant itself.
const MCPhysReg * getScratchRegisters(CallingConv::ID CC) const override
Returns a 0 terminated array of registers that can be safely used as scratch registers.
bool getPreIndexedAddressParts(SDNode *N, SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG) const override
getPreIndexedAddressParts - returns true by value, base pointer and offset pointer and addressing mod...
FastISel * createFastISel(FunctionLoweringInfo &FuncInfo, const TargetLibraryInfo *LibInfo, const LibcallLoweringInfo *LibcallLowering) const override
createFastISel - This method returns a target-specific FastISel object, or null if the target does no...
bool isProfitableToHoist(Instruction *I) const override
isProfitableToHoist - Check if it is profitable to hoist instruction I to its dominator block.
bool isFPImmLegal(const APFloat &Imm, EVT VT, bool ForCodeSize) const override
Returns true if the target can instruction select the specified FP immediate natively.
Value * emitLoadLinked(IRBuilderBase &Builder, Type *ValueTy, Value *Addr, AtomicOrdering Ord) const override
Perform a load-linked operation on Addr, returning a "Value *" with the corresponding pointee type.
ConstraintType getConstraintType(StringRef Constraint) const override
getConstraintType - Given a constraint, return the type of constraint it is for this target.
const MCExpr * getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI, MCContext &Ctx) const override
This returns the relocation base for the given PIC jumptable, the same as getPICJumpTableRelocBase,...
bool shallExtractConstSplatVectorElementToStore(Type *VectorTy, unsigned ElemSizeInBits, unsigned &Index) const override
Return true if the target shall perform extract vector element and store given that the vector is kno...
EVT getOptimalMemOpType(LLVMContext &Context, const MemOp &Op, const AttributeList &FuncAttributes) const override
It returns EVT::Other if the type should be determined using generic target-independent logic.
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override
This method will be invoked for all target nodes and for any target-independent nodes that the target...
SDValue expandVSXStoreForLE(SDNode *N, DAGCombinerInfo &DCI) const
void CollectTargetIntrinsicOperands(const CallInst &I, SmallVectorImpl< SDValue > &Ops, SelectionDAG &DAG) const override
unsigned getStackProbeSize(const MachineFunction &MF) const
PPCTargetLowering(const PPCTargetMachine &TM, const PPCSubtarget &STI)
bool useLoadStackGuardNode(const Module &M) const override
Override to support customized stack guard loading.
bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, EVT VT) const override
isFMAFasterThanFMulAndFAdd - Return true if an FMA operation is faster than a pair of fmul and fadd i...
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const override
Is unaligned memory access allowed for the given type, and is it fast relative to software emulation.
bool shouldExpandBuildVectorWithShuffles(EVT VT, unsigned DefinedValues) const override
bool SelectAddressRegImm34(SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG) const
Similar to the 16-bit case but for instructions that take a 34-bit displacement field (prefixed loads...
std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const override
Given a physical register constraint (e.g.
Register getExceptionSelectorRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception typeid on entry to a la...
bool isJumpTableRelative() const override
Register getExceptionPointerRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception address on entry to an ...
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
LowerOperation - Provide custom lowering hooks for some operations.
PPC::AddrMode SelectOptimalAddrMode(const SDNode *Parent, SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG, MaybeAlign Align) const
SelectOptimalAddrMode - Based on a node N and its Parent (a MemSDNode), compute the address flags of...
bool SelectAddressPCRel(SDValue N, SDValue &Base) const
SelectAddressPCRel - Represent the specified address as pc relative to be represented as [pc+imm].
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const override
getSetCCResultType - Return the ISD::SETCC ValueType
bool SelectAddressEVXRegReg(SDValue N, SDValue &Base, SDValue &Index, SelectionDAG &DAG) const
SelectAddressEVXRegReg - Given the specified address, check to see if it can be more efficiently re...
bool isLegalICmpImmediate(int64_t Imm) const override
isLegalICmpImmediate - Return true if the specified immediate is legal icmp immediate,...
bool isAccessedAsGotIndirect(SDValue N) const
Align getPrefLoopAlignment(MachineLoop *ML) const override
Return the preferred loop alignment.
Instruction * emitLeadingFence(IRBuilderBase &Builder, Instruction *Inst, AtomicOrdering Ord) const override
Inserts in the IR a target-specific intrinsic specifying a fence.
bool isLegalAddImmediate(int64_t Imm) const override
isLegalAddImmediate - Return true if the specified immediate is legal add immediate,...
Common code between 32-bit and 64-bit PowerPC targets.
static PointerType * getUnqual(Type *ElementType)
This constructs a pointer to an object of the specified type in the default address space (address sp...
Wrapper class representing virtual and physical registers.
Definition Register.h:20
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition Register.h:79
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
This class provides iterator support for SDUse operands that use a specific SDNode.
Represents one node in the SelectionDAG.
ArrayRef< SDUse > ops() const
LLVM_ABI void dump() const
Dump this node, for debugging.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
bool hasOneUse() const
Return true if there is exactly one use of this node.
iterator_range< value_op_iterator > op_values() const
iterator_range< use_iterator > uses()
SDNodeFlags getFlags() const
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
unsigned getNumValues() const
Return the number of values defined/returned by this operator.
unsigned getNumOperands() const
Return the number of values used by this operation.
const SDValue & getOperand(unsigned Num) const
uint64_t getConstantOperandVal(unsigned Num) const
Helper method returns the integer value of a ConstantSDNode operand.
bool hasNUsesOfValue(unsigned NUses, unsigned Value) const
Return true if there are exactly NUSES uses of the indicated value.
use_iterator use_begin() const
Provide iteration support to walk over all uses of an SDNode.
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
iterator_range< user_iterator > users()
user_iterator user_begin() const
Provide iteration support to walk over all users of an SDNode.
static use_iterator use_end()
Represents a use of a SDNode.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isUndef() const
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
bool isMachineOpcode() const
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
const SDValue & getOperand(unsigned i) const
uint64_t getConstantOperandVal(unsigned i) const
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
unsigned getMachineOpcode() const
unsigned getOpcode() const
unsigned getNumOperands() const
static SectionKind getMetadata()
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
LLVM_ABI SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
LLVM_ABI SDValue getStackArgumentTokenFactor(SDValue Chain)
Compute a TokenFactor to force all the incoming stack arguments to be loaded from the stack.
const TargetSubtargetInfo & getSubtarget() const
SDValue getCopyToReg(SDValue Chain, const SDLoc &dl, Register Reg, SDValue N)
LLVM_ABI SDValue getMergeValues(ArrayRef< SDValue > Ops, const SDLoc &dl)
Create a MERGE_VALUES node from the given operands.
LLVM_ABI SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
LLVM_ABI SDValue getAllOnesConstant(const SDLoc &DL, EVT VT, bool IsTarget=false, bool IsOpaque=false)
LLVM_ABI MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
LLVM_ABI SDValue getFreeze(SDValue V)
Return a freeze using the SDLoc of the value operand.
LLVM_ABI SDValue makeEquivalentMemoryOrdering(SDValue OldChain, SDValue NewMemOpChain)
If an existing load has uses of its chain, create a token factor node with that chain and the new mem...
LLVM_ABI SDValue getConstantFP(double Val, const SDLoc &DL, EVT VT, bool isTarget=false)
Create a ConstantFPSDNode wrapping a constant value.
LLVM_ABI SDValue getRegister(Register Reg, EVT VT)
LLVM_ABI SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
LLVM_ABI SDValue getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef< SDValue > Ops, EVT MemVT, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags Flags=MachineMemOperand::MOLoad|MachineMemOperand::MOStore, LocationSize Size=LocationSize::precise(0), const AAMDNodes &AAInfo=AAMDNodes())
Creates a MemIntrinsicNode that may produce a result and takes a list of operands.
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Chain=SDValue(), bool IsSignaling=false, SDNodeFlags Flags={})
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...
LLVM_ABI SDValue getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src, SDValue Size, Align Alignment, bool isVol, bool AlwaysInline, const CallInst *CI, std::optional< bool > OverrideTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo, const AAMDNodes &AAInfo=AAMDNodes(), BatchAAResults *BatchAA=nullptr)
LLVM_ABI Align getEVTAlign(EVT MemoryVT) const
Compute the default alignment value for the given type.
void addNoMergeSiteInfo(const SDNode *Node, bool NoMerge)
Set NoMergeSiteInfo to be associated with Node if NoMerge is true.
LLVM_ABI SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
const TargetLowering & getTargetLoweringInfo() const
static constexpr unsigned MaxRecursionDepth
SDValue getTargetJumpTable(int JTI, EVT VT, unsigned TargetFlags=0)
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getCALLSEQ_END(SDValue Chain, SDValue Op1, SDValue Op2, SDValue InGlue, const SDLoc &DL)
Return a new CALLSEQ_END node, which always must have a glue result (to ensure it's not CSE'd).
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
LLVM_ABI bool isSplatValue(SDValue V, const APInt &DemandedElts, APInt &UndefElts, unsigned Depth=0) const
Test whether V has a splatted value for all the demanded elements.
LLVM_ABI SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, Register Reg, EVT VT)
SDValue getSelect(const SDLoc &DL, EVT VT, SDValue Cond, SDValue LHS, SDValue RHS, SDNodeFlags Flags=SDNodeFlags())
Helper function to make it easier to build Select's if you just have operands and don't want to check...
const DataLayout & getDataLayout() const
SDValue getTargetFrameIndex(int FI, EVT VT)
LLVM_ABI SDValue getTokenFactor(const SDLoc &DL, SmallVectorImpl< SDValue > &Vals)
Creates a new TokenFactor containing Vals.
LLVM_ABI bool areNonVolatileConsecutiveLoads(LoadSDNode *LD, LoadSDNode *Base, unsigned Bytes, int Dist) const
Return true if loads are next to each other and can be merged.
LLVM_ABI SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
SDValue getSignedTargetConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
LLVM_ABI SDValue getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, EVT SVT, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
LLVM_ABI SDValue getMDNode(const MDNode *MD)
Return an MDNodeSDNode which holds an MDNode.
LLVM_ABI void ReplaceAllUsesWith(SDValue From, SDValue To)
Modify anything using 'From' to use 'To' instead.
LLVM_ABI SDValue getCommutedVectorShuffle(const ShuffleVectorSDNode &SV)
Returns an ISD::VECTOR_SHUFFLE node semantically equivalent to the shuffle node in input but with swa...
LLVM_ABI SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
LLVM_ABI SDValue getSignedConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
SDValue getCALLSEQ_START(SDValue Chain, uint64_t InSize, uint64_t OutSize, const SDLoc &DL)
Return a new CALLSEQ_START node, that starts new call frame, in which InSize bytes are set up inside ...
SDValue getSelectCC(const SDLoc &DL, SDValue LHS, SDValue RHS, SDValue True, SDValue False, ISD::CondCode Cond, SDNodeFlags Flags=SDNodeFlags())
Helper function to make it easier to build SelectCC's if you just have an ISD::CondCode instead of an...
LLVM_ABI SDValue getSExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either sign-extending or trunca...
LLVM_ABI SDValue getBoolExtOrTrunc(SDValue Op, const SDLoc &SL, EVT VT, EVT OpVT)
Convert Op, which must be of integer type, to the integer type VT, by using an extension appropriate ...
LLVM_ABI SDValue getExternalSymbol(const char *Sym, EVT VT)
const TargetMachine & getTarget() const
LLVM_ABI SDValue getAnyExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either any-extending or truncat...
LLVM_ABI SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
LLVM_ABI SDValue getValueType(EVT)
LLVM_ABI SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
LLVM_ABI unsigned ComputeNumSignBits(SDValue Op, unsigned Depth=0) const
Return the number of times the sign bit of the register is replicated into the other bits.
LLVM_ABI SDValue getBoolConstant(bool V, const SDLoc &DL, EVT VT, EVT OpVT)
Create a true or false constant of type VT using the target's BooleanContent for type OpVT.
SDValue getTargetBlockAddress(const BlockAddress *BA, EVT VT, int64_t Offset=0, unsigned TargetFlags=0)
LLVM_ABI bool isBaseWithConstantOffset(SDValue Op) const
Return true if the specified operand is an ISD::ADD with a ConstantSDNode on the right-hand side,...
LLVM_ABI SDValue getVectorIdxConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
LLVM_ABI void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone.
MachineFunction & getMachineFunction() const
SDValue getSplatBuildVector(EVT VT, const SDLoc &DL, SDValue Op)
Return a splat ISD::BUILD_VECTOR node, consisting of Op splatted to all elements.
LLVM_ABI SDValue getFrameIndex(int FI, EVT VT, bool isTarget=false)
LLVM_ABI KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
LLVM_ABI SDValue getRegisterMask(const uint32_t *RegMask)
LLVM_ABI SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
LLVM_ABI SDValue getCondCode(ISD::CondCode Cond)
LLVM_ABI bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
SDValue getObjectPtrOffset(const SDLoc &SL, SDValue Ptr, TypeSize Offset)
Create an add instruction with appropriate flags when used for addressing some offset of an object.
LLVMContext * getContext() const
LLVM_ABI SDValue getTargetExternalSymbol(const char *Sym, EVT VT, unsigned TargetFlags=0)
LLVM_ABI SDValue getMCSymbol(MCSymbol *Sym, EVT VT)
LLVM_ABI SDValue CreateStackTemporary(TypeSize Bytes, Align Alignment)
Create a stack temporary based on the size in bytes and the alignment.
SDValue getTargetConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offset=0, unsigned TargetFlags=0)
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
LLVM_ABI std::pair< SDValue, SDValue > SplitScalar(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the scalar node with EXTRACT_ELEMENT using the provided VTs and return the low/high part.
LLVM_ABI SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
This SDNode is used to implement the code generator support for the llvm IR shufflevector instruction...
int getMaskElt(unsigned Idx) const
ArrayRef< int > getMask() const
size_type size() const
Definition SmallPtrSet.h:99
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
This class is used to represent ISD::STORE nodes.
const SDValue & getBasePtr() const
const SDValue & getValue() const
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
constexpr size_t size() const
size - Get the string size.
Definition StringRef.h:143
constexpr const char * data() const
data - Get a pointer to the start of the string (which may not be null terminated).
Definition StringRef.h:137
Class to represent struct types.
Information about stack frame layout on the target.
unsigned getStackAlignment() const
getStackAlignment - This method returns the number of bytes to which the stack pointer must be aligne...
TargetInstrInfo - Interface to description of machine instruction set.
Provides information about what library functions are available for the current target.
void setBooleanVectorContents(BooleanContent Ty)
Specify how the target extends the result of a vector boolean value from a vector of i1 to a wider ty...
void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action)
Indicate that the specified operation does not work with the specified type and indicate what to do a...
bool PredictableSelectIsExpensive
Tells the code generator that select is more expensive than a branch if the branch is usually predict...
virtual bool isShuffleMaskLegal(ArrayRef< int >, EVT) const
Targets can use this to indicate that they only support some VECTOR_SHUFFLE operations,...
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
virtual bool shouldExpandBuildVectorWithShuffles(EVT, unsigned DefinedValues) const
void setMinimumBitTestCmps(unsigned Val)
Set the minimum of largest of number of comparisons to generate BitTest.
unsigned MaxStoresPerMemcpyOptSize
Likewise for functions with the OptSize attribute.
MachineBasicBlock * emitPatchPoint(MachineInstr &MI, MachineBasicBlock *MBB) const
Replace/modify any TargetFrameIndex operands with a target-dependent sequence of memory operands that...
virtual const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const
Return the register class that should be used for the specified value type.
virtual AtomicExpansionKind shouldExpandAtomicRMWInIR(const AtomicRMWInst *RMW) const
Returns how the IR-level AtomicExpand pass should expand the given AtomicRMW, if at all.
void setMinStackArgumentAlignment(Align Alignment)
Set the minimum stack alignment of an argument.
MVT getVectorIdxTy(const DataLayout &DL) const
Returns the type to be used for the index operand of: ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT...
const TargetMachine & getTargetMachine() const
unsigned MaxLoadsPerMemcmp
Specify maximum number of load instructions per memcmp call.
virtual bool isZExtFree(Type *FromTy, Type *ToTy) const
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
void setIndexedLoadAction(ArrayRef< unsigned > IdxModes, MVT VT, LegalizeAction Action)
Indicate that the specified indexed load does or does not work with the specified type and indicate w...
void setPrefLoopAlignment(Align Alignment)
Set the target's preferred loop alignment.
void setMaxAtomicSizeInBitsSupported(unsigned SizeInBits)
Set the maximum atomic operation size supported by the backend.
Sched::Preference getSchedulingPreference() const
Return target scheduling preference.
void setMinFunctionAlignment(Align Alignment)
Set the target's minimum function alignment.
bool isOperationCustom(unsigned Op, EVT VT) const
Return true if the operation uses custom lowering, regardless of whether the type is legal or not.
unsigned MaxStoresPerMemsetOptSize
Likewise for functions with the OptSize attribute.
bool hasBigEndianPartOrdering(EVT VT, const DataLayout &DL) const
When splitting a value of the specified type into parts, does the Lo or Hi part come first?
EVT getShiftAmountTy(EVT LHSTy, const DataLayout &DL) const
Returns the type for the shift amount of a shift opcode.
void setBooleanContents(BooleanContent Ty)
Specify how the target extends the result of integer and floating point boolean values from i1 to a w...
unsigned MaxStoresPerMemmove
Specify maximum number of store instructions per memmove call.
virtual Align getPrefLoopAlignment(MachineLoop *ML=nullptr) const
Return the preferred loop alignment.
void computeRegisterProperties(const TargetRegisterInfo *TRI)
Once all of the register classes are added, this allows us to compute derived properties we expose.
unsigned MaxStoresPerMemmoveOptSize
Likewise for functions with the OptSize attribute.
void addRegisterClass(MVT VT, const TargetRegisterClass *RC)
Add the specified register class as an available regclass for the specified value type.
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
void setIndexedStoreAction(ArrayRef< unsigned > IdxModes, MVT VT, LegalizeAction Action)
Indicate that the specified indexed store does or does not work with the specified type and indicate ...
virtual bool isJumpTableRelative() const
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
void setPrefFunctionAlignment(Align Alignment)
Set the target's preferred function alignment.
bool isOperationLegal(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target.
unsigned MaxStoresPerMemset
Specify maximum number of store instructions per memset call.
void setMinimumJumpTableEntries(unsigned Val)
Indicate the minimum number of blocks to generate jump tables.
void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified truncating store does not work with the specified type and indicate what ...
bool isOperationLegalOrCustom(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
unsigned MaxLoadsPerMemcmpOptSize
Likewise for functions with the OptSize attribute.
void setMinCmpXchgSizeInBits(unsigned SizeInBits)
Sets the minimum cmpxchg or ll/sc size supported by the backend.
void setStackPointerRegisterToSaveRestore(Register R)
If set to a physical register, this specifies the register that llvm.savestack/llvm....
void AddPromotedToType(unsigned Opc, MVT OrigVT, MVT DestVT)
If Opc/OrigVT is specified as being promoted, the promotion code defaults to trying a larger integer/...
AtomicExpansionKind
Enum that specifies what an atomic load/AtomicRMWInst is expanded to, if at all.
void setCondCodeAction(ArrayRef< ISD::CondCode > CCs, MVT VT, LegalizeAction Action)
Indicate that the specified condition code is or isn't supported on the target and indicate what to d...
void setTargetDAGCombine(ArrayRef< ISD::NodeType > NTs)
Targets should invoke this method for each target independent node that they want to provide a custom...
void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified load with extension does not work with the specified type and indicate wh...
unsigned GatherAllAliasesMaxDepth
Depth that GatherAllAliases should continue looking for chain dependencies when trying to find a more...
NegatibleCost
Enum that specifies when a float negation is beneficial.
virtual bool shouldSignExtendTypeInLibCall(Type *Ty, bool IsSigned) const
Returns true if arguments should be sign-extended in lib calls.
std::vector< ArgListEntry > ArgListTy
unsigned MaxStoresPerMemcpy
Specify maximum number of store instructions per memcpy call.
void setSchedulingPreference(Sched::Preference Pref)
Specify the target scheduling preference.
void setJumpIsExpensive(bool isExpensive=true)
Tells the code generator not to expand logic operations on comparison predicates into separate sequen...
virtual MCSymbol * getFunctionEntryPointSymbol(const GlobalValue *Func, const TargetMachine &TM) const
If supported, return the function entry point symbol.
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
virtual const MCExpr * getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI, MCContext &Ctx) const
This returns the relocation base for the given PIC jumptable, the same as getPICJumpTableRelocBase,...
SDValue lowerCmpEqZeroToCtlzSrl(SDValue Op, SelectionDAG &DAG) const
void softenSetCCOperands(SelectionDAG &DAG, EVT VT, SDValue &NewLHS, SDValue &NewRHS, ISD::CondCode &CCCode, const SDLoc &DL, const SDValue OldLHS, const SDValue OldRHS) const
Soften the operands of a comparison.
SDValue getCheaperNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, unsigned Depth=0) const
This is the helper function to return the newly negated expression only when the cost is cheaper.
virtual ConstraintType getConstraintType(StringRef Constraint) const
Given a constraint, return the type of constraint it is for this target.
virtual SDValue LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA, SelectionDAG &DAG) const
Lower TLS global address SDNode for target independent emulated TLS model.
std::pair< SDValue, SDValue > LowerCallTo(CallLoweringInfo &CLI) const
This function lowers an abstract call to a function into an actual call.
bool isPositionIndependent() const
virtual SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, NegatibleCost &Cost, unsigned Depth=0) const
Return the newly negated expression if the cost is not expensive and set the cost in Cost to indicate...
virtual ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &info, const char *constraint) const
Examine constraint string and operand type and determine a weight value.
virtual SDValue getPICJumpTableRelocBase(SDValue Table, SelectionDAG &DAG) const
Returns relocation base for the given PIC jumptable.
virtual std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const
Given a physical register constraint (e.g.
TargetLowering(const TargetLowering &)=delete
bool isInTailCallPosition(SelectionDAG &DAG, SDNode *Node, SDValue &Chain) const
Check whether a given call node is in tail position within its function.
virtual SDValue getSqrtResultForDenormInput(SDValue Operand, SelectionDAG &DAG) const
Return a target-dependent result if the input operand is not suitable for use with a square root esti...
virtual bool useLoadStackGuardNode(const Module &M) const
If this function returns true, SelectionDAGBuilder emits a LOAD_STACK_GUARD node when it is lowering ...
virtual unsigned combineRepeatedFPDivisors() const
Indicate whether this target prefers to combine FDIVs with the same divisor.
virtual void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const
Lower the specified operand into the Ops vector.
virtual SDValue getSqrtInputTest(SDValue Operand, SelectionDAG &DAG, const DenormalMode &Mode, SDNodeFlags Flags={}) const
Return a target-dependent comparison result if the input operand is suitable for use with a square ro...
virtual bool isGAPlusOffset(SDNode *N, const GlobalValue *&GA, int64_t &Offset) const
Returns true (and the GlobalValue and the offset) if the node is a GlobalAddress + offset.
virtual unsigned getJumpTableEncoding() const
Return the entry encoding for a jump table in the current function.
std::pair< SDValue, SDValue > makeLibCall(SelectionDAG &DAG, RTLIB::LibcallImpl LibcallImpl, EVT RetVT, ArrayRef< SDValue > Ops, MakeLibCallOptions CallOptions, const SDLoc &dl, SDValue Chain=SDValue()) const
Returns a pair of (return value, chain).
Primary interface to the complete machine description for the target machine.
TLSModel::Model getTLSModel(const GlobalValue *GV) const
Returns the TLS model which should be used for the given global variable.
const STC & getSubtarget(const Function &F) const
This method returns a pointer to the specified type of TargetSubtargetInfo.
bool useEmulatedTLS() const
Returns true if this target uses emulated TLS.
virtual TargetLoweringObjectFile * getObjFileLowering() const
Reloc::Model getRelocationModel() const
Returns the code generation relocation model.
bool shouldAssumeDSOLocal(const GlobalValue *GV) const
TargetOptions Options
CodeModel::Model getCodeModel() const
Returns the code model.
bool getFunctionSections() const
Return true if functions should be emitted into their own section, corresponding to -ffunction-sectio...
unsigned PPCGenScalarMASSEntries
Enables scalar MASS conversions.
unsigned GuaranteedTailCallOpt
GuaranteedTailCallOpt - This flag is enabled when -tailcallopt is specified on the commandline.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition TypeSize.h:343
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:46
static LLVM_ABI IntegerType * getInt64Ty(LLVMContext &C)
Definition Type.cpp:314
LLVM_ABI bool isEmptyTy() const
Return true if this type is empty, that is, it has no elements or all of its elements are empty.
Definition Type.cpp:184
bool isVectorTy() const
True if this is an instance of VectorType.
Definition Type.h:290
bool isFloatTy() const
Return true if this is 'float', a 32-bit IEEE fp type.
Definition Type.h:155
@ FloatTyID
32-bit floating point type
Definition Type.h:59
@ DoubleTyID
64-bit floating point type
Definition Type.h:60
@ FP128TyID
128-bit floating point type (112-bit significand)
Definition Type.h:62
static LLVM_ABI Type * getVoidTy(LLVMContext &C)
Definition Type.cpp:286
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Definition Type.cpp:201
bool isSized(SmallPtrSetImpl< Type * > *Visited=nullptr) const
Return true if it makes sense to take the size of this type.
Definition Type.h:328
bool isDoubleTy() const
Return true if this is 'double', a 64-bit IEEE fp type.
Definition Type.h:158
bool isFunctionTy() const
True if this is an instance of FunctionType.
Definition Type.h:275
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:257
A Use represents the edge between a Value definition and its users.
Definition Use.h:35
User * getUser() const
Returns the User that contains this Use.
Definition Use.h:61
Value * getOperand(unsigned i) const
Definition User.h:207
unsigned getNumOperands() const
Definition User.h:229
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:255
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition Value.h:439
const ParentTy * getParent() const
Definition ilist_node.h:34
self_iterator getIterator()
Definition ilist_node.h:123
CallInst * Call
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ Cold
Attempts to make code in the caller as efficient as possible under the assumption that the call is no...
Definition CallingConv.h:47
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition CallingConv.h:41
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
LLVM_ABI bool isConstantSplatVectorAllOnes(const SDNode *N, bool BuildVectorOnly=false)
Return true if the specified node is a BUILD_VECTOR or SPLAT_VECTOR where all of the elements are ~0 ...
bool isNON_EXTLoad(const SDNode *N)
Returns true if the specified node is a non-extending load.
NodeType
ISD::NodeType enum - This enum defines the target-independent operators for a SelectionDAG.
Definition ISDOpcodes.h:41
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition ISDOpcodes.h:819
@ MERGE_VALUES
MERGE_VALUES - This node takes multiple discrete operands and returns them all as its individual resu...
Definition ISDOpcodes.h:261
@ STACKRESTORE
STACKRESTORE has two operands, an input chain and a pointer to restore to it returns an output chain.
@ STACKSAVE
STACKSAVE - STACKSAVE has one operand, an input chain.
@ TargetConstantPool
Definition ISDOpcodes.h:189
@ STRICT_FSETCC
STRICT_FSETCC/STRICT_FSETCCS - Constrained versions of SETCC, used for floating-point operands only.
Definition ISDOpcodes.h:511
@ DELETED_NODE
DELETED_NODE - This is an illegal value that is used to catch errors.
Definition ISDOpcodes.h:45
@ EH_SJLJ_LONGJMP
OUTCHAIN = EH_SJLJ_LONGJMP(INCHAIN, buffer) This corresponds to the eh.sjlj.longjmp intrinsic.
Definition ISDOpcodes.h:168
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition ISDOpcodes.h:275
@ INSERT_SUBVECTOR
INSERT_SUBVECTOR(VECTOR1, VECTOR2, IDX) - Returns a vector with VECTOR2 inserted into VECTOR1.
Definition ISDOpcodes.h:600
@ BSWAP
Byte Swap and Counting operators.
Definition ISDOpcodes.h:779
@ VAEND
VAEND, VASTART - VAEND and VASTART have three operands: an input chain, pointer, and a SRCVALUE.
@ ATOMIC_STORE
OUTCHAIN = ATOMIC_STORE(INCHAIN, val, ptr) This corresponds to "store atomic" instruction.
@ ADD
Simple integer binary arithmetic operators.
Definition ISDOpcodes.h:264
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition ISDOpcodes.h:853
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition ISDOpcodes.h:518
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition ISDOpcodes.h:220
@ GlobalAddress
Definition ISDOpcodes.h:88
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition ISDOpcodes.h:880
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition ISDOpcodes.h:584
@ FADD
Simple binary floating point operators.
Definition ISDOpcodes.h:417
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
Definition ISDOpcodes.h:747
@ SDIVREM
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
Definition ISDOpcodes.h:280
@ FP16_TO_FP
FP16_TO_FP, FP_TO_FP16 - These operators are used to perform promotions and truncation for half-preci...
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition ISDOpcodes.h:993
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition ISDOpcodes.h:254
@ INIT_TRAMPOLINE
INIT_TRAMPOLINE - This corresponds to the init_trampoline intrinsic.
@ FLDEXP
FLDEXP - ldexp, inspired by libm (op0 * 2**op1).
@ STRICT_FSQRT
Constrained versions of libm-equivalent floating point intrinsics.
Definition ISDOpcodes.h:438
@ GlobalTLSAddress
Definition ISDOpcodes.h:89
@ SET_ROUNDING
Set rounding mode.
Definition ISDOpcodes.h:975
@ SIGN_EXTEND
Conversion operators.
Definition ISDOpcodes.h:844
@ AVGCEILS
AVGCEILS/AVGCEILU - Rounding averaging add - Add two integers using an integer of type i[N+2],...
Definition ISDOpcodes.h:715
@ STRICT_UINT_TO_FP
Definition ISDOpcodes.h:485
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition ISDOpcodes.h:665
@ TargetExternalSymbol
Definition ISDOpcodes.h:190
@ BR
Control flow instructions. These all have token chains.
@ TargetJumpTable
Definition ISDOpcodes.h:188
@ PREFETCH
PREFETCH - This corresponds to a prefetch intrinsic.
@ FSINCOS
FSINCOS - Compute both fsin and fcos as a single operation.
@ FNEG
Perform various unary floating-point operations inspired by libm.
@ BR_CC
BR_CC - Conditional branch.
@ SSUBO
Same for subtraction.
Definition ISDOpcodes.h:352
@ BR_JT
BR_JT - Jumptable branch.
@ FCANONICALIZE
Returns platform specific canonical encoding of a floating point number.
Definition ISDOpcodes.h:541
@ IS_FPCLASS
Performs a check of floating point class property, defined by IEEE-754.
Definition ISDOpcodes.h:548
@ SSUBSAT
RESULT = [US]SUBSAT(LHS, RHS) - Perform saturation subtraction on 2 integers with the same bit width ...
Definition ISDOpcodes.h:374
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition ISDOpcodes.h:796
@ ATOMIC_LOAD
Val, OUTCHAIN = ATOMIC_LOAD(INCHAIN, ptr) This corresponds to "load atomic" instruction.
@ EXTRACT_ELEMENT
EXTRACT_ELEMENT - This is used to get the lower or upper (determined by a Constant,...
Definition ISDOpcodes.h:247
@ SPLAT_VECTOR
SPLAT_VECTOR(VAL) - Returns a vector with the scalar value VAL duplicated in all lanes.
Definition ISDOpcodes.h:672
@ VACOPY
VACOPY - VACOPY has 5 operands: an input chain, a destination pointer, a source pointer,...
@ SADDO
RESULT, BOOL = [SU]ADDO(LHS, RHS) - Overflow-aware nodes for addition.
Definition ISDOpcodes.h:348
@ TargetGlobalAddress
TargetGlobalAddress - Like GlobalAddress, but the DAG does no folding or anything else with this node...
Definition ISDOpcodes.h:185
@ GET_ROUNDING
Returns current rounding mode: -1 Undefined 0 Round to 0 1 Round to nearest, ties to even 2 Round to ...
Definition ISDOpcodes.h:970
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition ISDOpcodes.h:704
@ SHL
Shift and rotation operations.
Definition ISDOpcodes.h:765
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition ISDOpcodes.h:649
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition ISDOpcodes.h:614
@ FMINNUM_IEEE
FMINNUM_IEEE/FMAXNUM_IEEE - Perform floating-point minimumNumber or maximumNumber on two values,...
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition ISDOpcodes.h:576
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition ISDOpcodes.h:850
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition ISDOpcodes.h:811
@ ATOMIC_CMP_SWAP
Val, OUTCHAIN = ATOMIC_CMP_SWAP(INCHAIN, ptr, cmp, swap) For double-word atomic operations: ValLo,...
@ FMINNUM
FMINNUM/FMAXNUM - Perform floating-point minimum maximum on two values, following IEEE-754 definition...
@ DYNAMIC_STACKALLOC
DYNAMIC_STACKALLOC - Allocate some number of bytes on the stack aligned to a specified boundary.
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition ISDOpcodes.h:888
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
Definition ISDOpcodes.h:727
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition ISDOpcodes.h:978
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition ISDOpcodes.h:805
@ UADDO_CARRY
Carry-using nodes for multiple precision addition and subtraction.
Definition ISDOpcodes.h:328
@ STRICT_SINT_TO_FP
STRICT_[US]INT_TO_FP - Convert a signed or unsigned integer to a floating point value.
Definition ISDOpcodes.h:484
@ INLINEASM_BR
INLINEASM_BR - Branching version of inline asm. Used by asm-goto.
@ EH_DWARF_CFA
EH_DWARF_CFA - This node represents the pointer to the DWARF Canonical Frame Address (CFA),...
Definition ISDOpcodes.h:150
@ FRAMEADDR
FRAMEADDR, RETURNADDR - These nodes represent llvm.frameaddress and llvm.returnaddress on the DAG.
Definition ISDOpcodes.h:110
@ STRICT_FP_TO_UINT
Definition ISDOpcodes.h:478
@ STRICT_FP_ROUND
X = STRICT_FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision ...
Definition ISDOpcodes.h:500
@ STRICT_FP_TO_SINT
STRICT_FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition ISDOpcodes.h:477
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition ISDOpcodes.h:926
@ READCYCLECOUNTER
READCYCLECOUNTER - This corresponds to the readcyclecounter intrinsic.
@ STRICT_FP_EXTEND
X = STRICT_FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition ISDOpcodes.h:505
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition ISDOpcodes.h:739
@ TRAP
TRAP - Trapping instruction.
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition ISDOpcodes.h:205
@ STRICT_FADD
Constrained versions of the binary floating point operators.
Definition ISDOpcodes.h:427
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition ISDOpcodes.h:565
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition ISDOpcodes.h:53
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition ISDOpcodes.h:959
@ INLINEASM
INLINEASM - Represents an inline asm block.
@ STRICT_FNEARBYINT
Definition ISDOpcodes.h:458
@ EH_SJLJ_SETJMP
RESULT, OUTCHAIN = EH_SJLJ_SETJMP(INCHAIN, buffer) This corresponds to the eh.sjlj....
Definition ISDOpcodes.h:162
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition ISDOpcodes.h:856
@ VAARG
VAARG - VAARG has four operands: an input chain, a pointer, a SRCVALUE, and the alignment.
@ BRCOND
BRCOND - Conditional branch.
@ SHL_PARTS
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations.
Definition ISDOpcodes.h:833
@ AssertSext
AssertSext, AssertZext - These nodes record if a register contains a value that has already been zero...
Definition ISDOpcodes.h:62
@ FCOPYSIGN
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
Definition ISDOpcodes.h:534
@ SADDSAT
RESULT = [US]ADDSAT(LHS, RHS) - Perform saturation addition on 2 integers with the same bit width (W)...
Definition ISDOpcodes.h:365
@ CALLSEQ_START
CALLSEQ_START/CALLSEQ_END - These operators mark the beginning and end of a call sequence,...
@ GET_DYNAMIC_AREA_OFFSET
GET_DYNAMIC_AREA_OFFSET - get offset from native SP to the address of the most recent dynamic alloca.
@ ABDS
ABDS/ABDU - Absolute difference - Return the absolute difference between two numbers interpreted as s...
Definition ISDOpcodes.h:722
@ ADJUST_TRAMPOLINE
ADJUST_TRAMPOLINE - This corresponds to the adjust_trampoline intrinsic.
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition ISDOpcodes.h:213
@ TargetGlobalTLSAddress
Definition ISDOpcodes.h:186
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition ISDOpcodes.h:556
bool isNormalStore(const SDNode *N)
Returns true if the specified node is a non-truncating and unindexed store.
bool isZEXTLoad(const SDNode *N)
Returns true if the specified node is a ZEXTLOAD.
bool isUNINDEXEDLoad(const SDNode *N)
Returns true if the specified node is an unindexed load.
bool isEXTLoad(const SDNode *N)
Returns true if the specified node is a EXTLOAD.
LLVM_ABI bool isBuildVectorAllZeros(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are 0 or undef.
bool isSignedIntSetCC(CondCode Code)
Return true if this is a setcc instruction that performs a signed comparison when used with integer o...
MemIndexedMode
MemIndexedMode enum - This enum defines the load / store indexed addressing modes.
bool isSEXTLoad(const SDNode *N)
Returns true if the specified node is a SEXTLOAD.
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
bool isUnsignedIntSetCC(CondCode Code)
Return true if this is a setcc instruction that performs an unsigned comparison when used with intege...
bool isNormalLoad(const SDNode *N)
Returns true if the specified node is a non-extending and unindexed load.
This namespace contains an enum with a value for every intrinsic/builtin function known by LLVM.
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > OverloadTys={})
Look up the Function declaration of the intrinsic id in the Module M.
@ Bitcast
Perform the operation on a different, but equivalently sized type.
@ VecShuffle
Definition NVPTX.h:154
@ MO_TLSLDM_FLAG
MO_TLSLDM_FLAG - on AIX the ML relocation type is only valid for a reference to a TOC symbol from the...
Definition PPC.h:148
@ MO_PIC_LO_FLAG
MO_PIC_LO_FLAG = MO_PIC_FLAG | MO_LO.
Definition PPC.h:196
@ MO_TPREL_PCREL_FLAG
MO_TPREL_PCREL_FLAG = MO_PCREL_FLAG | MO_TPREL_FLAG.
Definition PPC.h:199
@ MO_GOT_TPREL_PCREL_FLAG
MO_GOT_TPREL_PCREL_FLAG - A combination of flags, if these bits are set they should produce the reloc...
Definition PPC.h:174
@ MO_GOT_PCREL_FLAG
MO_GOT_PCREL_FLAG = MO_PCREL_FLAG | MO_GOT_FLAG.
Definition PPC.h:205
@ MO_TLSGDM_FLAG
MO_TLSGDM_FLAG - If this bit is set the symbol reference is relative to the region handle of TLS Gene...
Definition PPC.h:156
@ MO_PCREL_FLAG
MO_PCREL_FLAG - If this bit is set, the symbol reference is relative to the current instruction addre...
Definition PPC.h:123
@ MO_TLSLD_FLAG
MO_TLSLD_FLAG - If this bit is set the symbol reference is relative to TLS Local Dynamic model.
Definition PPC.h:152
@ MO_TLS_PCREL_FLAG
MO_TLS_PCREL_FLAG = MO_PCREL_FLAG | MO_TLS.
Definition PPC.h:202
@ MO_TPREL_HA
Definition PPC.h:181
@ MO_PLT
On PPC, the 12 bits are not enough for all target operand flags.
Definition PPC.h:115
@ MO_TLS
Symbol for VK_TLS fixup attached to an ADD instruction.
Definition PPC.h:190
@ MO_TPREL_FLAG
MO_TPREL_FLAG - If this bit is set, the symbol reference is relative to the thread pointer and the sy...
Definition PPC.h:142
@ MO_TPREL_LO
Definition PPC.h:180
@ MO_LO
MO_LO, MO_HA - lo16(symbol) and ha16(symbol)
Definition PPC.h:177
@ MO_GOT_TLSLD_PCREL_FLAG
MO_GOT_TLSLD_PCREL_FLAG - A combination of flags, if these bits are set they should produce the reloc...
Definition PPC.h:168
@ MO_PIC_HA_FLAG
MO_PIC_HA_FLAG = MO_PIC_FLAG | MO_HA.
Definition PPC.h:193
@ MO_TLSGD_FLAG
MO_TLSGD_FLAG - If this bit is set the symbol reference is relative to TLS General Dynamic model for ...
Definition PPC.h:137
@ MO_GOT_TLSGD_PCREL_FLAG
MO_GOT_TLSGD_PCREL_FLAG - A combination of flags, if these bits are set they should produce the reloc...
Definition PPC.h:162
@ MO_HA
Definition PPC.h:178
@ MO_PIC_FLAG
MO_PIC_FLAG - If this bit is set, the symbol reference is relative to the function's picbase,...
Definition PPC.h:119
@ MFOCRF
R32 = MFOCRF(CRREG, INFLAG) - Represents the MFOCRF instruction.
@ VADD_SPLAT
VRRC = VADD_SPLAT Elt, EltSize - Temporary node to be expanded during instruction selection to optimi...
@ PPC32_PICGOT
GPRC = address of GLOBAL_OFFSET_TABLE.
@ GlobalBaseReg
The result of the mflr at function entry, used for PIC code.
@ SRA_ADDZE
The combination of sra[wd]i and addze used to implemented signed integer division by a power of 2.
Define some predicates that are used for node matching.
Predicate
Predicate - These are "(BI << 5) | BO" for various predicates.
SDValue get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG)
get_VSPLTI_elt - If this is a build_vector of constants which can be formed by using a vspltis[bhw] i...
bool isXXBRDShuffleMask(ShuffleVectorSDNode *N)
isXXBRDShuffleMask - Return true if this is a shuffle mask suitable for a XXBRD instruction.
bool isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize, unsigned ShuffleKind, SelectionDAG &DAG)
isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for a VRGH* instruction with the ...
bool isVPKUDUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, SelectionDAG &DAG)
isVPKUDUMShuffleMask - Return true if this is the shuffle mask for a VPKUDUM instruction.
bool isVMRGEOShuffleMask(ShuffleVectorSDNode *N, bool CheckEven, unsigned ShuffleKind, SelectionDAG &DAG)
isVMRGEOShuffleMask - Return true if this is a shuffle mask suitable for a VMRGEW or VMRGOW instructi...
bool isXXBRQShuffleMask(ShuffleVectorSDNode *N)
isXXBRQShuffleMask - Return true if this is a shuffle mask suitable for a XXBRQ instruction.
bool isXXBRWShuffleMask(ShuffleVectorSDNode *N)
isXXBRWShuffleMask - Return true if this is a shuffle mask suitable for a XXBRW instruction.
bool isXXPERMDIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts, bool &Swap, bool IsLE)
isXXPERMDIShuffleMask - Return true if this is a shuffle mask suitable for a XXPERMDI instruction.
bool isXXBRHShuffleMask(ShuffleVectorSDNode *N)
isXXBRHShuffleMask - Return true if this is a shuffle mask suitable for a XXBRH instruction.
unsigned getSplatIdxForPPCMnemonics(SDNode *N, unsigned EltSize, SelectionDAG &DAG)
getSplatIdxForPPCMnemonics - Return the splat index as a value that is appropriate for PPC mnemonics ...
bool isXXSLDWIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts, bool &Swap, bool IsLE)
isXXSLDWIShuffleMask - Return true if this is a shuffle mask suitable for a XXSLDWI instruction.
FastISel * createFastISel(FunctionLoweringInfo &FuncInfo, const TargetLibraryInfo *LibInfo, const LibcallLoweringInfo *LibcallLowering)
int isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind, SelectionDAG &DAG)
isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift amount, otherwise return -1.
bool isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize, unsigned ShuffleKind, SelectionDAG &DAG)
isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for a VRGL* instruction with the ...
bool isXXINSERTWMask(ShuffleVectorSDNode *N, unsigned &ShiftElts, unsigned &InsertAtByte, bool &Swap, bool IsLE)
isXXINSERTWMask - Return true if this VECTOR_SHUFFLE can be handled by the XXINSERTW instruction intr...
bool isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize)
isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand specifies a splat of a singl...
bool isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, SelectionDAG &DAG)
isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a VPKUWUM instruction.
bool isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, SelectionDAG &DAG)
isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a VPKUHUM instruction.
Invariant opcodes: All instruction sets have these as their low opcodes.
@ XMC_PR
Program Code.
Definition XCOFF.h:106
@ XTY_ER
External reference.
Definition XCOFF.h:242
initializer< Ty > init(const Ty &Val)
@ User
could "use" a pointer
NodeAddr< UseNode * > Use
Definition RDFGraph.h:385
NodeAddr< NodeBase * > Node
Definition RDFGraph.h:381
NodeAddr< FuncNode * > Func
Definition RDFGraph.h:393
iterator end() const
Definition BasicBlock.h:89
This is an optimization pass for GlobalISel generic memory operations.
@ Offset
Definition DWP.cpp:532
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
static bool isIndirectCall(const MachineInstr &MI)
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1739
bool checkConvertToNonDenormSingle(APFloat &ArgAPFloat)
LLVM_ABI void GetReturnInfo(CallingConv::ID CC, Type *ReturnType, AttributeList attr, SmallVectorImpl< ISD::OutputArg > &Outs, const TargetLowering &TLI, const DataLayout &DL)
Given an LLVM IR type and return type attributes, compute the return value EVTs and flags,...
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
InstructionCost Cost
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
Definition MathExtras.h:165
LLVM_ABI bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
LLVM_ABI SDValue peekThroughBitcasts(SDValue V)
Return the non-bitcasted source operand of V if it exists.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
bool CCAssignFn(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
CCAssignFn - This function assigns a location for Val, updating State to reflect the change.
bool isAligned(Align Lhs, uint64_t SizeInBytes)
Checks that SizeInBytes is a multiple of the alignment.
Definition Alignment.h:134
bool isIntS16Immediate(SDNode *N, int16_t &Imm)
isIntS16Immediate - This method tests to see if the node is either a 32-bit or 64-bit immediate,...
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition MathExtras.h:284
static bool isRunOfOnes64(uint64_t Val, unsigned &MB, unsigned &ME)
bool isa_and_nonnull(const Y &Val)
Definition Casting.h:676
bool RetCC_PPC(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
bool CC_PPC64_ELF(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
Definition bit.h:202
unsigned M1(unsigned Val)
Definition VE.h:377
bool isReleaseOrStronger(AtomicOrdering AO)
auto dyn_cast_or_null(const Y &Val)
Definition Casting.h:753
constexpr bool has_single_bit(T Value) noexcept
Definition bit.h:147
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1746
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:279
bool convertToNonDenormSingle(APInt &ArgAPInt)
FPClassTest
Floating-point class tests, supported by 'is_fpclass' intrinsic.
bool CC_PPC32_SVR4_ByVal(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
constexpr uint32_t Hi_32(uint64_t Value)
Return the high 32 bits of a 64 bit value.
Definition MathExtras.h:150
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition Error.cpp:163
bool CC_PPC32_SVR4(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
constexpr uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition Alignment.h:144
constexpr RegState getDefRegState(bool B)
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
Definition MathExtras.h:189
bool RetCC_PPC_Cold(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
constexpr uint32_t Lo_32(uint64_t Value)
Return the low 32 bits of a 64 bit value.
Definition MathExtras.h:155
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
format_object< Ts... > format(const char *Fmt, const Ts &... Vals)
These are helper functions used to produce formatted output.
Definition Format.h:129
@ Success
The lock was released successfully.
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
AtomicOrdering
Atomic ordering for LLVM's memory model.
bool isIntS34Immediate(SDNode *N, int64_t &Imm)
isIntS34Immediate - This method tests if value of node given can be accurately represented as a sign ...
To bit_cast(const From &from) noexcept
Definition bit.h:90
@ Mul
Product of integers.
@ Xor
Bitwise or logical XOR of integers.
@ And
Bitwise or logical AND of integers.
@ Sub
Subtraction of integers.
@ Add
Sum of integers.
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
Definition MCRegister.h:21
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
Definition STLExtras.h:2012
DWARFExpression::Operation Op
bool isPhysRegUsedAfter(Register Reg, MachineBasicBlock::iterator MBI)
Check if physical register Reg is used after MBI.
unsigned M0(unsigned Val)
Definition VE.h:376
ArrayRef(const T &OneElt) -> ArrayRef< T >
LLVM_ABI ConstantSDNode * isConstOrConstSplat(SDValue N, bool AllowUndefs=false, bool AllowTruncation=false)
Returns the SDNode if it is a constant splat BuildVector or constant int.
bool isAcquireOrStronger(AtomicOrdering AO)
constexpr bool isShiftedInt(int64_t x)
Checks if a signed integer is an N bit number shifted left by S.
Definition MathExtras.h:182
constexpr int32_t SignExtend32(uint32_t X)
Sign-extend the number in the bottom B bits of X to a 32-bit integer.
Definition MathExtras.h:554
constexpr unsigned BitWidth
bool CC_PPC32_SVR4_VarArg(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Definition Alignment.h:201
constexpr int64_t SignExtend64(uint64_t x)
Sign-extend the number in the bottom B bits of X to a 64-bit integer.
Definition MathExtras.h:572
static bool isRunOfOnes(unsigned Val, unsigned &MB, unsigned &ME)
Returns true iff Val consists of one contiguous run of 1s with any number of 0s on either side.
@ Increment
Incrementally increasing token ID.
Definition AllocToken.h:26
@ Enabled
Convert any .debug_str_offsets tables to DWARF64 if needed.
Definition DWP.h:27
T bit_floor(T Value)
Returns the largest integral power of two no greater than Value if Value is nonzero.
Definition bit.h:345
constexpr bool isShiftedUInt(uint64_t x)
Checks if a unsigned integer is an N bit number shifted left by S.
Definition MathExtras.h:198
LLVM_ABI bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
static const unsigned PerfectShuffleTable[6561+1]
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:872
#define N
This is used by foldLoadsRecursive() to capture a Root Load node which is of type or(load,...
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
constexpr uint64_t value() const
This is a hole in the type system and should not be abused.
Definition Alignment.h:77
Represent subnormal handling kind for floating point instruction inputs and outputs.
Extended Value Type.
Definition ValueTypes.h:35
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
Definition ValueTypes.h:90
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition ValueTypes.h:403
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition ValueTypes.h:145
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
Definition ValueTypes.h:70
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
Definition ValueTypes.h:292
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
Definition ValueTypes.h:155
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition ValueTypes.h:381
uint64_t getScalarSizeInBits() const
Definition ValueTypes.h:393
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition ValueTypes.h:324
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition ValueTypes.h:61
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition ValueTypes.h:389
LLVM_ABI std::string getEVTString() const
This function returns value type as a string, e.g. "i32".
bool isVector() const
Return true if this is a vector value type.
Definition ValueTypes.h:176
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition ValueTypes.h:331
LLVM_ABI Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition ValueTypes.h:336
bool isExtended() const
Test if the given EVT is extended (as opposed to being simple).
Definition ValueTypes.h:150
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition ValueTypes.h:165
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition ValueTypes.h:344
EVT getHalfNumVectorElementsVT(LLVMContext &Context) const
Definition ValueTypes.h:469
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition ValueTypes.h:160
unsigned getByValSize() const
void setByValSize(unsigned S)
Align getNonZeroByValAlign() const
OutputArg - This struct carries flags and a value for a single outgoing (actual) argument or outgoing...
bool isConstant() const
Returns true if we know the value of all bits.
Definition KnownBits.h:54
void resetAll()
Resets the known state of all bits.
Definition KnownBits.h:74
const APInt & getConstant() const
Returns the value when all bits have a known value.
Definition KnownBits.h:60
This class contains a discriminated union of information about pointers in memory operands,...
static LLVM_ABI MachinePointerInfo getStack(MachineFunction &MF, int64_t Offset, uint8_t ID=0)
Stack pointer relative access.
MachinePointerInfo getWithOffset(int64_t O) const
static LLVM_ABI MachinePointerInfo getGOT(MachineFunction &MF)
Return a MachinePointerInfo record that refers to a GOT entry.
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition Alignment.h:106
Structure that collects some common arguments that get passed around between the functions for call l...
These are IR-level optimization flags that may be propagated to SDNodes.
void setNoFPExcept(bool b)
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*...
This contains information for each constraint that we are lowering.
This structure contains all information that is necessary for lowering calls.
CallLoweringInfo & setIsPostTypeLegalization(bool Value=true)
CallLoweringInfo & setLibCallee(CallingConv::ID CC, Type *ResultType, SDValue Target, ArgListTy &&ArgsList)
SmallVector< ISD::InputArg, 32 > Ins
CallLoweringInfo & setZExtResult(bool Value=true)
CallLoweringInfo & setDebugLoc(const SDLoc &dl)
CallLoweringInfo & setTailCall(bool Value=true)
CallLoweringInfo & setSExtResult(bool Value=true)
SmallVector< ISD::OutputArg, 32 > Outs
CallLoweringInfo & setChain(SDValue InChain)
LLVM_ABI void AddToWorklist(SDNode *N)
LLVM_ABI SDValue CombineTo(SDNode *N, ArrayRef< SDValue > To, bool AddTo=true)
This structure is used to pass arguments to makeLibCall function.