LLVM 23.0.0git
PPCISelLowering.cpp
Go to the documentation of this file.
1//===-- PPCISelLowering.cpp - PPC DAG Lowering Implementation -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the PPCISelLowering class.
10//
11//===----------------------------------------------------------------------===//
12
13#include "PPCISelLowering.h"
16#include "PPC.h"
17#include "PPCCallingConv.h"
18#include "PPCFrameLowering.h"
19#include "PPCInstrInfo.h"
21#include "PPCPerfectShuffle.h"
22#include "PPCRegisterInfo.h"
23#include "PPCSelectionDAGInfo.h"
24#include "PPCSubtarget.h"
25#include "PPCTargetMachine.h"
26#include "llvm/ADT/APFloat.h"
27#include "llvm/ADT/APInt.h"
28#include "llvm/ADT/APSInt.h"
29#include "llvm/ADT/ArrayRef.h"
30#include "llvm/ADT/DenseMap.h"
31#include "llvm/ADT/STLExtras.h"
34#include "llvm/ADT/Statistic.h"
35#include "llvm/ADT/StringRef.h"
58#include "llvm/IR/CallingConv.h"
59#include "llvm/IR/Constant.h"
60#include "llvm/IR/Constants.h"
61#include "llvm/IR/DataLayout.h"
62#include "llvm/IR/DebugLoc.h"
64#include "llvm/IR/Function.h"
65#include "llvm/IR/GlobalValue.h"
66#include "llvm/IR/IRBuilder.h"
68#include "llvm/IR/Intrinsics.h"
69#include "llvm/IR/IntrinsicsPowerPC.h"
70#include "llvm/IR/Module.h"
71#include "llvm/IR/Type.h"
72#include "llvm/IR/Use.h"
73#include "llvm/IR/Value.h"
74#include "llvm/MC/MCContext.h"
75#include "llvm/MC/MCExpr.h"
84#include "llvm/Support/Debug.h"
86#include "llvm/Support/Format.h"
92#include <algorithm>
93#include <cassert>
94#include <cstdint>
95#include <iterator>
96#include <list>
97#include <optional>
98#include <utility>
99#include <vector>
100
101using namespace llvm;
102
103#define DEBUG_TYPE "ppc-lowering"
104
106 "disable-p10-store-forward",
107 cl::desc("disable P10 store forward-friendly conversion"), cl::Hidden,
108 cl::init(false));
109
110static cl::opt<bool> DisablePPCPreinc("disable-ppc-preinc",
111cl::desc("disable preincrement load/store generation on PPC"), cl::Hidden);
112
113static cl::opt<bool> DisableILPPref("disable-ppc-ilp-pref",
114cl::desc("disable setting the node scheduling preference to ILP on PPC"), cl::Hidden);
115
116static cl::opt<bool> DisablePPCUnaligned("disable-ppc-unaligned",
117cl::desc("disable unaligned load/store generation on PPC"), cl::Hidden);
118
119static cl::opt<bool> DisableSCO("disable-ppc-sco",
120cl::desc("disable sibling call optimization on ppc"), cl::Hidden);
121
122static cl::opt<bool> DisableInnermostLoopAlign32("disable-ppc-innermost-loop-align32",
123cl::desc("don't always align innermost loop to 32 bytes on ppc"), cl::Hidden);
124
125static cl::opt<bool> UseAbsoluteJumpTables("ppc-use-absolute-jumptables",
126cl::desc("use absolute jump tables on ppc"), cl::Hidden);
127
128static cl::opt<bool>
129 DisablePerfectShuffle("ppc-disable-perfect-shuffle",
130 cl::desc("disable vector permute decomposition"),
131 cl::init(true), cl::Hidden);
132
134 "disable-auto-paired-vec-st",
135 cl::desc("disable automatically generated 32byte paired vector stores"),
136 cl::init(true), cl::Hidden);
137
139 "ppc-min-jump-table-entries", cl::init(64), cl::Hidden,
140 cl::desc("Set minimum number of entries to use a jump table on PPC"));
141
143 "ppc-min-bit-test-cmps", cl::init(3), cl::Hidden,
144 cl::desc("Set minimum of largest number of comparisons to use bit test for "
145 "switch on PPC."));
146
148 "ppc-gather-alias-max-depth", cl::init(18), cl::Hidden,
149 cl::desc("max depth when checking alias info in GatherAllAliases()"));
150
152 "ppc-aix-shared-lib-tls-model-opt-limit", cl::init(1), cl::Hidden,
153 cl::desc("Set inclusive limit count of TLS local-dynamic access(es) in a "
154 "function to use initial-exec"));
155
156STATISTIC(NumTailCalls, "Number of tail calls");
157STATISTIC(NumSiblingCalls, "Number of sibling calls");
158STATISTIC(ShufflesHandledWithVPERM,
159 "Number of shuffles lowered to a VPERM or XXPERM");
160STATISTIC(NumDynamicAllocaProbed, "Number of dynamic stack allocation probed");
161
162static bool isNByteElemShuffleMask(ShuffleVectorSDNode *, unsigned, int);
163
164static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl);
165
166// A faster local-[exec|dynamic] TLS access sequence (enabled with the
167// -maix-small-local-[exec|dynamic]-tls option) can be produced for TLS
168// variables; consistent with the IBM XL compiler, we apply a max size of
169// slightly under 32KB.
171
172// FIXME: Remove this once the bug has been fixed!
174
176 const PPCSubtarget &STI)
177 : TargetLowering(TM, STI), Subtarget(STI) {
178 // Initialize map that relates the PPC addressing modes to the computed flags
179 // of a load/store instruction. The map is used to determine the optimal
 180// addressing mode when selecting loads and stores.
181 initializeAddrModeMap();
182 // On PPC32/64, arguments smaller than 4/8 bytes are extended, so all
183 // arguments are at least 4/8 bytes aligned.
184 bool isPPC64 = Subtarget.isPPC64();
185 setMinStackArgumentAlignment(isPPC64 ? Align(8) : Align(4));
186 const MVT RegVT = Subtarget.getScalarIntVT();
187
188 // Set up the register classes.
189 addRegisterClass(MVT::i32, &PPC::GPRCRegClass);
190 if (!useSoftFloat()) {
191 if (hasSPE()) {
192 addRegisterClass(MVT::f32, &PPC::GPRCRegClass);
193 // EFPU2 APU only supports f32
194 if (!Subtarget.hasEFPU2())
195 addRegisterClass(MVT::f64, &PPC::SPERCRegClass);
196 } else {
197 addRegisterClass(MVT::f32, &PPC::F4RCRegClass);
198 addRegisterClass(MVT::f64, &PPC::F8RCRegClass);
199 }
200 }
201
204
205 // PowerPC uses addo_carry,subo_carry to propagate carry.
208
209 // On P10, the default lowering generates better code using the
210 // setbc instruction.
211 if (!Subtarget.hasP10Vector()) {
214 if (isPPC64) {
217 }
218 }
219
220 // Match BITREVERSE to customized fast code sequence in the td file.
223
224 // Sub-word ATOMIC_CMP_SWAP need to ensure that the input is zero-extended.
226
227 // Custom lower inline assembly to check for special registers.
230
231 // PowerPC has an i16 but no i8 (or i1) SEXTLOAD.
232 for (MVT VT : MVT::integer_valuetypes()) {
235 }
236
237 setTruncStoreAction(MVT::f128, MVT::f16, Expand);
239
240 if (Subtarget.isISA3_0()) {
241 setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f16, Legal);
242 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Legal);
243 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Legal);
244 setTruncStoreAction(MVT::f64, MVT::f16, Legal);
245 setTruncStoreAction(MVT::f32, MVT::f16, Legal);
246 } else {
247 // No extending loads from f16 or HW conversions back and forth.
248 setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f16, Expand);
250 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
253 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
256 setTruncStoreAction(MVT::f64, MVT::f16, Expand);
257 setTruncStoreAction(MVT::f32, MVT::f16, Expand);
258 }
259
260 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
261
262 // PowerPC has pre-inc load and store's.
273 if (!Subtarget.hasSPE()) {
278 }
279
280 if (Subtarget.useCRBits()) {
282
283 if (isPPC64 || Subtarget.hasFPCVT()) {
288
290 AddPromotedToType(ISD::SINT_TO_FP, MVT::i1, RegVT);
292 AddPromotedToType(ISD::UINT_TO_FP, MVT::i1, RegVT);
293
298
300 AddPromotedToType(ISD::FP_TO_SINT, MVT::i1, RegVT);
302 AddPromotedToType(ISD::FP_TO_UINT, MVT::i1, RegVT);
303 } else {
308 }
309
310 // PowerPC does not support direct load/store of condition registers.
313
314 // FIXME: Remove this once the ANDI glue bug is fixed:
315 if (ANDIGlueBug)
317
318 for (MVT VT : MVT::integer_valuetypes()) {
321 setTruncStoreAction(VT, MVT::i1, Expand);
322 }
323
324 addRegisterClass(MVT::i1, &PPC::CRBITRCRegClass);
325 }
326
327 // Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on
328 // PPC (the libcall is not available).
333
334 // We do not currently implement these libm ops for PowerPC.
335 setOperationAction(ISD::FFLOOR, MVT::ppcf128, Expand);
336 setOperationAction(ISD::FCEIL, MVT::ppcf128, Expand);
337 setOperationAction(ISD::FTRUNC, MVT::ppcf128, Expand);
338 setOperationAction(ISD::FRINT, MVT::ppcf128, Expand);
340 setOperationAction(ISD::FREM, MVT::ppcf128, LibCall);
341
342 // PowerPC has no SREM/UREM instructions unless we are on P9
343 // On P9 we may use a hardware instruction to compute the remainder.
344 // When the result of both the remainder and the division is required it is
345 // more efficient to compute the remainder from the result of the division
346 // rather than use the remainder instruction. The instructions are legalized
347 // directly because the DivRemPairsPass performs the transformation at the IR
348 // level.
349 if (Subtarget.isISA3_0()) {
354 } else {
359 }
360
361 // Don't use SMUL_LOHI/UMUL_LOHI or SDIVREM/UDIVREM to lower SREM/UREM.
370
371 // Handle constrained floating-point operations of scalar.
372 // TODO: Handle SPE specific operation.
378
383
384 if (!Subtarget.hasSPE()) {
387 }
388
389 if (Subtarget.hasVSX()) {
392 }
393
394 if (Subtarget.hasFSQRT()) {
397 }
398
399 if (Subtarget.hasFPRND()) {
404
409 }
410
411 // We don't support sin/cos/sqrt/fmod/pow
422
423 // MASS transformation for LLVM intrinsics with replicating fast-math flag
424 // to be consistent to PPCGenScalarMASSEntries pass
425 if (TM.getOptLevel() == CodeGenOptLevel::Aggressive) {
438 }
439
440 if (Subtarget.hasSPE()) {
443 } else {
444 setOperationAction(ISD::FMA , MVT::f64, Legal);
445 setOperationAction(ISD::FMA , MVT::f32, Legal);
448 }
449
450 if (Subtarget.hasSPE())
451 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
452
453 // If we're enabling GP optimizations, use hardware square root
454 if (!Subtarget.hasFSQRT() && !(Subtarget.hasFRSQRTE() && Subtarget.hasFRE()))
456
457 if (!Subtarget.hasFSQRT() &&
458 !(Subtarget.hasFRSQRTES() && Subtarget.hasFRES()))
460
461 if (Subtarget.hasFCPSGN()) {
464 } else {
467 }
468
469 if (Subtarget.hasFPRND()) {
474
479 }
480
481 // Prior to P10, PowerPC does not have BSWAP, but we can use vector BSWAP
482 // instruction xxbrd to speed up scalar BSWAP64.
483 if (Subtarget.isISA3_1()) {
486 } else {
489 (Subtarget.hasP9Vector() && isPPC64) ? Custom : Expand);
490 }
491
492 // CTPOP or CTTZ were introduced in P8/P9 respectively
493 if (Subtarget.isISA3_0()) {
494 setOperationAction(ISD::CTTZ , MVT::i32 , Legal);
495 setOperationAction(ISD::CTTZ , MVT::i64 , Legal);
496 } else {
497 setOperationAction(ISD::CTTZ , MVT::i32 , Expand);
498 setOperationAction(ISD::CTTZ , MVT::i64 , Expand);
499 }
500
501 if (Subtarget.hasPOPCNTD() == PPCSubtarget::POPCNTD_Fast) {
504 } else {
507 }
508
509 // PowerPC does not have ROTR
512
513 if (!Subtarget.useCRBits()) {
514 // PowerPC does not have Select
519 }
520
521 // PowerPC wants to turn select_cc of FP into fsel when possible.
524
525 // PowerPC wants to optimize integer setcc a bit
526 if (!Subtarget.useCRBits())
528
529 if (Subtarget.hasFPU()) {
533
537 }
538
539 // PowerPC does not have BRCOND which requires SetCC
540 if (!Subtarget.useCRBits())
542
544
545 if (Subtarget.hasSPE()) {
546 // SPE has built-in conversions
553
554 // SPE supports signaling compare of f32/f64.
557 } else {
558 // PowerPC turns FP_TO_SINT into FCTIWZ and some load/stores.
561
562 // PowerPC does not have [U|S]INT_TO_FP
567 }
568
569 if (Subtarget.hasDirectMove() && isPPC64) {
574
583 } else {
588 }
589
590 // We cannot sextinreg(i1). Expand to shifts.
592
593 // Custom handling for PowerPC ucmp instruction
595 setOperationAction(ISD::UCMP, MVT::i64, isPPC64 ? Custom : Expand);
596
597 // NOTE: EH_SJLJ_SETJMP/_LONGJMP supported here is NOT intended to support
598 // SjLj exception handling but a light-weight setjmp/longjmp replacement to
 599// support continuation, user-level threading, etc. As a result, no
600 // other SjLj exception interfaces are implemented and please don't build
601 // your own exception handling based on them.
602 // LLVM/Clang supports zero-cost DWARF exception handling.
605
606 // We want to legalize GlobalAddress and ConstantPool nodes into the
607 // appropriate instructions to materialize the address.
618
619 // TRAP is legal.
620 setOperationAction(ISD::TRAP, MVT::Other, Legal);
621
622 // TRAMPOLINE is custom lowered.
625
626 // VASTART needs to be custom lowered to use the VarArgsFrameIndex
628
629 if (Subtarget.is64BitELFABI()) {
630 // VAARG always uses double-word chunks, so promote anything smaller.
632 AddPromotedToType(ISD::VAARG, MVT::i1, MVT::i64);
634 AddPromotedToType(ISD::VAARG, MVT::i8, MVT::i64);
636 AddPromotedToType(ISD::VAARG, MVT::i16, MVT::i64);
638 AddPromotedToType(ISD::VAARG, MVT::i32, MVT::i64);
640 } else if (Subtarget.is32BitELFABI()) {
641 // VAARG is custom lowered with the 32-bit SVR4 ABI.
644 } else
646
647 // VACOPY is custom lowered with the 32-bit SVR4 ABI.
648 if (Subtarget.is32BitELFABI())
650 else
652
653 // Use the default implementation.
654 setOperationAction(ISD::VAEND , MVT::Other, Expand);
663
664 if (Subtarget.isISA3_0() && isPPC64) {
665 setOperationAction(ISD::VP_STORE, MVT::v16i1, Custom);
666 setOperationAction(ISD::VP_STORE, MVT::v8i1, Custom);
667 setOperationAction(ISD::VP_STORE, MVT::v4i1, Custom);
668 setOperationAction(ISD::VP_STORE, MVT::v2i1, Custom);
669 setOperationAction(ISD::VP_LOAD, MVT::v16i1, Custom);
670 setOperationAction(ISD::VP_LOAD, MVT::v8i1, Custom);
671 setOperationAction(ISD::VP_LOAD, MVT::v4i1, Custom);
672 setOperationAction(ISD::VP_LOAD, MVT::v2i1, Custom);
673 }
674
675 // We want to custom lower some of our intrinsics.
681
682 // To handle counter-based loop conditions.
685
690
691 // Comparisons that require checking two conditions.
692 if (Subtarget.hasSPE()) {
697 }
710
713
714 if (Subtarget.has64BitSupport()) {
715 // They also have instructions for converting between i64 and fp.
724 // This is just the low 32 bits of a (signed) fp->i64 conversion.
725 // We cannot do this with Promote because i64 is not a legal type.
728
729 if (Subtarget.hasLFIWAX() || isPPC64) {
732 }
733 } else {
734 // PowerPC does not have FP_TO_UINT on 32-bit implementations.
735 if (Subtarget.hasSPE()) {
738 } else {
741 }
742 }
743
744 // With the instructions enabled under FPCVT, we can do everything.
745 if (Subtarget.hasFPCVT()) {
746 if (Subtarget.has64BitSupport()) {
755 }
756
765 }
766
767 if (Subtarget.use64BitRegs()) {
768 // 64-bit PowerPC implementations can support i64 types directly
769 addRegisterClass(MVT::i64, &PPC::G8RCRegClass);
770 // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
772 // 64-bit PowerPC wants to expand i128 shifts itself.
776 } else {
777 // 32-bit PowerPC wants to expand i64 shifts itself.
781 }
782
783 // PowerPC has better expansions for funnel shifts than the generic
784 // TargetLowering::expandFunnelShift.
785 if (Subtarget.has64BitSupport()) {
788 }
791
792 if (Subtarget.hasVSX()) {
803 }
804
805 if (Subtarget.hasAltivec()) {
806 for (MVT VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32 }) {
813 }
814 // First set operation action for all vector types to expand. Then we
815 // will selectively turn on ones that can be effectively codegen'd.
817 // add/sub are legal for all supported vector VT's.
820
821 // For v2i64, these are only valid with P8Vector. This is corrected after
822 // the loop.
823 if (VT.getSizeInBits() <= 128 && VT.getScalarSizeInBits() <= 64) {
828 }
829 else {
834 }
835
836 if (Subtarget.hasVSX()) {
842 }
843
844 // Vector instructions introduced in P8
845 if (Subtarget.hasP8Altivec() && (VT.SimpleTy != MVT::v1i128)) {
848 }
849 else {
852 }
853
854 // Vector instructions introduced in P9
855 if (Subtarget.hasP9Altivec() && (VT.SimpleTy != MVT::v1i128))
857 else
859
860 // We promote all shuffles to v16i8.
862 AddPromotedToType (ISD::VECTOR_SHUFFLE, VT, MVT::v16i8);
863
864 // We promote all non-typed operations to v4i32.
866 AddPromotedToType (ISD::AND , VT, MVT::v4i32);
868 AddPromotedToType (ISD::OR , VT, MVT::v4i32);
870 AddPromotedToType (ISD::XOR , VT, MVT::v4i32);
872 AddPromotedToType (ISD::LOAD , VT, MVT::v4i32);
874 AddPromotedToType (ISD::SELECT, VT, MVT::v4i32);
877 AddPromotedToType (ISD::SELECT_CC, VT, MVT::v4i32);
879 AddPromotedToType (ISD::STORE, VT, MVT::v4i32);
880
881 // No other operations are legal.
920
921 for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
922 setTruncStoreAction(VT, InnerVT, Expand);
925 setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
926 }
927 }
929 if (!Subtarget.hasP8Vector()) {
930 setOperationAction(ISD::SMAX, MVT::v2i64, Expand);
931 setOperationAction(ISD::SMIN, MVT::v2i64, Expand);
932 setOperationAction(ISD::UMAX, MVT::v2i64, Expand);
933 setOperationAction(ISD::UMIN, MVT::v2i64, Expand);
934 }
935
936 // We can custom expand all VECTOR_SHUFFLEs to VPERM, others we can handle
937 // with merges, splats, etc.
939
940 // Vector truncates to sub-word integer that fit in an Altivec/VSX register
941 // are cheap, so handle them before they get expanded to scalar.
947
948 setOperationAction(ISD::AND , MVT::v4i32, Legal);
949 setOperationAction(ISD::OR , MVT::v4i32, Legal);
950 setOperationAction(ISD::XOR , MVT::v4i32, Legal);
951 setOperationAction(ISD::LOAD , MVT::v4i32, Legal);
953 Subtarget.useCRBits() ? Legal : Expand);
954 setOperationAction(ISD::STORE , MVT::v4i32, Legal);
964 setOperationAction(ISD::FCEIL, MVT::v4f32, Legal);
967
968 // Custom lowering ROTL v1i128 to VECTOR_SHUFFLE v16i8.
969 setOperationAction(ISD::ROTL, MVT::v1i128, Custom);
970 // With hasAltivec set, we can lower ISD::ROTL to vrl(b|h|w).
971 if (Subtarget.hasAltivec())
972 for (auto VT : {MVT::v4i32, MVT::v8i16, MVT::v16i8})
974 // With hasP8Altivec set, we can lower ISD::ROTL to vrld.
975 if (Subtarget.hasP8Altivec())
976 setOperationAction(ISD::ROTL, MVT::v2i64, Legal);
977
978 addRegisterClass(MVT::v4f32, &PPC::VRRCRegClass);
979 addRegisterClass(MVT::v4i32, &PPC::VRRCRegClass);
980 addRegisterClass(MVT::v8i16, &PPC::VRRCRegClass);
981 addRegisterClass(MVT::v16i8, &PPC::VRRCRegClass);
982
983 setOperationAction(ISD::MUL, MVT::v4f32, Legal);
984 setOperationAction(ISD::FMA, MVT::v4f32, Legal);
985
986 if (Subtarget.hasVSX()) {
987 setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
988 setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
990 }
991
992 if (Subtarget.hasP8Altivec())
993 setOperationAction(ISD::MUL, MVT::v4i32, Legal);
994 else
995 setOperationAction(ISD::MUL, MVT::v4i32, Custom);
996
997 if (Subtarget.isISA3_1()) {
998 setOperationAction(ISD::MUL, MVT::v2i64, Legal);
999 setOperationAction(ISD::MULHS, MVT::v2i64, Legal);
1000 setOperationAction(ISD::MULHU, MVT::v2i64, Legal);
1001 setOperationAction(ISD::MULHS, MVT::v4i32, Legal);
1002 setOperationAction(ISD::MULHU, MVT::v4i32, Legal);
1003 setOperationAction(ISD::UDIV, MVT::v2i64, Legal);
1004 setOperationAction(ISD::SDIV, MVT::v2i64, Legal);
1005 setOperationAction(ISD::UDIV, MVT::v4i32, Legal);
1006 setOperationAction(ISD::SDIV, MVT::v4i32, Legal);
1007 setOperationAction(ISD::UREM, MVT::v2i64, Legal);
1008 setOperationAction(ISD::SREM, MVT::v2i64, Legal);
1009 setOperationAction(ISD::UREM, MVT::v4i32, Legal);
1010 setOperationAction(ISD::SREM, MVT::v4i32, Legal);
1011 setOperationAction(ISD::UREM, MVT::v1i128, Legal);
1012 setOperationAction(ISD::SREM, MVT::v1i128, Legal);
1013 setOperationAction(ISD::UDIV, MVT::v1i128, Legal);
1014 setOperationAction(ISD::SDIV, MVT::v1i128, Legal);
1015 setOperationAction(ISD::ROTL, MVT::v1i128, Legal);
1016 }
1017
1018 setOperationAction(ISD::MUL, MVT::v8i16, Legal);
1019 setOperationAction(ISD::MUL, MVT::v16i8, Custom);
1020
1023 // LE is P8+/64-bit so direct moves are supported and these operations
1024 // are legal. The custom transformation requires 64-bit since we need a
1025 // pair of stores that will cover a 128-bit load for P10.
1026 if (!DisableP10StoreForward && isPPC64 && !Subtarget.isLittleEndian()) {
1030 }
1031
1036
1037 // Altivec does not contain unordered floating-point compare instructions
1038 setCondCodeAction(ISD::SETUO, MVT::v4f32, Expand);
1039 setCondCodeAction(ISD::SETUEQ, MVT::v4f32, Expand);
1040 setCondCodeAction(ISD::SETO, MVT::v4f32, Expand);
1041 setCondCodeAction(ISD::SETONE, MVT::v4f32, Expand);
1042
1043 if (Subtarget.hasVSX()) {
1046 if (Subtarget.hasP8Vector()) {
1049 }
1050 if (Subtarget.hasDirectMove() && isPPC64) {
1059 }
1061
1062 // The nearbyint variants are not allowed to raise the inexact exception
1063 // so we can only code-gen them with fpexcept.ignore.
1068
1069 setOperationAction(ISD::FFLOOR, MVT::v2f64, Legal);
1070 setOperationAction(ISD::FCEIL, MVT::v2f64, Legal);
1071 setOperationAction(ISD::FTRUNC, MVT::v2f64, Legal);
1072 setOperationAction(ISD::FRINT, MVT::v2f64, Legal);
1073 setOperationAction(ISD::FROUND, MVT::v2f64, Legal);
1076
1077 setOperationAction(ISD::FRINT, MVT::v4f32, Legal);
1078 setOperationAction(ISD::FROUND, MVT::v4f32, Legal);
1081
1082 setOperationAction(ISD::MUL, MVT::v2f64, Legal);
1083 setOperationAction(ISD::FMA, MVT::v2f64, Legal);
1084
1085 setOperationAction(ISD::FDIV, MVT::v2f64, Legal);
1086 setOperationAction(ISD::FSQRT, MVT::v2f64, Legal);
1087
1088 // Share the Altivec comparison restrictions.
1089 setCondCodeAction(ISD::SETUO, MVT::v2f64, Expand);
1090 setCondCodeAction(ISD::SETUEQ, MVT::v2f64, Expand);
1091 setCondCodeAction(ISD::SETO, MVT::v2f64, Expand);
1092 setCondCodeAction(ISD::SETONE, MVT::v2f64, Expand);
1093
1094 setOperationAction(ISD::LOAD, MVT::v2f64, Legal);
1095 setOperationAction(ISD::STORE, MVT::v2f64, Legal);
1096
1098
1099 if (Subtarget.hasP8Vector())
1100 addRegisterClass(MVT::f32, &PPC::VSSRCRegClass);
1101
1102 addRegisterClass(MVT::f64, &PPC::VSFRCRegClass);
1103
1104 addRegisterClass(MVT::v4i32, &PPC::VSRCRegClass);
1105 addRegisterClass(MVT::v4f32, &PPC::VSRCRegClass);
1106 addRegisterClass(MVT::v2f64, &PPC::VSRCRegClass);
1107
1108 if (Subtarget.hasP8Altivec()) {
1109 setOperationAction(ISD::SHL, MVT::v2i64, Legal);
1110 setOperationAction(ISD::SRA, MVT::v2i64, Legal);
1111 setOperationAction(ISD::SRL, MVT::v2i64, Legal);
1112
1113 // 128 bit shifts can be accomplished via 3 instructions for SHL and
1114 // SRL, but not for SRA because of the instructions available:
1115 // VS{RL} and VS{RL}O. However due to direct move costs, it's not worth
1116 // doing
1117 setOperationAction(ISD::SHL, MVT::v1i128, Expand);
1118 setOperationAction(ISD::SRL, MVT::v1i128, Expand);
1119 setOperationAction(ISD::SRA, MVT::v1i128, Expand);
1120
1121 setOperationAction(ISD::SETCC, MVT::v2i64, Legal);
1122 }
1123 else {
1124 setOperationAction(ISD::SHL, MVT::v2i64, Expand);
1125 setOperationAction(ISD::SRA, MVT::v2i64, Expand);
1126 setOperationAction(ISD::SRL, MVT::v2i64, Expand);
1127
1128 setOperationAction(ISD::SETCC, MVT::v2i64, Custom);
1129
1130 // VSX v2i64 only supports non-arithmetic operations.
1131 setOperationAction(ISD::ADD, MVT::v2i64, Expand);
1132 setOperationAction(ISD::SUB, MVT::v2i64, Expand);
1133 }
1134
1135 if (Subtarget.isISA3_1())
1136 setOperationAction(ISD::SETCC, MVT::v1i128, Legal);
1137 else
1138 setOperationAction(ISD::SETCC, MVT::v1i128, Expand);
1139
1140 setOperationAction(ISD::LOAD, MVT::v2i64, Promote);
1141 AddPromotedToType (ISD::LOAD, MVT::v2i64, MVT::v2f64);
1143 AddPromotedToType (ISD::STORE, MVT::v2i64, MVT::v2f64);
1144
1146
1155
1156 // Custom handling for partial vectors of integers converted to
1157 // floating point. We already have optimal handling for v2i32 through
1158 // the DAG combine, so those aren't necessary.
1175
1176 setOperationAction(ISD::FNEG, MVT::v4f32, Legal);
1177 setOperationAction(ISD::FNEG, MVT::v2f64, Legal);
1178 setOperationAction(ISD::FABS, MVT::v4f32, Legal);
1179 setOperationAction(ISD::FABS, MVT::v2f64, Legal);
1182
1185
1186 // Handle constrained floating-point operations of vector.
1187 // The predictor is `hasVSX` because altivec instruction has
1188 // no exception but VSX vector instruction has.
1202
1216
1217 addRegisterClass(MVT::v2i64, &PPC::VSRCRegClass);
1218 addRegisterClass(MVT::f128, &PPC::VRRCRegClass);
1219
1220 for (MVT FPT : MVT::fp_valuetypes())
1221 setLoadExtAction(ISD::EXTLOAD, MVT::f128, FPT, Expand);
1222
1223 // Expand the SELECT to SELECT_CC
1225
1226 setTruncStoreAction(MVT::f128, MVT::f64, Expand);
1227 setTruncStoreAction(MVT::f128, MVT::f32, Expand);
1228
1229 // No implementation for these ops for PowerPC.
1231 setOperationAction(ISD::FSIN, MVT::f128, Expand);
1232 setOperationAction(ISD::FCOS, MVT::f128, Expand);
1233 setOperationAction(ISD::FPOW, MVT::f128, Expand);
1236 }
1237
1238 if (Subtarget.hasP8Altivec()) {
1239 addRegisterClass(MVT::v2i64, &PPC::VRRCRegClass);
1240 addRegisterClass(MVT::v1i128, &PPC::VRRCRegClass);
1241 }
1242
1243 if (Subtarget.hasP9Vector()) {
1246
1247 // Test data class instructions store results in CR bits.
1248 if (Subtarget.useCRBits()) {
1253 }
1254
1255 // 128 bit shifts can be accomplished via 3 instructions for SHL and
1256 // SRL, but not for SRA because of the instructions available:
1257 // VS{RL} and VS{RL}O.
1258 setOperationAction(ISD::SHL, MVT::v1i128, Legal);
1259 setOperationAction(ISD::SRL, MVT::v1i128, Legal);
1260 setOperationAction(ISD::SRA, MVT::v1i128, Expand);
1261
1262 setOperationAction(ISD::FADD, MVT::f128, Legal);
1263 setOperationAction(ISD::FSUB, MVT::f128, Legal);
1264 setOperationAction(ISD::FDIV, MVT::f128, Legal);
1265 setOperationAction(ISD::FMUL, MVT::f128, Legal);
1267
1268 setOperationAction(ISD::FMA, MVT::f128, Legal);
1275
1277 setOperationAction(ISD::FRINT, MVT::f128, Legal);
1279 setOperationAction(ISD::FCEIL, MVT::f128, Legal);
1282
1286
1287 // Handle constrained floating-point operations of fp128
1304 setOperationAction(ISD::BSWAP, MVT::v8i16, Legal);
1305 setOperationAction(ISD::BSWAP, MVT::v4i32, Legal);
1306 setOperationAction(ISD::BSWAP, MVT::v2i64, Legal);
1307 setOperationAction(ISD::BSWAP, MVT::v1i128, Legal);
1308 } else if (Subtarget.hasVSX()) {
1311
1312 AddPromotedToType(ISD::LOAD, MVT::f128, MVT::v4i32);
1313 AddPromotedToType(ISD::STORE, MVT::f128, MVT::v4i32);
1314
1315 // Set FADD/FSUB as libcall to avoid the legalizer to expand the
1316 // fp_to_uint and int_to_fp.
1319
1320 setOperationAction(ISD::FMUL, MVT::f128, Expand);
1321 setOperationAction(ISD::FDIV, MVT::f128, Expand);
1322 setOperationAction(ISD::FNEG, MVT::f128, Expand);
1323 setOperationAction(ISD::FABS, MVT::f128, Expand);
1325 setOperationAction(ISD::FMA, MVT::f128, Expand);
1327
1328 // Expand the fp_extend if the target type is fp128.
1331
1332 // Expand the fp_round if the source type is fp128.
1333 for (MVT VT : {MVT::f32, MVT::f64}) {
1336 }
1337
1342
1343 // Lower following f128 select_cc pattern:
1344 // select_cc x, y, tv, fv, cc -> select_cc (setcc x, y, cc), 0, tv, fv, NE
1346
1347 // We need to handle f128 SELECT_CC with integer result type.
1349 setOperationAction(ISD::SELECT_CC, MVT::i64, isPPC64 ? Custom : Expand);
1350 }
1351
1352 if (Subtarget.hasP9Altivec()) {
1353 if (Subtarget.isISA3_1()) {
1358 } else {
1361 }
1369
1370 setOperationAction(ISD::ABDU, MVT::v16i8, Legal);
1371 setOperationAction(ISD::ABDU, MVT::v8i16, Legal);
1372 setOperationAction(ISD::ABDU, MVT::v4i32, Legal);
1373 setOperationAction(ISD::ABDS, MVT::v4i32, Legal);
1374 }
1375
1376 if (Subtarget.hasP10Vector()) {
1378 }
1379 }
1380
1381 if (Subtarget.pairedVectorMemops()) {
1382 addRegisterClass(MVT::v256i1, &PPC::VSRpRCRegClass);
1383 setOperationAction(ISD::LOAD, MVT::v256i1, Custom);
1384 setOperationAction(ISD::STORE, MVT::v256i1, Custom);
1385 }
1386 if (Subtarget.hasMMA()) {
1387 if (Subtarget.isISAFuture()) {
1388 addRegisterClass(MVT::v512i1, &PPC::WACCRCRegClass);
1389 addRegisterClass(MVT::v1024i1, &PPC::DMRRCRegClass);
1390 addRegisterClass(MVT::v2048i1, &PPC::DMRpRCRegClass);
1391 setOperationAction(ISD::LOAD, MVT::v1024i1, Custom);
1392 setOperationAction(ISD::STORE, MVT::v1024i1, Custom);
1393 setOperationAction(ISD::LOAD, MVT::v2048i1, Custom);
1394 setOperationAction(ISD::STORE, MVT::v2048i1, Custom);
1395 } else {
1396 addRegisterClass(MVT::v512i1, &PPC::UACCRCRegClass);
1397 }
1398 setOperationAction(ISD::LOAD, MVT::v512i1, Custom);
1399 setOperationAction(ISD::STORE, MVT::v512i1, Custom);
1401 }
1402
1403 if (Subtarget.has64BitSupport())
1405
1406 if (Subtarget.isISA3_1())
1407 setOperationAction(ISD::SRA, MVT::v1i128, Legal);
1408
1409 setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, isPPC64 ? Legal : Custom);
1410
1411 if (!isPPC64) {
1414 }
1415
1420 }
1421
1423
1424 if (Subtarget.hasAltivec()) {
1425 // Altivec instructions set fields to all zeros or all ones.
1427 }
1428
1431 else if (isPPC64)
1433 else
1435
1436 setStackPointerRegisterToSaveRestore(isPPC64 ? PPC::X1 : PPC::R1);
1437
1438 // We have target-specific dag combine patterns for the following nodes:
1442 if (Subtarget.hasFPCVT())
1445 if (Subtarget.useCRBits())
1449
1451
1453
1454 if (Subtarget.useCRBits()) {
1456 }
1457
1458 // With 32 condition bits, we don't need to sink (and duplicate) compares
1459 // aggressively in CodeGenPrep.
1460 if (Subtarget.useCRBits()) {
1462 }
1463
1464 // TODO: The default entry number is set to 64. This stops most jump table
1465 // generation on PPC. But it is good for current PPC HWs because the indirect
1466 // branch instruction mtctr to the jump table may lead to bad branch predict.
1467 // Re-evaluate this value on future HWs that can do better with mtctr.
1469
1470 // The default minimum of largest number in a BitTest cluster is 3.
1472
1474 setMinCmpXchgSizeInBits(Subtarget.hasPartwordAtomics() ? 8 : 32);
1475
1476 auto CPUDirective = Subtarget.getCPUDirective();
1477 switch (CPUDirective) {
1478 default: break;
1479 case PPC::DIR_970:
1480 case PPC::DIR_A2:
1481 case PPC::DIR_E500:
1482 case PPC::DIR_E500mc:
1483 case PPC::DIR_E5500:
1484 case PPC::DIR_PWR4:
1485 case PPC::DIR_PWR5:
1486 case PPC::DIR_PWR5X:
1487 case PPC::DIR_PWR6:
1488 case PPC::DIR_PWR6X:
1489 case PPC::DIR_PWR7:
1490 case PPC::DIR_PWR8:
1491 case PPC::DIR_PWR9:
1492 case PPC::DIR_PWR10:
1493 case PPC::DIR_PWR11:
1497 break;
1498 }
1499
1500 if (Subtarget.enableMachineScheduler())
1502 else
1504
1506
1507 // The Freescale cores do better with aggressive inlining of memcpy and
1508 // friends. GCC uses same threshold of 128 bytes (= 32 word stores).
1509 if (CPUDirective == PPC::DIR_E500mc || CPUDirective == PPC::DIR_E5500) {
1510 MaxStoresPerMemset = 32;
1512 MaxStoresPerMemcpy = 32;
1516 } else if (CPUDirective == PPC::DIR_A2) {
1517 // The A2 also benefits from (very) aggressive inlining of memcpy and
 1518// friends. The overhead of the function call, even when warm, can be
1519 // over one hundred cycles.
1520 MaxStoresPerMemset = 128;
1521 MaxStoresPerMemcpy = 128;
1522 MaxStoresPerMemmove = 128;
1523 MaxLoadsPerMemcmp = 128;
1524 } else {
1527 }
1528
1529 // Enable generation of STXVP instructions by default for mcpu=future.
1530 if (CPUDirective == PPC::DIR_PWR_FUTURE &&
1531 DisableAutoPairedVecSt.getNumOccurrences() == 0)
1532 DisableAutoPairedVecSt = false;
1533
1534 IsStrictFPEnabled = true;
1535
1536 // Let the subtarget (CPU) decide if a predictable select is more expensive
1537 // than the corresponding branch. This information is used in CGP to decide
1538 // when to convert selects into branches.
1539 PredictableSelectIsExpensive = Subtarget.isPredictableSelectIsExpensive();
1540
1542}
1543
1544// *********************************** NOTE ************************************
1545// For selecting load and store instructions, the addressing modes are defined
1546// as ComplexPatterns in PPCInstrInfo.td, which are then utilized in the TD
 1547// patterns to match the load and store instructions.
1548//
1549// The TD definitions for the addressing modes correspond to their respective
1550// Select<AddrMode>Form() function in PPCISelDAGToDAG.cpp. These functions rely
1551// on SelectOptimalAddrMode(), which calls computeMOFlags() to compute the
1552// address mode flags of a particular node. Afterwards, the computed address
1553// flags are passed into getAddrModeForFlags() in order to retrieve the optimal
1554// addressing mode. SelectOptimalAddrMode() then sets the Base and Displacement
1555// accordingly, based on the preferred addressing mode.
1556//
1557// Within PPCISelLowering.h, there are two enums: MemOpFlags and AddrMode.
1558// MemOpFlags contains all the possible flags that can be used to compute the
1559// optimal addressing mode for load and store instructions.
1560// AddrMode contains all the possible load and store addressing modes available
1561// on Power (such as DForm, DSForm, DQForm, XForm, etc.)
1562//
1563// When adding new load and store instructions, it is possible that new address
1564// flags may need to be added into MemOpFlags, and a new addressing mode will
1565// need to be added to AddrMode. An entry of the new addressing mode (consisting
1566// of the minimal and main distinguishing address flags for the new load/store
1567// instructions) will need to be added into initializeAddrModeMap() below.
1568// Finally, when adding new addressing modes, the getAddrModeForFlags() will
1569// need to be updated to account for selecting the optimal addressing mode.
1570// *****************************************************************************
1571/// Initialize the map that relates the different addressing modes of the load
1572/// and store instructions to a set of flags. This ensures the load/store
1573/// instruction is correctly matched during instruction selection.
1574void PPCTargetLowering::initializeAddrModeMap() {
1575 AddrModesMap[PPC::AM_DForm] = {
1576 // LWZ, STW
1581 // LBZ, LHZ, STB, STH
1586 // LHA
1591 // LFS, LFD, STFS, STFD
1596 };
1597 AddrModesMap[PPC::AM_DSForm] = {
1598 // LWA
1602 // LD, STD
1606 // DFLOADf32, DFLOADf64, DSTOREf32, DSTOREf64
1610 };
1611 AddrModesMap[PPC::AM_DQForm] = {
1612 // LXV, STXV
1616 };
1617 AddrModesMap[PPC::AM_PrefixDForm] = {PPC::MOF_RPlusSImm34 |
1619 // TODO: Add mapping for quadword load/store.
1620}
1621
1622/// getMaxByValAlign - Helper for getByValTypeAlignment to determine
1623/// the desired ByVal argument alignment.
1624static void getMaxByValAlign(Type *Ty, Align &MaxAlign, Align MaxMaxAlign) {
1625 if (MaxAlign == MaxMaxAlign)
1626 return;
1627 if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
1628 if (MaxMaxAlign >= 32 &&
1629 VTy->getPrimitiveSizeInBits().getFixedValue() >= 256)
1630 MaxAlign = Align(32);
1631 else if (VTy->getPrimitiveSizeInBits().getFixedValue() >= 128 &&
1632 MaxAlign < 16)
1633 MaxAlign = Align(16);
1634 } else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
1635 Align EltAlign;
1636 getMaxByValAlign(ATy->getElementType(), EltAlign, MaxMaxAlign);
1637 if (EltAlign > MaxAlign)
1638 MaxAlign = EltAlign;
1639 } else if (StructType *STy = dyn_cast<StructType>(Ty)) {
1640 for (auto *EltTy : STy->elements()) {
1641 Align EltAlign;
1642 getMaxByValAlign(EltTy, EltAlign, MaxMaxAlign);
1643 if (EltAlign > MaxAlign)
1644 MaxAlign = EltAlign;
1645 if (MaxAlign == MaxMaxAlign)
1646 break;
1647 }
1648 }
1649}
1650
1651/// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
1652/// function arguments in the caller parameter area.
1654 const DataLayout &DL) const {
1655 // 16byte and wider vectors are passed on 16byte boundary.
1656 // The rest is 8 on PPC64 and 4 on PPC32 boundary.
1657 Align Alignment = Subtarget.isPPC64() ? Align(8) : Align(4);
1658 if (Subtarget.hasAltivec())
1659 getMaxByValAlign(Ty, Alignment, Align(16));
1660 return Alignment;
1661}
1662
1664 return Subtarget.useSoftFloat();
1665}
1666
1668 return Subtarget.hasSPE();
1669}
1670
1672 return VT.isScalarInteger();
1673}
1674
1676 Type *VectorTy, unsigned ElemSizeInBits, unsigned &Index) const {
1677 if (!Subtarget.isPPC64() || !Subtarget.hasVSX())
1678 return false;
1679
1680 if (auto *VTy = dyn_cast<VectorType>(VectorTy)) {
1681 if (VTy->getScalarType()->isIntegerTy()) {
1682 // ElemSizeInBits 8/16 can fit in immediate field, not needed here.
1683 if (ElemSizeInBits == 32) {
1684 Index = Subtarget.isLittleEndian() ? 2 : 1;
1685 return true;
1686 }
1687 if (ElemSizeInBits == 64) {
1688 Index = Subtarget.isLittleEndian() ? 1 : 0;
1689 return true;
1690 }
1691 }
1692 }
1693 return false;
1694}
1695
1697 EVT VT) const {
1698 if (!VT.isVector())
1699 return Subtarget.useCRBits() ? MVT::i1 : MVT::i32;
1700
1702}
1703
1705 assert(VT.isFloatingPoint() && "Non-floating-point FMA?");
1706 return true;
1707}
1708
1709//===----------------------------------------------------------------------===//
1710// Node matching predicates, for use by the tblgen matching code.
1711//===----------------------------------------------------------------------===//
1712
1713/// isFloatingPointZero - Return true if this is 0.0 or -0.0.
1716 return CFP->getValueAPF().isZero();
1717 else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) {
1718 // Maybe this has already been legalized into the constant pool?
1719 if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op.getOperand(1)))
1720 if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
1721 return CFP->getValueAPF().isZero();
1722 }
1723 return false;
1724}
1725
1726/// isConstantOrUndef - Op is either an undef node or a ConstantSDNode. Return
1727/// true if Op is undef or if it matches the specified value.
static bool isConstantOrUndef(int Op, int Val) {
  // A negative mask element denotes undef, which matches any value.
  if (Op < 0)
    return true;
  return Op == Val;
}
1731
1732/// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a
1733/// VPKUHUM instruction.
1734/// The ShuffleKind distinguishes between big-endian operations with
1735/// two different inputs (0), either-endian operations with two identical
1736/// inputs (1), and little-endian operations with two different inputs (2).
1737/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
1739 SelectionDAG &DAG) {
1740 bool IsLE = DAG.getDataLayout().isLittleEndian();
1741 if (ShuffleKind == 0) {
1742 if (IsLE)
1743 return false;
1744 for (unsigned i = 0; i != 16; ++i)
1745 if (!isConstantOrUndef(N->getMaskElt(i), i*2+1))
1746 return false;
1747 } else if (ShuffleKind == 2) {
1748 if (!IsLE)
1749 return false;
1750 for (unsigned i = 0; i != 16; ++i)
1751 if (!isConstantOrUndef(N->getMaskElt(i), i*2))
1752 return false;
1753 } else if (ShuffleKind == 1) {
1754 unsigned j = IsLE ? 0 : 1;
1755 for (unsigned i = 0; i != 8; ++i)
1756 if (!isConstantOrUndef(N->getMaskElt(i), i*2+j) ||
1757 !isConstantOrUndef(N->getMaskElt(i+8), i*2+j))
1758 return false;
1759 }
1760 return true;
1761}
1762
1763/// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a
1764/// VPKUWUM instruction.
1765/// The ShuffleKind distinguishes between big-endian operations with
1766/// two different inputs (0), either-endian operations with two identical
1767/// inputs (1), and little-endian operations with two different inputs (2).
1768/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
1770 SelectionDAG &DAG) {
1771 bool IsLE = DAG.getDataLayout().isLittleEndian();
1772 if (ShuffleKind == 0) {
1773 if (IsLE)
1774 return false;
1775 for (unsigned i = 0; i != 16; i += 2)
1776 if (!isConstantOrUndef(N->getMaskElt(i ), i*2+2) ||
1777 !isConstantOrUndef(N->getMaskElt(i+1), i*2+3))
1778 return false;
1779 } else if (ShuffleKind == 2) {
1780 if (!IsLE)
1781 return false;
1782 for (unsigned i = 0; i != 16; i += 2)
1783 if (!isConstantOrUndef(N->getMaskElt(i ), i*2) ||
1784 !isConstantOrUndef(N->getMaskElt(i+1), i*2+1))
1785 return false;
1786 } else if (ShuffleKind == 1) {
1787 unsigned j = IsLE ? 0 : 2;
1788 for (unsigned i = 0; i != 8; i += 2)
1789 if (!isConstantOrUndef(N->getMaskElt(i ), i*2+j) ||
1790 !isConstantOrUndef(N->getMaskElt(i+1), i*2+j+1) ||
1791 !isConstantOrUndef(N->getMaskElt(i+8), i*2+j) ||
1792 !isConstantOrUndef(N->getMaskElt(i+9), i*2+j+1))
1793 return false;
1794 }
1795 return true;
1796}
1797
1798/// isVPKUDUMShuffleMask - Return true if this is the shuffle mask for a
1799/// VPKUDUM instruction, AND the VPKUDUM instruction exists for the
1800/// current subtarget.
1801///
1802/// The ShuffleKind distinguishes between big-endian operations with
1803/// two different inputs (0), either-endian operations with two identical
1804/// inputs (1), and little-endian operations with two different inputs (2).
1805/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
1807 SelectionDAG &DAG) {
1808 const PPCSubtarget &Subtarget = DAG.getSubtarget<PPCSubtarget>();
1809 if (!Subtarget.hasP8Vector())
1810 return false;
1811
1812 bool IsLE = DAG.getDataLayout().isLittleEndian();
1813 if (ShuffleKind == 0) {
1814 if (IsLE)
1815 return false;
1816 for (unsigned i = 0; i != 16; i += 4)
1817 if (!isConstantOrUndef(N->getMaskElt(i ), i*2+4) ||
1818 !isConstantOrUndef(N->getMaskElt(i+1), i*2+5) ||
1819 !isConstantOrUndef(N->getMaskElt(i+2), i*2+6) ||
1820 !isConstantOrUndef(N->getMaskElt(i+3), i*2+7))
1821 return false;
1822 } else if (ShuffleKind == 2) {
1823 if (!IsLE)
1824 return false;
1825 for (unsigned i = 0; i != 16; i += 4)
1826 if (!isConstantOrUndef(N->getMaskElt(i ), i*2) ||
1827 !isConstantOrUndef(N->getMaskElt(i+1), i*2+1) ||
1828 !isConstantOrUndef(N->getMaskElt(i+2), i*2+2) ||
1829 !isConstantOrUndef(N->getMaskElt(i+3), i*2+3))
1830 return false;
1831 } else if (ShuffleKind == 1) {
1832 unsigned j = IsLE ? 0 : 4;
1833 for (unsigned i = 0; i != 8; i += 4)
1834 if (!isConstantOrUndef(N->getMaskElt(i ), i*2+j) ||
1835 !isConstantOrUndef(N->getMaskElt(i+1), i*2+j+1) ||
1836 !isConstantOrUndef(N->getMaskElt(i+2), i*2+j+2) ||
1837 !isConstantOrUndef(N->getMaskElt(i+3), i*2+j+3) ||
1838 !isConstantOrUndef(N->getMaskElt(i+8), i*2+j) ||
1839 !isConstantOrUndef(N->getMaskElt(i+9), i*2+j+1) ||
1840 !isConstantOrUndef(N->getMaskElt(i+10), i*2+j+2) ||
1841 !isConstantOrUndef(N->getMaskElt(i+11), i*2+j+3))
1842 return false;
1843 }
1844 return true;
1845}
1846
1847/// isVMerge - Common function, used to match vmrg* shuffles.
1848///
1849static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize,
1850 unsigned LHSStart, unsigned RHSStart) {
1851 if (N->getValueType(0) != MVT::v16i8)
1852 return false;
1853 assert((UnitSize == 1 || UnitSize == 2 || UnitSize == 4) &&
1854 "Unsupported merge size!");
1855
1856 for (unsigned i = 0; i != 8/UnitSize; ++i) // Step over units
1857 for (unsigned j = 0; j != UnitSize; ++j) { // Step over bytes within unit
1858 if (!isConstantOrUndef(N->getMaskElt(i*UnitSize*2+j),
1859 LHSStart+j+i*UnitSize) ||
1860 !isConstantOrUndef(N->getMaskElt(i*UnitSize*2+UnitSize+j),
1861 RHSStart+j+i*UnitSize))
1862 return false;
1863 }
1864 return true;
1865}
1866
1867/// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for
1868/// a VMRGL* instruction with the specified unit size (1,2 or 4 bytes).
1869/// The ShuffleKind distinguishes between big-endian merges with two
1870/// different inputs (0), either-endian merges with two identical inputs (1),
1871/// and little-endian merges with two different inputs (2). For the latter,
1872/// the input operands are swapped (see PPCInstrAltivec.td).
1874 unsigned ShuffleKind, SelectionDAG &DAG) {
1875 if (DAG.getDataLayout().isLittleEndian()) {
1876 if (ShuffleKind == 1) // unary
1877 return isVMerge(N, UnitSize, 0, 0);
1878 else if (ShuffleKind == 2) // swapped
1879 return isVMerge(N, UnitSize, 0, 16);
1880 else
1881 return false;
1882 } else {
1883 if (ShuffleKind == 1) // unary
1884 return isVMerge(N, UnitSize, 8, 8);
1885 else if (ShuffleKind == 0) // normal
1886 return isVMerge(N, UnitSize, 8, 24);
1887 else
1888 return false;
1889 }
1890}
1891
1892/// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for
1893/// a VMRGH* instruction with the specified unit size (1,2 or 4 bytes).
1894/// The ShuffleKind distinguishes between big-endian merges with two
1895/// different inputs (0), either-endian merges with two identical inputs (1),
1896/// and little-endian merges with two different inputs (2). For the latter,
1897/// the input operands are swapped (see PPCInstrAltivec.td).
1899 unsigned ShuffleKind, SelectionDAG &DAG) {
1900 if (DAG.getDataLayout().isLittleEndian()) {
1901 if (ShuffleKind == 1) // unary
1902 return isVMerge(N, UnitSize, 8, 8);
1903 else if (ShuffleKind == 2) // swapped
1904 return isVMerge(N, UnitSize, 8, 24);
1905 else
1906 return false;
1907 } else {
1908 if (ShuffleKind == 1) // unary
1909 return isVMerge(N, UnitSize, 0, 0);
1910 else if (ShuffleKind == 0) // normal
1911 return isVMerge(N, UnitSize, 0, 16);
1912 else
1913 return false;
1914 }
1915}
1916
1917/**
1918 * Common function used to match vmrgew and vmrgow shuffles
1919 *
1920 * The indexOffset determines whether to look for even or odd words in
1921 * the shuffle mask. This is based on the endianness of the target
1922 * machine.
1923 * - Little Endian:
1924 * - Use offset of 0 to check for odd elements
1925 * - Use offset of 4 to check for even elements
1926 * - Big Endian:
1927 * - Use offset of 0 to check for even elements
1928 * - Use offset of 4 to check for odd elements
1929 * A detailed description of the vector element ordering for little endian and
1930 * big endian can be found at
1931 * http://www.ibm.com/developerworks/library/l-ibm-xl-c-cpp-compiler/index.html
1932 * Targeting your applications - what little endian and big endian IBM XL C/C++
1933 * compiler differences mean to you
1934 *
1935 * The mask to the shuffle vector instruction specifies the indices of the
1936 * elements from the two input vectors to place in the result. The elements are
1937 * numbered in array-access order, starting with the first vector. These vectors
1938 * are always of type v16i8, thus each vector will contain 16 elements of size
1939 * 8. More info on the shuffle vector can be found in the
1940 * http://llvm.org/docs/LangRef.html#shufflevector-instruction
1941 * Language Reference.
1942 *
1943 * The RHSStartValue indicates whether the same input vectors are used (unary)
1944 * or two different input vectors are used, based on the following:
1945 * - If the instruction uses the same vector for both inputs, the range of the
1946 * indices will be 0 to 15. In this case, the RHSStart value passed should
1947 * be 0.
1948 * - If the instruction has two different vectors then the range of the
1949 * indices will be 0 to 31. In this case, the RHSStart value passed should
1950 * be 16 (indices 0-15 specify elements in the first vector while indices 16
1951 * to 31 specify elements in the second vector).
1952 *
1953 * \param[in] N The shuffle vector SD Node to analyze
1954 * \param[in] IndexOffset Specifies whether to look for even or odd elements
1955 * \param[in] RHSStartValue Specifies the starting index for the righthand input
1956 * vector to the shuffle_vector instruction
1957 * \return true iff this shuffle vector represents an even or odd word merge
1958 */
1959static bool isVMerge(ShuffleVectorSDNode *N, unsigned IndexOffset,
1960 unsigned RHSStartValue) {
1961 if (N->getValueType(0) != MVT::v16i8)
1962 return false;
1963
1964 for (unsigned i = 0; i < 2; ++i)
1965 for (unsigned j = 0; j < 4; ++j)
1966 if (!isConstantOrUndef(N->getMaskElt(i*4+j),
1967 i*RHSStartValue+j+IndexOffset) ||
1968 !isConstantOrUndef(N->getMaskElt(i*4+j+8),
1969 i*RHSStartValue+j+IndexOffset+8))
1970 return false;
1971 return true;
1972}
1973
1974/**
1975 * Determine if the specified shuffle mask is suitable for the vmrgew or
1976 * vmrgow instructions.
1977 *
1978 * \param[in] N The shuffle vector SD Node to analyze
1979 * \param[in] CheckEven Check for an even merge (true) or an odd merge (false)
1980 * \param[in] ShuffleKind Identify the type of merge:
1981 * - 0 = big-endian merge with two different inputs;
1982 * - 1 = either-endian merge with two identical inputs;
1983 * - 2 = little-endian merge with two different inputs (inputs are swapped for
1984 * little-endian merges).
1985 * \param[in] DAG The current SelectionDAG
1986 * \return true iff this shuffle mask
1987 */
1989 unsigned ShuffleKind, SelectionDAG &DAG) {
1990 if (DAG.getDataLayout().isLittleEndian()) {
1991 unsigned indexOffset = CheckEven ? 4 : 0;
1992 if (ShuffleKind == 1) // Unary
1993 return isVMerge(N, indexOffset, 0);
1994 else if (ShuffleKind == 2) // swapped
1995 return isVMerge(N, indexOffset, 16);
1996 else
1997 return false;
1998 }
1999 else {
2000 unsigned indexOffset = CheckEven ? 0 : 4;
2001 if (ShuffleKind == 1) // Unary
2002 return isVMerge(N, indexOffset, 0);
2003 else if (ShuffleKind == 0) // Normal
2004 return isVMerge(N, indexOffset, 16);
2005 else
2006 return false;
2007 }
2008 return false;
2009}
2010
2011/// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift
2012/// amount, otherwise return -1.
2013/// The ShuffleKind distinguishes between big-endian operations with two
2014/// different inputs (0), either-endian operations with two identical inputs
2015/// (1), and little-endian operations with two different inputs (2). For the
2016/// latter, the input operands are swapped (see PPCInstrAltivec.td).
2017int PPC::isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind,
2018 SelectionDAG &DAG) {
2019 if (N->getValueType(0) != MVT::v16i8)
2020 return -1;
2021
2023
2024 // Find the first non-undef value in the shuffle mask.
2025 unsigned i;
2026 for (i = 0; i != 16 && SVOp->getMaskElt(i) < 0; ++i)
2027 /*search*/;
2028
2029 if (i == 16) return -1; // all undef.
2030
2031 // Otherwise, check to see if the rest of the elements are consecutively
2032 // numbered from this value.
2033 unsigned ShiftAmt = SVOp->getMaskElt(i);
2034 if (ShiftAmt < i) return -1;
2035
2036 ShiftAmt -= i;
2037 bool isLE = DAG.getDataLayout().isLittleEndian();
2038
2039 if ((ShuffleKind == 0 && !isLE) || (ShuffleKind == 2 && isLE)) {
2040 // Check the rest of the elements to see if they are consecutive.
2041 for (++i; i != 16; ++i)
2042 if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt+i))
2043 return -1;
2044 } else if (ShuffleKind == 1) {
2045 // Check the rest of the elements to see if they are consecutive.
2046 for (++i; i != 16; ++i)
2047 if (!isConstantOrUndef(SVOp->getMaskElt(i), (ShiftAmt+i) & 15))
2048 return -1;
2049 } else
2050 return -1;
2051
2052 if (isLE)
2053 ShiftAmt = 16 - ShiftAmt;
2054
2055 return ShiftAmt;
2056}
2057
2058/// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand
2059/// specifies a splat of a single element that is suitable for input to
2060/// one of the splat operations (VSPLTB/VSPLTH/VSPLTW/XXSPLTW/LXVDSX/etc.).
2062 EVT VT = N->getValueType(0);
2063 if (VT == MVT::v2i64 || VT == MVT::v2f64)
2064 return EltSize == 8 && N->getMaskElt(0) == N->getMaskElt(1);
2065
2066 assert(VT == MVT::v16i8 && isPowerOf2_32(EltSize) &&
2067 EltSize <= 8 && "Can only handle 1,2,4,8 byte element sizes");
2068
2069 // The consecutive indices need to specify an element, not part of two
2070 // different elements. So abandon ship early if this isn't the case.
2071 if (N->getMaskElt(0) % EltSize != 0)
2072 return false;
2073
2074 // This is a splat operation if each element of the permute is the same, and
2075 // if the value doesn't reference the second vector.
2076 unsigned ElementBase = N->getMaskElt(0);
2077
2078 // FIXME: Handle UNDEF elements too!
2079 if (ElementBase >= 16)
2080 return false;
2081
2082 // Check that the indices are consecutive, in the case of a multi-byte element
2083 // splatted with a v16i8 mask.
2084 for (unsigned i = 1; i != EltSize; ++i)
2085 if (N->getMaskElt(i) < 0 || N->getMaskElt(i) != (int)(i+ElementBase))
2086 return false;
2087
2088 for (unsigned i = EltSize, e = 16; i != e; i += EltSize) {
2089 // An UNDEF element is a sequence of UNDEF bytes.
2090 if (N->getMaskElt(i) < 0) {
2091 for (unsigned j = 1; j != EltSize; ++j)
2092 if (N->getMaskElt(i + j) >= 0)
2093 return false;
2094 } else
2095 for (unsigned j = 0; j != EltSize; ++j)
2096 if (N->getMaskElt(i + j) != N->getMaskElt(j))
2097 return false;
2098 }
2099 return true;
2100}
2101
2102/// Check that the mask is shuffling N byte elements. Within each N byte
2103/// element of the mask, the indices could be either in increasing or
2104/// decreasing order as long as they are consecutive.
2105/// \param[in] N the shuffle vector SD Node to analyze
2106/// \param[in] Width the element width in bytes, could be 2/4/8/16 (HalfWord/
2107/// Word/DoubleWord/QuadWord).
2108/// \param[in] StepLen the delta indices number among the N byte element, if
2109/// the mask is in increasing/decreasing order then it is 1/-1.
2110/// \return true iff the mask is shuffling N byte elements.
2111static bool isNByteElemShuffleMask(ShuffleVectorSDNode *N, unsigned Width,
2112 int StepLen) {
2113 assert((Width == 2 || Width == 4 || Width == 8 || Width == 16) &&
2114 "Unexpected element width.");
2115 assert((StepLen == 1 || StepLen == -1) && "Unexpected element width.");
2116
2117 unsigned NumOfElem = 16 / Width;
2118 unsigned MaskVal[16]; // Width is never greater than 16
2119 for (unsigned i = 0; i < NumOfElem; ++i) {
2120 MaskVal[0] = N->getMaskElt(i * Width);
2121 if ((StepLen == 1) && (MaskVal[0] % Width)) {
2122 return false;
2123 } else if ((StepLen == -1) && ((MaskVal[0] + 1) % Width)) {
2124 return false;
2125 }
2126
2127 for (unsigned int j = 1; j < Width; ++j) {
2128 MaskVal[j] = N->getMaskElt(i * Width + j);
2129 if (MaskVal[j] != MaskVal[j-1] + StepLen) {
2130 return false;
2131 }
2132 }
2133 }
2134
2135 return true;
2136}
2137
2138bool PPC::isXXINSERTWMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
2139 unsigned &InsertAtByte, bool &Swap, bool IsLE) {
2140 if (!isNByteElemShuffleMask(N, 4, 1))
2141 return false;
2142
2143 // Now we look at mask elements 0,4,8,12
2144 unsigned M0 = N->getMaskElt(0) / 4;
2145 unsigned M1 = N->getMaskElt(4) / 4;
2146 unsigned M2 = N->getMaskElt(8) / 4;
2147 unsigned M3 = N->getMaskElt(12) / 4;
2148 unsigned LittleEndianShifts[] = { 2, 1, 0, 3 };
2149 unsigned BigEndianShifts[] = { 3, 0, 1, 2 };
2150
2151 // Below, let H and L be arbitrary elements of the shuffle mask
2152 // where H is in the range [4,7] and L is in the range [0,3].
2153 // H, 1, 2, 3 or L, 5, 6, 7
2154 if ((M0 > 3 && M1 == 1 && M2 == 2 && M3 == 3) ||
2155 (M0 < 4 && M1 == 5 && M2 == 6 && M3 == 7)) {
2156 ShiftElts = IsLE ? LittleEndianShifts[M0 & 0x3] : BigEndianShifts[M0 & 0x3];
2157 InsertAtByte = IsLE ? 12 : 0;
2158 Swap = M0 < 4;
2159 return true;
2160 }
2161 // 0, H, 2, 3 or 4, L, 6, 7
2162 if ((M1 > 3 && M0 == 0 && M2 == 2 && M3 == 3) ||
2163 (M1 < 4 && M0 == 4 && M2 == 6 && M3 == 7)) {
2164 ShiftElts = IsLE ? LittleEndianShifts[M1 & 0x3] : BigEndianShifts[M1 & 0x3];
2165 InsertAtByte = IsLE ? 8 : 4;
2166 Swap = M1 < 4;
2167 return true;
2168 }
2169 // 0, 1, H, 3 or 4, 5, L, 7
2170 if ((M2 > 3 && M0 == 0 && M1 == 1 && M3 == 3) ||
2171 (M2 < 4 && M0 == 4 && M1 == 5 && M3 == 7)) {
2172 ShiftElts = IsLE ? LittleEndianShifts[M2 & 0x3] : BigEndianShifts[M2 & 0x3];
2173 InsertAtByte = IsLE ? 4 : 8;
2174 Swap = M2 < 4;
2175 return true;
2176 }
2177 // 0, 1, 2, H or 4, 5, 6, L
2178 if ((M3 > 3 && M0 == 0 && M1 == 1 && M2 == 2) ||
2179 (M3 < 4 && M0 == 4 && M1 == 5 && M2 == 6)) {
2180 ShiftElts = IsLE ? LittleEndianShifts[M3 & 0x3] : BigEndianShifts[M3 & 0x3];
2181 InsertAtByte = IsLE ? 0 : 12;
2182 Swap = M3 < 4;
2183 return true;
2184 }
2185
2186 // If both vector operands for the shuffle are the same vector, the mask will
2187 // contain only elements from the first one and the second one will be undef.
2188 if (N->getOperand(1).isUndef()) {
2189 ShiftElts = 0;
2190 Swap = true;
2191 unsigned XXINSERTWSrcElem = IsLE ? 2 : 1;
2192 if (M0 == XXINSERTWSrcElem && M1 == 1 && M2 == 2 && M3 == 3) {
2193 InsertAtByte = IsLE ? 12 : 0;
2194 return true;
2195 }
2196 if (M0 == 0 && M1 == XXINSERTWSrcElem && M2 == 2 && M3 == 3) {
2197 InsertAtByte = IsLE ? 8 : 4;
2198 return true;
2199 }
2200 if (M0 == 0 && M1 == 1 && M2 == XXINSERTWSrcElem && M3 == 3) {
2201 InsertAtByte = IsLE ? 4 : 8;
2202 return true;
2203 }
2204 if (M0 == 0 && M1 == 1 && M2 == 2 && M3 == XXINSERTWSrcElem) {
2205 InsertAtByte = IsLE ? 0 : 12;
2206 return true;
2207 }
2208 }
2209
2210 return false;
2211}
2212
2214 bool &Swap, bool IsLE) {
2215 assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
2216 // Ensure each byte index of the word is consecutive.
2217 if (!isNByteElemShuffleMask(N, 4, 1))
2218 return false;
2219
2220 // Now we look at mask elements 0,4,8,12, which are the beginning of words.
2221 unsigned M0 = N->getMaskElt(0) / 4;
2222 unsigned M1 = N->getMaskElt(4) / 4;
2223 unsigned M2 = N->getMaskElt(8) / 4;
2224 unsigned M3 = N->getMaskElt(12) / 4;
2225
2226 // If both vector operands for the shuffle are the same vector, the mask will
2227 // contain only elements from the first one and the second one will be undef.
2228 if (N->getOperand(1).isUndef()) {
2229 assert(M0 < 4 && "Indexing into an undef vector?");
2230 if (M1 != (M0 + 1) % 4 || M2 != (M1 + 1) % 4 || M3 != (M2 + 1) % 4)
2231 return false;
2232
2233 ShiftElts = IsLE ? (4 - M0) % 4 : M0;
2234 Swap = false;
2235 return true;
2236 }
2237
2238 // Ensure each word index of the ShuffleVector Mask is consecutive.
2239 if (M1 != (M0 + 1) % 8 || M2 != (M1 + 1) % 8 || M3 != (M2 + 1) % 8)
2240 return false;
2241
2242 if (IsLE) {
2243 if (M0 == 0 || M0 == 7 || M0 == 6 || M0 == 5) {
2244 // Input vectors don't need to be swapped if the leading element
2245 // of the result is one of the 3 left elements of the second vector
2246 // (or if there is no shift to be done at all).
2247 Swap = false;
2248 ShiftElts = (8 - M0) % 8;
2249 } else if (M0 == 4 || M0 == 3 || M0 == 2 || M0 == 1) {
2250 // Input vectors need to be swapped if the leading element
2251 // of the result is one of the 3 left elements of the first vector
2252 // (or if we're shifting by 4 - thereby simply swapping the vectors).
2253 Swap = true;
2254 ShiftElts = (4 - M0) % 4;
2255 }
2256
2257 return true;
2258 } else { // BE
2259 if (M0 == 0 || M0 == 1 || M0 == 2 || M0 == 3) {
2260 // Input vectors don't need to be swapped if the leading element
2261 // of the result is one of the 4 elements of the first vector.
2262 Swap = false;
2263 ShiftElts = M0;
2264 } else if (M0 == 4 || M0 == 5 || M0 == 6 || M0 == 7) {
2265 // Input vectors need to be swapped if the leading element
2266 // of the result is one of the 4 elements of the right vector.
2267 Swap = true;
2268 ShiftElts = M0 - 4;
2269 }
2270
2271 return true;
2272 }
2273}
2274
2276 assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
2277
2278 if (!isNByteElemShuffleMask(N, Width, -1))
2279 return false;
2280
2281 for (int i = 0; i < 16; i += Width)
2282 if (N->getMaskElt(i) != i + Width - 1)
2283 return false;
2284
2285 return true;
2286}
2287
2291
2295
2299
2303
2304/// Can node \p N be lowered to an XXPERMDI instruction? If so, set \p Swap
2305/// if the inputs to the instruction should be swapped and set \p DM to the
2306/// value for the immediate.
2307/// Specifically, set \p Swap to true only if \p N can be lowered to XXPERMDI
2308/// AND element 0 of the result comes from the first input (LE) or second input
2309/// (BE). Set \p DM to the calculated result (0-3) only if \p N can be lowered.
2310/// \return true iff the given mask of shuffle node \p N is a XXPERMDI shuffle
2311/// mask.
2313 bool &Swap, bool IsLE) {
2314 assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
2315
2316 // Ensure each byte index of the double word is consecutive.
2317 if (!isNByteElemShuffleMask(N, 8, 1))
2318 return false;
2319
2320 unsigned M0 = N->getMaskElt(0) / 8;
2321 unsigned M1 = N->getMaskElt(8) / 8;
2322 assert(((M0 | M1) < 4) && "A mask element out of bounds?");
2323
2324 // If both vector operands for the shuffle are the same vector, the mask will
2325 // contain only elements from the first one and the second one will be undef.
2326 if (N->getOperand(1).isUndef()) {
2327 if ((M0 | M1) < 2) {
2328 DM = IsLE ? (((~M1) & 1) << 1) + ((~M0) & 1) : (M0 << 1) + (M1 & 1);
2329 Swap = false;
2330 return true;
2331 } else
2332 return false;
2333 }
2334
2335 if (IsLE) {
2336 if (M0 > 1 && M1 < 2) {
2337 Swap = false;
2338 } else if (M0 < 2 && M1 > 1) {
2339 M0 = (M0 + 2) % 4;
2340 M1 = (M1 + 2) % 4;
2341 Swap = true;
2342 } else
2343 return false;
2344
2345 // Note: if control flow comes here that means Swap is already set above
2346 DM = (((~M1) & 1) << 1) + ((~M0) & 1);
2347 return true;
2348 } else { // BE
2349 if (M0 < 2 && M1 > 1) {
2350 Swap = false;
2351 } else if (M0 > 1 && M1 < 2) {
2352 M0 = (M0 + 2) % 4;
2353 M1 = (M1 + 2) % 4;
2354 Swap = true;
2355 } else
2356 return false;
2357
2358 // Note: if control flow comes here that means Swap is already set above
2359 DM = (M0 << 1) + (M1 & 1);
2360 return true;
2361 }
2362}
2363
2364
2365/// getSplatIdxForPPCMnemonics - Return the splat index as a value that is
2366/// appropriate for PPC mnemonics (which have a big endian bias - namely
2367/// elements are counted from the left of the vector register).
2368unsigned PPC::getSplatIdxForPPCMnemonics(SDNode *N, unsigned EltSize,
2369 SelectionDAG &DAG) {
2371 assert(isSplatShuffleMask(SVOp, EltSize));
2372 EVT VT = SVOp->getValueType(0);
2373
2374 if (VT == MVT::v2i64 || VT == MVT::v2f64)
2375 return DAG.getDataLayout().isLittleEndian() ? 1 - SVOp->getMaskElt(0)
2376 : SVOp->getMaskElt(0);
2377
2378 if (DAG.getDataLayout().isLittleEndian())
2379 return (16 / EltSize) - 1 - (SVOp->getMaskElt(0) / EltSize);
2380 else
2381 return SVOp->getMaskElt(0) / EltSize;
2382}
2383
2384/// get_VSPLTI_elt - If this is a build_vector of constants which can be formed
2385/// by using a vspltis[bhw] instruction of the specified element size, return
2386/// the constant being splatted. The ByteSize field indicates the number of
2387/// bytes of each element [124] -> [bhw].
2389 SDValue OpVal;
2390
2391 // If ByteSize of the splat is bigger than the element size of the
2392 // build_vector, then we have a case where we are checking for a splat where
2393 // multiple elements of the buildvector are folded together into a single
2394 // logical element of the splat (e.g. "vspltish 1" to splat {0,1}*8).
2395 unsigned EltSize = 16/N->getNumOperands();
2396 if (EltSize < ByteSize) {
2397 unsigned Multiple = ByteSize/EltSize; // Number of BV entries per spltval.
2398 SDValue UniquedVals[4];
2399 assert(Multiple > 1 && Multiple <= 4 && "How can this happen?");
2400
2401 // See if all of the elements in the buildvector agree across.
2402 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
2403 if (N->getOperand(i).isUndef()) continue;
2404 // If the element isn't a constant, bail fully out.
2405 if (!isa<ConstantSDNode>(N->getOperand(i))) return SDValue();
2406
2407 if (!UniquedVals[i&(Multiple-1)].getNode())
2408 UniquedVals[i&(Multiple-1)] = N->getOperand(i);
2409 else if (UniquedVals[i&(Multiple-1)] != N->getOperand(i))
2410 return SDValue(); // no match.
2411 }
2412
2413 // Okay, if we reached this point, UniquedVals[0..Multiple-1] contains
2414 // either constant or undef values that are identical for each chunk. See
2415 // if these chunks can form into a larger vspltis*.
2416
2417 // Check to see if all of the leading entries are either 0 or -1. If
2418 // neither, then this won't fit into the immediate field.
2419 bool LeadingZero = true;
2420 bool LeadingOnes = true;
2421 for (unsigned i = 0; i != Multiple-1; ++i) {
2422 if (!UniquedVals[i].getNode()) continue; // Must have been undefs.
2423
2424 LeadingZero &= isNullConstant(UniquedVals[i]);
2425 LeadingOnes &= isAllOnesConstant(UniquedVals[i]);
2426 }
2427 // Finally, check the least significant entry.
2428 if (LeadingZero) {
2429 if (!UniquedVals[Multiple-1].getNode())
2430 return DAG.getTargetConstant(0, SDLoc(N), MVT::i32); // 0,0,0,undef
2431 int Val = UniquedVals[Multiple - 1]->getAsZExtVal();
2432 if (Val < 16) // 0,0,0,4 -> vspltisw(4)
2433 return DAG.getTargetConstant(Val, SDLoc(N), MVT::i32);
2434 }
2435 if (LeadingOnes) {
2436 if (!UniquedVals[Multiple-1].getNode())
2437 return DAG.getTargetConstant(~0U, SDLoc(N), MVT::i32); // -1,-1,-1,undef
2438 int Val =cast<ConstantSDNode>(UniquedVals[Multiple-1])->getSExtValue();
2439 if (Val >= -16) // -1,-1,-1,-2 -> vspltisw(-2)
2440 return DAG.getTargetConstant(Val, SDLoc(N), MVT::i32);
2441 }
2442
2443 return SDValue();
2444 }
2445
2446 // Check to see if this buildvec has a single non-undef value in its elements.
2447 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
2448 if (N->getOperand(i).isUndef()) continue;
2449 if (!OpVal.getNode())
2450 OpVal = N->getOperand(i);
2451 else if (OpVal != N->getOperand(i))
2452 return SDValue();
2453 }
2454
2455 if (!OpVal.getNode()) return SDValue(); // All UNDEF: use implicit def.
2456
2457 unsigned ValSizeInBytes = EltSize;
2458 uint64_t Value = 0;
2459 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
2460 Value = CN->getZExtValue();
2461 } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
2462 assert(CN->getValueType(0) == MVT::f32 && "Only one legal FP vector type!");
2463 Value = llvm::bit_cast<uint32_t>(CN->getValueAPF().convertToFloat());
2464 }
2465
2466 // If the splat value is larger than the element value, then we can never do
2467 // this splat. The only case that we could fit the replicated bits into our
2468 // immediate field for would be zero, and we prefer to use vxor for it.
2469 if (ValSizeInBytes < ByteSize) return SDValue();
2470
2471 // If the element value is larger than the splat value, check if it consists
2472 // of a repeated bit pattern of size ByteSize.
2473 if (!APInt(ValSizeInBytes * 8, Value).isSplat(ByteSize * 8))
2474 return SDValue();
2475
2476 // Properly sign extend the value.
2477 int MaskVal = SignExtend32(Value, ByteSize * 8);
2478
2479 // If this is zero, don't match, zero matches ISD::isBuildVectorAllZeros.
2480 if (MaskVal == 0) return SDValue();
2481
2482 // Finally, if this value fits in a 5 bit sext field, return it
2483 if (SignExtend32<5>(MaskVal) == MaskVal)
2484 return DAG.getSignedTargetConstant(MaskVal, SDLoc(N), MVT::i32);
2485 return SDValue();
2486}
2487
2488//===----------------------------------------------------------------------===//
2489// Addressing Mode Selection
2490//===----------------------------------------------------------------------===//
2491
2492/// isIntS16Immediate - This method tests to see if the node is either a 32-bit
2493/// or 64-bit immediate, and if the value can be accurately represented as a
2494/// sign extension from a 16-bit value. If so, this returns true and the
2495/// immediate.
2496bool llvm::isIntS16Immediate(SDNode *N, int16_t &Imm) {
2497 if (!isa<ConstantSDNode>(N))
2498 return false;
2499
2500 Imm = (int16_t)N->getAsZExtVal();
2501 if (N->getValueType(0) == MVT::i32)
2502 return Imm == (int32_t)N->getAsZExtVal();
2503 else
2504 return Imm == (int64_t)N->getAsZExtVal();
2505}
2507 return isIntS16Immediate(Op.getNode(), Imm);
2508}
2509
2510/// Used when computing address flags for selecting loads and stores.
2511/// If we have an OR, check if the LHS and RHS are provably disjoint.
2512/// An OR of two provably disjoint values is equivalent to an ADD.
2513/// Most PPC load/store instructions compute the effective address as a sum,
2514/// so doing this conversion is useful.
2515static bool provablyDisjointOr(SelectionDAG &DAG, const SDValue &N) {
2516 if (N.getOpcode() != ISD::OR)
2517 return false;
2518 KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));
2519 if (!LHSKnown.Zero.getBoolValue())
2520 return false;
2521 KnownBits RHSKnown = DAG.computeKnownBits(N.getOperand(1));
2522 return (~(LHSKnown.Zero | RHSKnown.Zero) == 0);
2523}
2524
2525/// SelectAddressEVXRegReg - Given the specified address, check to see if it can
2526/// be represented as an indexed [r+r] operation.
2528 SDValue &Index,
2529 SelectionDAG &DAG) const {
2530 for (SDNode *U : N->users()) {
2531 if (MemSDNode *Memop = dyn_cast<MemSDNode>(U)) {
2532 if (Memop->getMemoryVT() == MVT::f64) {
2533 Base = N.getOperand(0);
2534 Index = N.getOperand(1);
2535 return true;
2536 }
2537 }
2538 }
2539 return false;
2540}
2541
2542/// isIntS34Immediate - This method tests if value of node given can be
2543/// accurately represented as a sign extension from a 34-bit value. If so,
2544/// this returns true and the immediate.
2545bool llvm::isIntS34Immediate(SDNode *N, int64_t &Imm) {
2546 if (!isa<ConstantSDNode>(N))
2547 return false;
2548
2549 Imm = cast<ConstantSDNode>(N)->getSExtValue();
2550 return isInt<34>(Imm);
2551}
2553 return isIntS34Immediate(Op.getNode(), Imm);
2554}
2555
2556/// SelectAddressRegReg - Given the specified addressed, check to see if it
2557/// can be represented as an indexed [r+r] operation. Returns false if it
2558/// can be more efficiently represented as [r+imm]. If \p EncodingAlignment is
2559/// non-zero and N can be represented by a base register plus a signed 16-bit
2560/// displacement, make a more precise judgement by checking (displacement % \p
2561/// EncodingAlignment).
// Chooses between the [r+r] and [r+imm] addressing forms for N.
// NOTE(review): the rendered source this chunk came from dropped the first
// line of this definition (the PPCTargetLowering::SelectAddressRegReg
// signature) and the PC-relative guard condition ahead of the first
// `return false` below — confirm both against the original file.
    SDValue N, SDValue &Base, SDValue &Index, SelectionDAG &DAG,
    MaybeAlign EncodingAlignment) const {
  // If we have a PC Relative target flag don't select as [reg+reg]. It will be
  // a [pc+imm].
    return false;

  int16_t Imm = 0;
  if (N.getOpcode() == ISD::ADD) {
    // Is there any SPE load/store (f64), which can't handle 16bit offset?
    // SPE load/store can only handle 8-bit offsets.
    if (hasSPE() && SelectAddressEVXRegReg(N, Base, Index, DAG))
      return true;
    // A suitably aligned signed-16-bit displacement is better encoded as
    // [r+imm], so decline the [r+r] form for it.
    if (isIntS16Immediate(N.getOperand(1), Imm) &&
        (!EncodingAlignment || isAligned(*EncodingAlignment, Imm)))
      return false; // r+i
    // A lo-part of a global will be folded into the displacement later.
    if (N.getOperand(1).getOpcode() == PPCISD::Lo)
      return false; // r+i

    Base = N.getOperand(0);
    Index = N.getOperand(1);
    return true;
  } else if (N.getOpcode() == ISD::OR) {
    if (isIntS16Immediate(N.getOperand(1), Imm) &&
        (!EncodingAlignment || isAligned(*EncodingAlignment, Imm)))
      return false; // r+i can fold it if we can.

    // If this is an or of disjoint bitfields, we can codegen this as an add
    // (for better address arithmetic) if the LHS and RHS of the OR are provably
    // disjoint.
    KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));

    if (LHSKnown.Zero.getBoolValue()) {
      KnownBits RHSKnown = DAG.computeKnownBits(N.getOperand(1));
      // If all of the bits are known zero on the LHS or RHS, the add won't
      // carry.
      if (~(LHSKnown.Zero | RHSKnown.Zero) == 0) {
        Base = N.getOperand(0);
        Index = N.getOperand(1);
        return true;
      }
    }
  }

  return false;
}
2609
2610// If we happen to be doing an i64 load or store into a stack slot that has
2611// less than a 4-byte alignment, then the frame-index elimination may need to
2612// use an indexed load or store instruction (because the offset may not be a
2613// multiple of 4). The extra register needed to hold the offset comes from the
2614// register scavenger, and it is possible that the scavenger will need to use
2615// an emergency spill slot. As a result, we need to make sure that a spill slot
2616// is allocated when doing an i64 load/store into a less-than-4-byte-aligned
2617// stack slot.
static void fixupFuncForFI(SelectionDAG &DAG, int FrameIdx, EVT VT) {
  // Only i64 accesses use LDU/STU, whose displacement must be a multiple
  // of 4; all other types are unaffected.
  // FIXME: This does not handle the LWA case.
  if (VT != MVT::i64)
    return;

  // NOTE: We'll exclude negative FIs here, which come from argument
  // lowering, because there are no known test cases triggering this problem
  // using packed structures (or similar). We can remove this exclusion if
  // we find such a test case. The reason why this is so test-case driven is
  // because this entire 'fixup' is only to prevent crashes (from the
  // register scavenger) on not-really-valid inputs. For example, if we have:
  // %a = alloca i1
  // %b = bitcast i1* %a to i64*
  // store i64* a, i64 b
  // then the store should really be marked as 'align 1', but is not. If it
  // were marked as 'align 1' then the indexed form would have been
  // instruction-selected initially, and the problem this 'fixup' is preventing
  // won't happen regardless.
  if (FrameIdx < 0)
    return;

  // NOTE(review): the line declaring `MF` (presumably
  // `MachineFunction &MF = DAG.getMachineFunction();`) was lost in the
  // rendered source — confirm against the original file.
  MachineFrameInfo &MFI = MF.getFrameInfo();

  // Slots aligned to at least 4 bytes can always use the r+imm form, so no
  // emergency spill slot is needed.
  if (MFI.getObjectAlign(FrameIdx) >= Align(4))
    return;

  // Record that this function may need an indexed (non-r+i) spill, so frame
  // lowering reserves an emergency slot for the register scavenger.
  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
  FuncInfo->setHasNonRISpills();
}
2648
2649/// Returns true if the address N can be represented by a base register plus
2650/// a signed 16-bit displacement [r+imm], and if it is not better
2651/// represented as reg+reg. If \p EncodingAlignment is non-zero, only accept
2652/// displacements that are multiples of that value.
// Selects the [r+imm] (base register + signed 16-bit displacement) form.
// NOTE(review): the rendered source dropped the first line of this
// definition (the PPCTargetLowering::SelectAddressRegImm signature), the
// PC-relative guard condition ahead of the first `return false`, an assert
// block inside the PPCISD::Lo case, and the FrameIndexSDNode condition near
// the end — confirm all of these against the original file.
    SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG,
    MaybeAlign EncodingAlignment) const {
  // FIXME dl should come from parent load or store, not from address
  SDLoc dl(N);

  // If we have a PC Relative target flag don't select as [reg+imm]. It will be
  // a [pc+imm].
    return false;

  // If this can be more profitably realized as r+r, fail.
  if (SelectAddressRegReg(N, Disp, Base, DAG, EncodingAlignment))
    return false;

  if (N.getOpcode() == ISD::ADD) {
    int16_t imm = 0;
    if (isIntS16Immediate(N.getOperand(1), imm) &&
        (!EncodingAlignment || isAligned(*EncodingAlignment, imm))) {
      Disp = DAG.getSignedTargetConstant(imm, dl, N.getValueType());
      // Frame indices need the emergency-spill-slot fixup (see
      // fixupFuncForFI) when used as the base of an i64 access.
      if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
        Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
        fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
      } else {
        Base = N.getOperand(0);
      }
      return true; // [r+i]
    } else if (N.getOperand(1).getOpcode() == PPCISD::Lo) {
      // Match LOAD (ADD (X, Lo(G))).
      assert(!N.getOperand(1).getConstantOperandVal(1) &&
             "Cannot handle constant offsets yet!");
      Disp = N.getOperand(1).getOperand(0); // The global address.
      Base = N.getOperand(0);
      return true; // [&g+r]
    }
  } else if (N.getOpcode() == ISD::OR) {
    int16_t imm = 0;
    if (isIntS16Immediate(N.getOperand(1), imm) &&
        (!EncodingAlignment || isAligned(*EncodingAlignment, imm))) {
      // If this is an or of disjoint bitfields, we can codegen this as an add
      // (for better address arithmetic) if the LHS and RHS of the OR are
      // provably disjoint.
      KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));

      if ((LHSKnown.Zero.getZExtValue()|~(uint64_t)imm) == ~0ULL) {
        // If all of the bits are known zero on the LHS or RHS, the add won't
        // carry.
        if (FrameIndexSDNode *FI =
              dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
          Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
          fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
        } else {
          Base = N.getOperand(0);
        }
        Disp = DAG.getTargetConstant(imm, dl, N.getValueType());
        return true;
      }
    }
  } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) {
    // Loading from a constant address.

    // If this address fits entirely in a 16-bit sext immediate field, codegen
    // this as "d, 0"
    int16_t Imm;
    if (isIntS16Immediate(CN, Imm) &&
        (!EncodingAlignment || isAligned(*EncodingAlignment, Imm))) {
      Disp = DAG.getTargetConstant(Imm, dl, CN->getValueType(0));
      Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
                             CN->getValueType(0));
      return true;
    }

    // Handle 32-bit sext immediates with LIS + addr mode.
    if ((CN->getValueType(0) == MVT::i32 ||
         (int64_t)CN->getZExtValue() == (int)CN->getZExtValue()) &&
        (!EncodingAlignment ||
         isAligned(*EncodingAlignment, CN->getZExtValue()))) {
      int Addr = (int)CN->getZExtValue();

      // Otherwise, break this down into an LIS + disp.
      Disp = DAG.getTargetConstant((short)Addr, dl, MVT::i32);

      // The base is the upper half adjusted for the sign of the low half
      // (LIS materializes the high 16 bits).
      Base = DAG.getTargetConstant((Addr - (signed short)Addr) >> 16, dl,
                                   MVT::i32);
      unsigned Opc = CN->getValueType(0) == MVT::i32 ? PPC::LIS : PPC::LIS8;
      Base = SDValue(DAG.getMachineNode(Opc, dl, CN->getValueType(0), Base), 0);
      return true;
    }
  }

  // Fall back to [r+0] with the address itself as the base register.
  Disp = DAG.getTargetConstant(0, dl, getPointerTy(DAG.getDataLayout()));
    Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
    fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
  } else
    Base = N;
  return true; // [r+0]
}
2755
2756/// Similar to the 16-bit case but for instructions that take a 34-bit
2757/// displacement field (prefixed loads/stores).
// Selects the [r+imm34] (prefixed) addressing form.
// NOTE(review): the rendered source dropped the first line of this
// definition (the PPCTargetLowering::SelectAddressRegImm34 signature) —
// confirm against the original file.
    SDValue &Base,
    SelectionDAG &DAG) const {
  // Only on 64-bit targets.
  if (N.getValueType() != MVT::i64)
    return false;

  SDLoc dl(N);
  int64_t Imm = 0;

  if (N.getOpcode() == ISD::ADD) {
    if (!isIntS34Immediate(N.getOperand(1), Imm))
      return false;
    Disp = DAG.getSignedTargetConstant(Imm, dl, N.getValueType());
    if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0)))
      Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
    else
      Base = N.getOperand(0);
    return true;
  }

  if (N.getOpcode() == ISD::OR) {
    if (!isIntS34Immediate(N.getOperand(1), Imm))
      return false;
    // If this is an or of disjoint bitfields, we can codegen this as an add
    // (for better address arithmetic) if the LHS and RHS of the OR are
    // provably disjoint.
    KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));
    if ((LHSKnown.Zero.getZExtValue() | ~(uint64_t)Imm) != ~0ULL)
      return false;
    if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0)))
      Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
    else
      Base = N.getOperand(0);
    Disp = DAG.getSignedTargetConstant(Imm, dl, N.getValueType());
    return true;
  }

  if (isIntS34Immediate(N, Imm)) { // If the address is a 34-bit const.
    Disp = DAG.getSignedTargetConstant(Imm, dl, N.getValueType());
    Base = DAG.getRegister(PPC::ZERO8, N.getValueType());
    return true;
  }

  return false;
}
2804
2805/// SelectAddressRegRegOnly - Given the specified addressed, force it to be
2806/// represented as an indexed [r+r] operation.
// Forces an address into the indexed [r+r] form.
// NOTE(review): the rendered source dropped the first line of this
// definition (the PPCTargetLowering::SelectAddressRegRegOnly signature) —
// confirm against the original file.
    SDValue &Index,
    SelectionDAG &DAG) const {
  // Check to see if we can easily represent this as an [r+r] address. This
  // will fail if it thinks that the address is more profitably represented as
  // reg+imm, e.g. where imm = 0.
  if (SelectAddressRegReg(N, Base, Index, DAG))
    return true;

  // If the address is the result of an add, we will utilize the fact that the
  // address calculation includes an implicit add. However, we can reduce
  // register pressure if we do not materialize a constant just for use as the
  // index register. We only get rid of the add if it is not an add of a
  // value and a 16-bit signed constant and both have a single use.
  int16_t imm = 0;
  if (N.getOpcode() == ISD::ADD &&
      (!isIntS16Immediate(N.getOperand(1), imm) ||
       !N.getOperand(1).hasOneUse() || !N.getOperand(0).hasOneUse())) {
    Base = N.getOperand(0);
    Index = N.getOperand(1);
    return true;
  }

  // Otherwise, do it the hard way, using R0 as the base register.
  Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
                         N.getValueType());
  Index = N;
  return true;
}
2836
2837template <typename Ty> static bool isValidPCRelNode(SDValue N) {
2838 Ty *PCRelCand = dyn_cast<Ty>(N);
2839 return PCRelCand && (PPCInstrInfo::hasPCRelFlag(PCRelCand->getTargetFlags()));
2840}
2841
2842/// Returns true if this address is a PC Relative address.
2843/// PC Relative addresses are marked with the flag PPCII::MO_PCREL_FLAG
2844/// or if the node opcode is PPCISD::MAT_PCREL_ADDR.
// NOTE(review): the rendered source dropped the first line of this
// definition (the PPCTargetLowering::SelectAddressPCRel signature) and the
// multi-line isValidPCRelNode<...> condition ahead of the second
// `return true` — confirm both against the original file.
  // This is a materialize PC Relative node. Always select this as PC Relative.
  Base = N;
  if (N.getOpcode() == PPCISD::MAT_PCREL_ADDR)
    return true;
    return true;
  return false;
}
2857
2858/// Returns true if we should use a direct load into vector instruction
2859/// (such as lxsd or lfd), instead of a load into gpr + direct move sequence.
static bool usePartialVectorLoads(SDNode *N, const PPCSubtarget& ST) {

  // If there are any other uses other than scalar to vector, then we should
  // keep it as a scalar load -> direct move pattern to prevent multiple
  // loads.
  // NOTE(review): the line initializing `LD` (presumably
  // `LoadSDNode *LD = dyn_cast<LoadSDNode>(N);`) was lost in the rendered
  // source — confirm against the original file.
  if (!LD)
    return false;

  // Only memory types with a direct load-into-vector instruction qualify;
  // the narrower integer types additionally require newer ISA support.
  EVT MemVT = LD->getMemoryVT();
  if (!MemVT.isSimple())
    return false;
  switch(MemVT.getSimpleVT().SimpleTy) {
  case MVT::i64:
    break;
  case MVT::i32:
    if (!ST.hasP8Vector())
      return false;
    break;
  case MVT::i16:
  case MVT::i8:
    if (!ST.hasP9Vector())
      return false;
    break;
  default:
    return false;
  }

  // The loaded value must have exactly one use...
  SDValue LoadedVal(N, 0);
  if (!LoadedVal.hasOneUse())
    return false;

  // ...and every use of the value result (ResNo 0) must feed a
  // scalar-to-vector node; otherwise keep the scalar load + direct move.
  for (SDUse &Use : LD->uses())
    if (Use.getResNo() == 0 &&
        Use.getUser()->getOpcode() != ISD::SCALAR_TO_VECTOR &&
        Use.getUser()->getOpcode() != PPCISD::SCALAR_TO_VECTOR_PERMUTED)
      return false;

  return true;
}
2900
2901/// getPreIndexedAddressParts - returns true by value, base pointer and
2902/// offset pointer and addressing mode by reference if the node's address
2903/// can be legally represented as pre-indexed load / store address.
// NOTE(review): the rendered source dropped the first lines of this
// definition (the PPCTargetLowering::getPreIndexedAddressParts signature,
// including the ISD::MemIndexedMode &AM parameter), the frame-index/register
// condition controlling `Swap`, the `std::swap(Base, Offset)` statement, and
// part of the final SEXTLOAD condition — confirm against the original file.
    SDValue &Offset,
    SelectionDAG &DAG) const {
  if (DisablePPCPreinc) return false;

  // Extract the pointer, memory type, and alignment from either a load or a
  // store; anything else cannot be pre-indexed.
  bool isLoad = true;
  SDValue Ptr;
  EVT VT;
  Align Alignment;
  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
    Ptr = LD->getBasePtr();
    VT = LD->getMemoryVT();
    Alignment = LD->getAlign();
  } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
    Ptr = ST->getBasePtr();
    VT = ST->getMemoryVT();
    Alignment = ST->getAlign();
    isLoad = false;
  } else
    return false;

  // Do not generate pre-inc forms for specific loads that feed scalar_to_vector
  // instructions because we can fold these into a more efficient instruction
  // instead, (such as LXSD).
  if (isLoad && usePartialVectorLoads(N, Subtarget)) {
    return false;
  }

  // PowerPC doesn't have preinc load/store instructions for vectors
  if (VT.isVector())
    return false;

  if (SelectAddressRegReg(Ptr, Base, Offset, DAG)) {
    // Common code will reject creating a pre-inc form if the base pointer
    // is a frame index, or if N is a store and the base pointer is either
    // the same as or a predecessor of the value being stored. Check for
    // those situations here, and try with swapped Base/Offset instead.
    bool Swap = false;

      Swap = true;
    else if (!isLoad) {
      SDValue Val = cast<StoreSDNode>(N)->getValue();
      if (Val == Base || Base.getNode()->isPredecessorOf(Val.getNode()))
        Swap = true;
    }

    if (Swap)

    AM = ISD::PRE_INC;
    return true;
  }

  // LDU/STU can only handle immediates that are a multiple of 4.
  if (VT != MVT::i64) {
    if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, std::nullopt))
      return false;
  } else {
    // LDU/STU need an address with at least 4-byte alignment.
    if (Alignment < Align(4))
      return false;

    if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, Align(4)))
      return false;
  }

  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
    // PPC64 doesn't have lwau, but it does have lwaux. Reject preinc load of
    // sext i32 to i64 when addr mode is r+i.
    if (LD->getValueType(0) == MVT::i64 && LD->getMemoryVT() == MVT::i32 &&
        LD->getExtensionType() == ISD::SEXTLOAD &&
      return false;
  }

  AM = ISD::PRE_INC;
  return true;
}
2984
2985//===----------------------------------------------------------------------===//
2986// LowerOperation implementation
2987//===----------------------------------------------------------------------===//
2988
2989/// Return true if we should reference labels using a PICBase, set the HiOpFlags
2990/// and LoOpFlags to the target MO flags.
2991static void getLabelAccessInfo(bool IsPIC, const PPCSubtarget &Subtarget,
2992 unsigned &HiOpFlags, unsigned &LoOpFlags,
2993 const GlobalValue *GV = nullptr) {
2994 HiOpFlags = PPCII::MO_HA;
2995 LoOpFlags = PPCII::MO_LO;
2996
2997 // Don't use the pic base if not in PIC relocation model.
2998 if (IsPIC) {
2999 HiOpFlags = PPCII::MO_PIC_HA_FLAG;
3000 LoOpFlags = PPCII::MO_PIC_LO_FLAG;
3001 }
3002}
3003
3004static SDValue LowerLabelRef(SDValue HiPart, SDValue LoPart, bool isPIC,
3005 SelectionDAG &DAG) {
3006 SDLoc DL(HiPart);
3007 EVT PtrVT = HiPart.getValueType();
3008 SDValue Zero = DAG.getConstant(0, DL, PtrVT);
3009
3010 SDValue Hi = DAG.getNode(PPCISD::Hi, DL, PtrVT, HiPart, Zero);
3011 SDValue Lo = DAG.getNode(PPCISD::Lo, DL, PtrVT, LoPart, Zero);
3012
3013 // With PIC, the first instruction is actually "GR+hi(&G)".
3014 if (isPIC)
3015 Hi = DAG.getNode(ISD::ADD, DL, PtrVT,
3016 DAG.getNode(PPCISD::GlobalBaseReg, DL, PtrVT), Hi);
3017
3018 // Generate non-pic code that has direct accesses to the constant pool.
3019 // The address of the global is just (hi(&g)+lo(&g)).
3020 return DAG.getNode(ISD::ADD, DL, PtrVT, Hi, Lo);
3021}
3022
// Marks the current function as using the TOC base pointer.
// NOTE(review): the opening of this helper (presumably
// `static void setUsesTOCBasePtr(MachineFunction &MF) {`) was lost in the
// rendered source — confirm against the original file.
  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
  FuncInfo->setUsesTOCBasePtr();
}
3027
3031
// Builds a TOC_ENTRY load of address GA relative to the TOC base register.
// NOTE(review): the trailing arguments of the getMemIntrinsicNode call
// (presumably the MachinePointerInfo / memory-operand flags) were lost in
// the rendered source — confirm against the original file.
SDValue PPCTargetLowering::getTOCEntry(SelectionDAG &DAG, const SDLoc &dl,
                                       SDValue GA) const {
  // The TOC base lives in X2 (64-bit), R2 (32-bit AIX), or is materialized
  // via GlobalBaseReg on 32-bit ELF.
  EVT VT = Subtarget.getScalarIntVT();
  SDValue Reg = Subtarget.isPPC64() ? DAG.getRegister(PPC::X2, VT)
                                    : Subtarget.isAIXABI()
                                          ? DAG.getRegister(PPC::R2, VT)
                                          : DAG.getNode(PPCISD::GlobalBaseReg, dl, VT);
  SDValue Ops[] = { GA, Reg };
  return DAG.getMemIntrinsicNode(
      PPCISD::TOC_ENTRY, dl, DAG.getVTList(VT, MVT::Other), Ops, VT,
}
3045
// Lowers a constant-pool reference per the target ABI: PC-relative,
// TOC-based, PIC GOT, or plain hi/lo pair.
// NOTE(review): the continuation line of the getTargetConstantPool call in
// the SVR4 PIC branch (presumably carrying PPCII::MO_PIC_FLAG) was lost in
// the rendered source — confirm against the original file.
SDValue PPCTargetLowering::LowerConstantPool(SDValue Op,
                                             SelectionDAG &DAG) const {
  EVT PtrVT = Op.getValueType();
  ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
  const Constant *C = CP->getConstVal();

  // 64-bit SVR4 ABI and AIX ABI code are always position-independent.
  // The actual address of the GlobalValue is stored in the TOC.
  if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
    if (Subtarget.isUsingPCRelativeCalls()) {
      SDLoc DL(CP);
      EVT Ty = getPointerTy(DAG.getDataLayout());
      SDValue ConstPool = DAG.getTargetConstantPool(
          C, Ty, CP->getAlign(), CP->getOffset(), PPCII::MO_PCREL_FLAG);
      return DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, ConstPool);
    }
    setUsesTOCBasePtr(DAG);
    SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), 0);
    return getTOCEntry(DAG, SDLoc(CP), GA);
  }

  unsigned MOHiFlag, MOLoFlag;
  bool IsPIC = isPositionIndependent();
  getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);

  // 32-bit SVR4 PIC accesses the pool entry through the GOT/TOC.
  if (IsPIC && Subtarget.isSVR4ABI()) {
    SDValue GA =
    return getTOCEntry(DAG, SDLoc(CP), GA);
  }

  // Otherwise emit an explicit hi/lo address pair.
  SDValue CPIHi =
      DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), 0, MOHiFlag);
  SDValue CPILo =
      DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), 0, MOLoFlag);
  return LowerLabelRef(CPIHi, CPILo, IsPIC, DAG);
}
3083
3084// For 64-bit PowerPC, prefer the more compact relative encodings.
3085// This trades 32 bits per jump table entry for one or two instructions
3086// on the jump site.
// NOTE(review): the rendered source dropped this function's signature
// (presumably `bool PPCTargetLowering::isJumpTableRelative() const`), its
// first guard, and the fallback return; only the fragments below survive —
// consult the original file.
    return false;
  // 64-bit and AIX targets prefer the compact relative jump-table encoding.
  if (Subtarget.isPPC64() || Subtarget.isAIXABI())
    return true;
}
3101
// NOTE(review): the rendered source dropped this function's signature line
// (presumably `SDValue PPCTargetLowering::getPICJumpTableRelocBase(SDValue
// Table,`) plus the two default-path return statements and the final
// argument of getNode — consult the original file.
    SelectionDAG &DAG) const {
  if (!Subtarget.isPPC64() || Subtarget.isAIXABI())

  // For large code models on PPC64 ELF, relocate relative to the global base
  // register instead of the default jump-table base.
  switch (getTargetMachine().getCodeModel()) {
  case CodeModel::Small:
  case CodeModel::Medium:
  default:
    return DAG.getNode(PPCISD::GlobalBaseReg, SDLoc(),
  }
}
3116
// NOTE(review): the rendered source dropped part of this function's
// signature (the MachineFunction parameter line) and the two default-path
// return statements — consult the original file.
const MCExpr *
    unsigned JTI,
    MCContext &Ctx) const {
  if (!Subtarget.isPPC64() || Subtarget.isAIXABI())

  // For large code models, express the relocation base as the PIC base
  // symbol rather than the default expression.
  switch (getTargetMachine().getCodeModel()) {
  case CodeModel::Small:
  case CodeModel::Medium:
  default:
    return MCSymbolRefExpr::create(MF->getPICBaseSymbol(), Ctx);
  }
}
3132
// Lowers a jump-table reference per the target ABI: PC-relative, TOC-based,
// PIC GOT, or plain hi/lo pair.
// NOTE(review): the rendered source dropped the line initializing `JT`
// (presumably `JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);`), the
// continuation of the PC-relative getTargetJumpTable call, and the flag
// continuation line in the SVR4 PIC branch — confirm against the original
// file.
SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
  EVT PtrVT = Op.getValueType();

  // isUsingPCRelativeCalls() returns true when PCRelative is enabled
  if (Subtarget.isUsingPCRelativeCalls()) {
    SDLoc DL(JT);
    EVT Ty = getPointerTy(DAG.getDataLayout());
    SDValue GA =
    SDValue MatAddr = DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
    return MatAddr;
  }

  // 64-bit SVR4 ABI and AIX ABI code are always position-independent.
  // The actual address of the GlobalValue is stored in the TOC.
  if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
    setUsesTOCBasePtr(DAG);
    SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
    return getTOCEntry(DAG, SDLoc(JT), GA);
  }

  unsigned MOHiFlag, MOLoFlag;
  bool IsPIC = isPositionIndependent();
  getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);

  // 32-bit SVR4 PIC goes through the GOT/TOC.
  if (IsPIC && Subtarget.isSVR4ABI()) {
    SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT,
    return getTOCEntry(DAG, SDLoc(GA), GA);
  }

  // Otherwise emit an explicit hi/lo address pair.
  SDValue JTIHi = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOHiFlag);
  SDValue JTILo = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOLoFlag);
  return LowerLabelRef(JTIHi, JTILo, IsPIC, DAG);
}
3169
// Lowers a block-address reference per the target ABI: PC-relative,
// TOC-based, 32-bit ELF PIC via the GOT, or a plain hi/lo pair.
// NOTE(review): the continuation line of the PC-relative
// getTargetBlockAddress call (presumably carrying PPCII::MO_PCREL_FLAG) was
// lost in the rendered source — confirm against the original file.
SDValue PPCTargetLowering::LowerBlockAddress(SDValue Op,
                                             SelectionDAG &DAG) const {
  EVT PtrVT = Op.getValueType();
  BlockAddressSDNode *BASDN = cast<BlockAddressSDNode>(Op);
  const BlockAddress *BA = BASDN->getBlockAddress();

  // isUsingPCRelativeCalls() returns true when PCRelative is enabled
  if (Subtarget.isUsingPCRelativeCalls()) {
    SDLoc DL(BASDN);
    EVT Ty = getPointerTy(DAG.getDataLayout());
    SDValue GA = DAG.getTargetBlockAddress(BA, Ty, BASDN->getOffset(),
    SDValue MatAddr = DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
    return MatAddr;
  }

  // 64-bit SVR4 ABI and AIX ABI code are always position-independent.
  // The actual BlockAddress is stored in the TOC.
  if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
    setUsesTOCBasePtr(DAG);
    SDValue GA = DAG.getTargetBlockAddress(BA, PtrVT, BASDN->getOffset());
    return getTOCEntry(DAG, SDLoc(BASDN), GA);
  }

  // 32-bit position-independent ELF stores the BlockAddress in the .got.
  if (Subtarget.is32BitELFABI() && isPositionIndependent())
    return getTOCEntry(
        DAG, SDLoc(BASDN),
        DAG.getTargetBlockAddress(BA, PtrVT, BASDN->getOffset()));

  unsigned MOHiFlag, MOLoFlag;
  bool IsPIC = isPositionIndependent();
  getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);
  SDValue TgtBAHi = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOHiFlag);
  SDValue TgtBALo = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOLoFlag);
  return LowerLabelRef(TgtBAHi, TgtBALo, IsPIC, DAG);
}
3207
3208SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
3209 SelectionDAG &DAG) const {
3210 if (Subtarget.isAIXABI())
3211 return LowerGlobalTLSAddressAIX(Op, DAG);
3212
3213 return LowerGlobalTLSAddressLinux(Op, DAG);
3214}
3215
3216/// updateForAIXShLibTLSModelOpt - Helper to initialize TLS model opt settings,
3217/// and then apply the update.
/// updateForAIXShLibTLSModelOpt - Helper to initialize TLS model opt settings,
/// and then apply the update.
// NOTE(review): the rendered source dropped the first line of this
// definition (presumably `static void updateForAIXShLibTLSModelOpt(
// TLSModel::Model &Model,`), the continuation initializing `FuncInfo`, the
// declaration of the `TLSGV` set, and the call marking initialization done —
// confirm against the original file.
    SelectionDAG &DAG,
    const TargetMachine &TM) {
  // Initialize TLS model opt setting lazily:
  // (1) Use initial-exec for single TLS var references within current function.
  // (2) Use local-dynamic for multiple TLS var references within current
  // function.
  PPCFunctionInfo *FuncInfo =
  if (!FuncInfo->isAIXFuncTLSModelOptInitDone()) {
    // Iterate over all instructions within current function, collect all TLS
    // global variables (global variables taken as the first parameter to
    // Intrinsic::threadlocal_address).
    const Function &Func = DAG.getMachineFunction().getFunction();
    for (const BasicBlock &BB : Func)
      for (const Instruction &I : BB)
        if (I.getOpcode() == Instruction::Call)
          if (const CallInst *CI = dyn_cast<const CallInst>(&I))
            if (Function *CF = CI->getCalledFunction())
              if (CF->isDeclaration() &&
                  CF->getIntrinsicID() == Intrinsic::threadlocal_address)
                if (const GlobalValue *GV =
                        dyn_cast<GlobalValue>(I.getOperand(0))) {
                  // Only local-dynamic variables count toward the limit.
                  TLSModel::Model GVModel = TM.getTLSModel(GV);
                  if (GVModel == TLSModel::LocalDynamic)
                    TLSGV.insert(GV);
                }

    unsigned TLSGVCnt = TLSGV.size();
    LLVM_DEBUG(dbgs() << format("LocalDynamic TLSGV count:%d\n", TLSGVCnt));
    // Few enough local-dynamic variables: prefer initial-exec for them.
    if (TLSGVCnt <= PPCAIXTLSModelOptUseIEForLDLimit)
      FuncInfo->setAIXFuncUseTLSIEForLD();
  }

  if (FuncInfo->isAIXFuncUseTLSIEForLD()) {
    LLVM_DEBUG(
        dbgs() << DAG.getMachineFunction().getName()
               << " function is using the TLS-IE model for TLS-LD access.\n");
    Model = TLSModel::InitialExec;
  }
}
3261
3262SDValue PPCTargetLowering::LowerGlobalTLSAddressAIX(SDValue Op,
3263 SelectionDAG &DAG) const {
3264 GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
3265
3266 if (DAG.getTarget().useEmulatedTLS())
3267 report_fatal_error("Emulated TLS is not yet supported on AIX");
3268
3269 SDLoc dl(GA);
3270 const GlobalValue *GV = GA->getGlobal();
3271 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3272 bool Is64Bit = Subtarget.isPPC64();
3274
3275 // Apply update to the TLS model.
3276 if (Subtarget.hasAIXShLibTLSModelOpt())
3278
3279 // TLS variables are accessed through TOC entries.
3280 // To support this, set the DAG to use the TOC base pointer.
3281 setUsesTOCBasePtr(DAG);
3282
3283 bool IsTLSLocalExecModel = Model == TLSModel::LocalExec;
3284
3285 if (IsTLSLocalExecModel || Model == TLSModel::InitialExec) {
3286 bool HasAIXSmallLocalExecTLS = Subtarget.hasAIXSmallLocalExecTLS();
3287 bool HasAIXSmallTLSGlobalAttr = false;
3288 SDValue VariableOffsetTGA =
3289 DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, PPCII::MO_TPREL_FLAG);
3290 SDValue VariableOffset = getTOCEntry(DAG, dl, VariableOffsetTGA);
3291 SDValue TLSReg;
3292
3293 if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV))
3294 if (GVar->hasAttribute("aix-small-tls"))
3295 HasAIXSmallTLSGlobalAttr = true;
3296
3297 if (Is64Bit) {
3298 // For local-exec and initial-exec on AIX (64-bit), the sequence generated
3299 // involves a load of the variable offset (from the TOC), followed by an
3300 // add of the loaded variable offset to R13 (the thread pointer).
3301 // This code sequence looks like:
3302 // ld reg1,var[TC](2)
3303 // add reg2, reg1, r13 // r13 contains the thread pointer
3304 TLSReg = DAG.getRegister(PPC::X13, MVT::i64);
3305
3306 // With the -maix-small-local-exec-tls option, or with the "aix-small-tls"
3307 // global variable attribute, produce a faster access sequence for
3308 // local-exec TLS variables where the offset from the TLS base is encoded
3309 // as an immediate operand.
3310 //
3311 // We only utilize the faster local-exec access sequence when the TLS
3312 // variable has a size within the policy limit. We treat types that are
3313 // not sized or are empty as being over the policy size limit.
3314 if ((HasAIXSmallLocalExecTLS || HasAIXSmallTLSGlobalAttr) &&
3315 IsTLSLocalExecModel) {
3316 Type *GVType = GV->getValueType();
3317 if (GVType->isSized() && !GVType->isEmptyTy() &&
3318 GV->getDataLayout().getTypeAllocSize(GVType) <=
3320 return DAG.getNode(PPCISD::Lo, dl, PtrVT, VariableOffsetTGA, TLSReg);
3321 }
3322 } else {
3323 // For local-exec and initial-exec on AIX (32-bit), the sequence generated
3324 // involves loading the variable offset from the TOC, generating a call to
3325 // .__get_tpointer to get the thread pointer (which will be in R3), and
3326 // adding the two together:
3327 // lwz reg1,var[TC](2)
3328 // bla .__get_tpointer
3329 // add reg2, reg1, r3
3330 TLSReg = DAG.getNode(PPCISD::GET_TPOINTER, dl, PtrVT);
3331
3332 // We do not implement the 32-bit version of the faster access sequence
3333 // for local-exec that is controlled by the -maix-small-local-exec-tls
3334 // option, or the "aix-small-tls" global variable attribute.
3335 if (HasAIXSmallLocalExecTLS || HasAIXSmallTLSGlobalAttr)
3336 report_fatal_error("The small-local-exec TLS access sequence is "
3337 "currently only supported on AIX (64-bit mode).");
3338 }
3339 return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TLSReg, VariableOffset);
3340 }
3341
3342 if (Model == TLSModel::LocalDynamic) {
3343 bool HasAIXSmallLocalDynamicTLS = Subtarget.hasAIXSmallLocalDynamicTLS();
3344
3345 // We do not implement the 32-bit version of the faster access sequence
3346 // for local-dynamic that is controlled by -maix-small-local-dynamic-tls.
3347 if (!Is64Bit && HasAIXSmallLocalDynamicTLS)
3348 report_fatal_error("The small-local-dynamic TLS access sequence is "
3349 "currently only supported on AIX (64-bit mode).");
3350
3351 // For local-dynamic on AIX, we need to generate one TOC entry for each
3352 // variable offset, and a single module-handle TOC entry for the entire
3353 // file.
3354
3355 SDValue VariableOffsetTGA =
3356 DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, PPCII::MO_TLSLD_FLAG);
3357 SDValue VariableOffset = getTOCEntry(DAG, dl, VariableOffsetTGA);
3358
3360 GlobalVariable *TLSGV =
3361 dyn_cast_or_null<GlobalVariable>(M->getOrInsertGlobal(
3362 StringRef("_$TLSML"), PointerType::getUnqual(*DAG.getContext())));
3364 assert(TLSGV && "Not able to create GV for _$TLSML.");
3365 SDValue ModuleHandleTGA =
3366 DAG.getTargetGlobalAddress(TLSGV, dl, PtrVT, 0, PPCII::MO_TLSLDM_FLAG);
3367 SDValue ModuleHandleTOC = getTOCEntry(DAG, dl, ModuleHandleTGA);
3368 SDValue ModuleHandle =
3369 DAG.getNode(PPCISD::TLSLD_AIX, dl, PtrVT, ModuleHandleTOC);
3370
3371 // With the -maix-small-local-dynamic-tls option, produce a faster access
3372 // sequence for local-dynamic TLS variables where the offset from the
3373 // module-handle is encoded as an immediate operand.
3374 //
3375 // We only utilize the faster local-dynamic access sequence when the TLS
3376 // variable has a size within the policy limit. We treat types that are
3377 // not sized or are empty as being over the policy size limit.
3378 if (HasAIXSmallLocalDynamicTLS) {
3379 Type *GVType = GV->getValueType();
3380 if (GVType->isSized() && !GVType->isEmptyTy() &&
3381 GV->getDataLayout().getTypeAllocSize(GVType) <=
3383 return DAG.getNode(PPCISD::Lo, dl, PtrVT, VariableOffsetTGA,
3384 ModuleHandle);
3385 }
3386
3387 return DAG.getNode(ISD::ADD, dl, PtrVT, ModuleHandle, VariableOffset);
3388 }
3389
3390 // If Local- or Initial-exec or Local-dynamic is not possible or specified,
3391 // all GlobalTLSAddress nodes are lowered using the general-dynamic model. We
3392 // need to generate two TOC entries, one for the variable offset, one for the
3393 // region handle. The global address for the TOC entry of the region handle is
3394 // created with the MO_TLSGDM_FLAG flag and the global address for the TOC
3395 // entry of the variable offset is created with MO_TLSGD_FLAG.
3396 SDValue VariableOffsetTGA =
3397 DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, PPCII::MO_TLSGD_FLAG);
3398 SDValue RegionHandleTGA =
3399 DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, PPCII::MO_TLSGDM_FLAG);
3400 SDValue VariableOffset = getTOCEntry(DAG, dl, VariableOffsetTGA);
3401 SDValue RegionHandle = getTOCEntry(DAG, dl, RegionHandleTGA);
3402 return DAG.getNode(PPCISD::TLSGD_AIX, dl, PtrVT, VariableOffset,
3403 RegionHandle);
3404}
3405
// Lower a GlobalTLSAddress node for Linux/ELF targets. Each TLS model
// (local-exec, initial-exec, general-dynamic, local-dynamic) gets its own
// medium-code-model sequence, with PC-relative variants when the subtarget
// supports PC-relative addressing.
SDValue PPCTargetLowering::LowerGlobalTLSAddressLinux(SDValue Op,
                                                      SelectionDAG &DAG) const {
  // FIXME: TLS addresses currently use medium model code sequences,
  // which is the most useful form. Eventually support for small and
  // large models could be added if users need it, at the cost of
  // additional complexity.
  GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
  // Emulated TLS is lowered separately (runtime-helper based).
  if (DAG.getTarget().useEmulatedTLS())
    return LowerToTLSEmulatedModel(GA, DAG);

  SDLoc dl(GA);
  const GlobalValue *GV = GA->getGlobal();
  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  bool is64bit = Subtarget.isPPC64();
  const Module *M = DAG.getMachineFunction().getFunction().getParent();
  PICLevel::Level picLevel = M->getPICLevel();

  const TargetMachine &TM = getTargetMachine();
  TLSModel::Model Model = TM.getTLSModel(GV);

  if (Model == TLSModel::LocalExec) {
    // Local-exec: add the variable's TP-relative offset to the thread
    // pointer register (X13 on 64-bit, R2 on 32-bit below).
    if (Subtarget.isUsingPCRelativeCalls()) {
      SDValue TLSReg = DAG.getRegister(PPC::X13, MVT::i64);
      SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
      SDValue MatAddr =
          DAG.getNode(PPCISD::TLS_LOCAL_EXEC_MAT_ADDR, dl, PtrVT, TGA);
      return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TLSReg, MatAddr);
    }

    SDValue TGAHi = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
    SDValue TGALo = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
    SDValue TLSReg = is64bit ? DAG.getRegister(PPC::X13, MVT::i64)
                             : DAG.getRegister(PPC::R2, MVT::i32);

    SDValue Hi = DAG.getNode(PPCISD::Hi, dl, PtrVT, TGAHi, TLSReg);
    return DAG.getNode(PPCISD::Lo, dl, PtrVT, TGALo, Hi);
  }

  if (Model == TLSModel::InitialExec) {
    // Initial-exec: load the TP-relative offset from the GOT, then add the
    // thread pointer via ADD_TLS (TGATLS carries the TLS relocation flag).
    bool IsPCRel = Subtarget.isUsingPCRelativeCalls();
        GV, dl, PtrVT, 0, IsPCRel ? PPCII::MO_GOT_TPREL_PCREL_FLAG : 0);
    SDValue TGATLS = DAG.getTargetGlobalAddress(
        GV, dl, PtrVT, 0, IsPCRel ? PPCII::MO_TLS_PCREL_FLAG : PPCII::MO_TLS);
    SDValue TPOffset;
    if (IsPCRel) {
      SDValue MatPCRel = DAG.getNode(PPCISD::MAT_PCREL_ADDR, dl, PtrVT, TGA);
      TPOffset = DAG.getLoad(MVT::i64, dl, DAG.getEntryNode(), MatPCRel,
                             MachinePointerInfo());
    } else {
      SDValue GOTPtr;
      if (is64bit) {
        setUsesTOCBasePtr(DAG);
        SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
        GOTPtr =
            DAG.getNode(PPCISD::ADDIS_GOT_TPREL_HA, dl, PtrVT, GOTReg, TGA);
      } else {
        // 32-bit GOT pointer selection depends on PIC level.
        if (!TM.isPositionIndependent())
          GOTPtr = DAG.getNode(PPCISD::PPC32_GOT, dl, PtrVT);
        else if (picLevel == PICLevel::SmallPIC)
          GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
        else
          GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
      }
      TPOffset = DAG.getNode(PPCISD::LD_GOT_TPREL_L, dl, PtrVT, TGA, GOTPtr);
    }
    return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TPOffset, TGATLS);
  }

  if (Model == TLSModel::GeneralDynamic) {
    // General-dynamic (TLSGD) sequence.
    if (Subtarget.isUsingPCRelativeCalls()) {
      SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
      return DAG.getNode(PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR, dl, PtrVT, TGA);
    }

    SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
    SDValue GOTPtr;
    if (is64bit) {
      setUsesTOCBasePtr(DAG);
      SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
      GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSGD_HA, dl, PtrVT,
                           GOTReg, TGA);
    } else {
      if (picLevel == PICLevel::SmallPIC)
        GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
      else
        GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
    }
    return DAG.getNode(PPCISD::ADDI_TLSGD_L_ADDR, dl, PtrVT,
                       GOTPtr, TGA, TGA);
  }

  if (Model == TLSModel::LocalDynamic) {
    // Local-dynamic (TLSLD) sequence: materialize the module's TLS base,
    // then add the variable's DTP-relative offset.
    if (Subtarget.isUsingPCRelativeCalls()) {
      SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
      SDValue MatPCRel =
          DAG.getNode(PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR, dl, PtrVT, TGA);
      return DAG.getNode(PPCISD::PADDI_DTPREL, dl, PtrVT, MatPCRel, TGA);
    }

    SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
    SDValue GOTPtr;
    if (is64bit) {
      setUsesTOCBasePtr(DAG);
      SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
      GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSLD_HA, dl, PtrVT,
                           GOTReg, TGA);
    } else {
      if (picLevel == PICLevel::SmallPIC)
        GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
      else
        GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
    }
    SDValue TLSAddr = DAG.getNode(PPCISD::ADDI_TLSLD_L_ADDR, dl,
                                  PtrVT, GOTPtr, TGA, TGA);
    SDValue DtvOffsetHi = DAG.getNode(PPCISD::ADDIS_DTPREL_HA, dl,
                                      PtrVT, TLSAddr, TGA);
    return DAG.getNode(PPCISD::ADDI_DTPREL_L, dl, PtrVT, DtvOffsetHi, TGA);
  }

  llvm_unreachable("Unknown TLS model!");
}
3533
// Lower a non-TLS GlobalAddress node. On TOC-based ABIs (64-bit ELF, AIX)
// the global's address lives in a TOC entry (or is materialized
// PC-relatively); on 32-bit ELF a Hi/Lo pair or GOT entry is used.
SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op,
                                              SelectionDAG &DAG) const {
  EVT PtrVT = Op.getValueType();
  GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
  SDLoc DL(GSDN);
  const GlobalValue *GV = GSDN->getGlobal();

  // 64-bit SVR4 ABI & AIX ABI code is always position-independent.
  // The actual address of the GlobalValue is stored in the TOC.
  if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
    if (Subtarget.isUsingPCRelativeCalls()) {
      EVT Ty = getPointerTy(DAG.getDataLayout());
      SDValue GA = DAG.getTargetGlobalAddress(GV, DL, Ty, GSDN->getOffset(),
      SDValue MatPCRel = DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
      SDValue Load = DAG.getLoad(MVT::i64, DL, DAG.getEntryNode(), MatPCRel,
                                 MachinePointerInfo());
      return Load;
    } else {
      SDValue GA = DAG.getTargetGlobalAddress(GV, DL, Ty, GSDN->getOffset(),
      return DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
    }
  }
  // TOC-based access: record that this function uses the TOC base pointer
  // and return a load of the global's TOC entry.
  setUsesTOCBasePtr(DAG);
  SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset());
  return getTOCEntry(DAG, DL, GA);
  }

  unsigned MOHiFlag, MOLoFlag;
  bool IsPIC = isPositionIndependent();
  getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag, GV);

  // 32-bit SVR4 PIC: access through the GOT.
  if (IsPIC && Subtarget.isSVR4ABI()) {
    SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT,
                                            GSDN->getOffset(),
    return getTOCEntry(DAG, DL, GA);
  }

  // Otherwise materialize the address with a Hi/Lo relocation pair.
  SDValue GAHi =
      DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOHiFlag);
  SDValue GALo =
      DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOLoFlag);

  return LowerLabelRef(GAHi, GALo, IsPIC, DAG);
}
3582
3583SDValue PPCTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
3584 bool IsStrict = Op->isStrictFPOpcode();
3585 ISD::CondCode CC =
3586 cast<CondCodeSDNode>(Op.getOperand(IsStrict ? 3 : 2))->get();
3587 SDValue LHS = Op.getOperand(IsStrict ? 1 : 0);
3588 SDValue RHS = Op.getOperand(IsStrict ? 2 : 1);
3589 SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
3590 EVT LHSVT = LHS.getValueType();
3591 SDLoc dl(Op);
3592
3593 // Soften the setcc with libcall if it is fp128.
3594 if (LHSVT == MVT::f128) {
3595 assert(!Subtarget.hasP9Vector() &&
3596 "SETCC for f128 is already legal under Power9!");
3597 softenSetCCOperands(DAG, LHSVT, LHS, RHS, CC, dl, LHS, RHS, Chain,
3598 Op->getOpcode() == ISD::STRICT_FSETCCS);
3599 if (RHS.getNode())
3600 LHS = DAG.getNode(ISD::SETCC, dl, Op.getValueType(), LHS, RHS,
3601 DAG.getCondCode(CC));
3602 if (IsStrict)
3603 return DAG.getMergeValues({LHS, Chain}, dl);
3604 return LHS;
3605 }
3606
3607 assert(!IsStrict && "Don't know how to handle STRICT_FSETCC!");
3608
3609 if (Op.getValueType() == MVT::v2i64) {
3610 // When the operands themselves are v2i64 values, we need to do something
3611 // special because VSX has no underlying comparison operations for these.
3612 if (LHS.getValueType() == MVT::v2i64) {
3613 // Equality can be handled by casting to the legal type for Altivec
3614 // comparisons, everything else needs to be expanded.
3615 if (CC != ISD::SETEQ && CC != ISD::SETNE)
3616 return SDValue();
3617 SDValue SetCC32 = DAG.getSetCC(
3618 dl, MVT::v4i32, DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, LHS),
3619 DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, RHS), CC);
3620 int ShuffV[] = {1, 0, 3, 2};
3621 SDValue Shuff =
3622 DAG.getVectorShuffle(MVT::v4i32, dl, SetCC32, SetCC32, ShuffV);
3623 return DAG.getBitcast(MVT::v2i64,
3624 DAG.getNode(CC == ISD::SETEQ ? ISD::AND : ISD::OR,
3625 dl, MVT::v4i32, Shuff, SetCC32));
3626 }
3627
3628 // We handle most of these in the usual way.
3629 return Op;
3630 }
3631
3632 // If we're comparing for equality to zero, expose the fact that this is
3633 // implemented as a ctlz/srl pair on ppc, so that the dag combiner can
3634 // fold the new nodes.
3635 if (SDValue V = lowerCmpEqZeroToCtlzSrl(Op, DAG))
3636 return V;
3637
3638 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS)) {
3639 // Leave comparisons against 0 and -1 alone for now, since they're usually
3640 // optimized. FIXME: revisit this when we can custom lower all setcc
3641 // optimizations.
3642 if (C->isAllOnes() || C->isZero())
3643 return SDValue();
3644 }
3645
3646 // If we have an integer seteq/setne, turn it into a compare against zero
3647 // by xor'ing the rhs with the lhs, which is faster than setting a
3648 // condition register, reading it back out, and masking the correct bit. The
3649 // normal approach here uses sub to do this instead of xor. Using xor exposes
3650 // the result to other bit-twiddling opportunities.
3651 if (LHSVT.isInteger() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
3652 EVT VT = Op.getValueType();
3653 SDValue Sub = DAG.getNode(ISD::XOR, dl, LHSVT, LHS, RHS);
3654 return DAG.getSetCC(dl, VT, Sub, DAG.getConstant(0, dl, LHSVT), CC);
3655 }
3656 return SDValue();
3657}
3658
// Lower ISD::VAARG for 32-bit PPC (SVR4 va_list). Emits code that reads the
// gpr/fpr counts and area pointers from the va_list, selects the argument's
// address in either the register save area or the overflow area, updates the
// bookkeeping fields, and finally loads the argument value.
SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
  SDNode *Node = Op.getNode();
  EVT VT = Node->getValueType(0);
  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  SDValue InChain = Node->getOperand(0);
  SDValue VAListPtr = Node->getOperand(1);
  const Value *SV = cast<SrcValueSDNode>(Node->getOperand(2))->getValue();
  SDLoc dl(Node);

  assert(!Subtarget.isPPC64() && "LowerVAARG is PPC32 only");

  // gpr_index: first byte of the va_list.
  SDValue GprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain,
                                    VAListPtr, MachinePointerInfo(SV), MVT::i8);
  InChain = GprIndex.getValue(1);

  if (VT == MVT::i64) {
    // i64 arguments occupy an even/odd GPR pair. Check if GprIndex is even.
    SDValue GprAnd = DAG.getNode(ISD::AND, dl, MVT::i32, GprIndex,
                                 DAG.getConstant(1, dl, MVT::i32));
    SDValue CC64 = DAG.getSetCC(dl, MVT::i32, GprAnd,
                                DAG.getConstant(0, dl, MVT::i32), ISD::SETNE);
    SDValue GprIndexPlusOne = DAG.getNode(ISD::ADD, dl, MVT::i32, GprIndex,
                                          DAG.getConstant(1, dl, MVT::i32));
    // Align GprIndex to be even if it isn't.
    GprIndex = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC64, GprIndexPlusOne,
                           GprIndex);
  }

  // fpr index is 1 byte after gpr.
  SDValue FprPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
                               DAG.getConstant(1, dl, MVT::i32));

  // fpr_index: second byte of the va_list.
  SDValue FprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain,
                                    FprPtr, MachinePointerInfo(SV), MVT::i8);
  InChain = FprIndex.getValue(1);

  // reg_save_area pointer lives at offset 8, overflow_arg_area at offset 4.
  SDValue RegSaveAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
                                       DAG.getConstant(8, dl, MVT::i32));

  SDValue OverflowAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
                                        DAG.getConstant(4, dl, MVT::i32));

  // Load both area pointers.
  SDValue OverflowArea =
      DAG.getLoad(MVT::i32, dl, InChain, OverflowAreaPtr, MachinePointerInfo());
  InChain = OverflowArea.getValue(1);

  SDValue RegSaveArea =
      DAG.getLoad(MVT::i32, dl, InChain, RegSaveAreaPtr, MachinePointerInfo());
  InChain = RegSaveArea.getValue(1);

  // CC is true while the relevant index is still < 8 (argument is in the
  // register save area); otherwise the overflow area is used.
  SDValue CC = DAG.getSetCC(dl, MVT::i32, VT.isInteger() ? GprIndex : FprIndex,
                            DAG.getConstant(8, dl, MVT::i32), ISD::SETLT);

  // Adjustment into the save area: gpr_index * 4 or fpr_index * 8.
  SDValue RegConstant = DAG.getNode(ISD::MUL, dl, MVT::i32,
                                    VT.isInteger() ? GprIndex : FprIndex,
                                    DAG.getConstant(VT.isInteger() ? 4 : 8, dl,
                                                    MVT::i32));

  // OurReg = RegSaveArea + RegConstant
  SDValue OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, RegSaveArea,
                               RegConstant);

  // Floating types are 32 bytes into RegSaveArea (after the 8 GPR slots).
  if (VT.isFloatingPoint())
    OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, OurReg,
                         DAG.getConstant(32, dl, MVT::i32));

  // Increase {f,g}pr_index by 1 (or 2 if VT is i64).
  SDValue IndexPlus1 = DAG.getNode(ISD::ADD, dl, MVT::i32,
                                   VT.isInteger() ? GprIndex : FprIndex,
                                   DAG.getConstant(VT == MVT::i64 ? 2 : 1, dl,
                                                   MVT::i32));

  // Store the bumped index back into the appropriate byte.
  InChain = DAG.getTruncStore(InChain, dl, IndexPlus1,
                              VT.isInteger() ? VAListPtr : FprPtr,
                              MachinePointerInfo(SV), MVT::i8);

  // Determine if we should load from reg_save_area or overflow_area.
  SDValue Result = DAG.getNode(ISD::SELECT, dl, PtrVT, CC, OurReg, OverflowArea);

  // Increase overflow_area by 4/8 if gpr/fpr index is >= 8.
  SDValue OverflowAreaPlusN = DAG.getNode(ISD::ADD, dl, PtrVT, OverflowArea,
                                          DAG.getConstant(VT.isInteger() ? 4 : 8,
                                                          dl, MVT::i32));

  OverflowArea = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC, OverflowArea,
                             OverflowAreaPlusN);

  // Write the (possibly advanced) overflow pointer back to the va_list.
  InChain = DAG.getTruncStore(InChain, dl, OverflowArea, OverflowAreaPtr,
                              MachinePointerInfo(), MVT::i32);

  // Finally load the argument from the selected address.
  return DAG.getLoad(VT, dl, InChain, Result, MachinePointerInfo());
}
3757
3758SDValue PPCTargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG) const {
3759 assert(!Subtarget.isPPC64() && "LowerVACOPY is PPC32 only");
3760
3761 // We have to copy the entire va_list struct:
3762 // 2*sizeof(char) + 2 Byte alignment + 2*sizeof(char*) = 12 Byte
3763 return DAG.getMemcpy(Op.getOperand(0), Op, Op.getOperand(1), Op.getOperand(2),
3764 DAG.getConstant(12, SDLoc(Op), MVT::i32), Align(8),
3765 false, true, /*CI=*/nullptr, std::nullopt,
3766 MachinePointerInfo(), MachinePointerInfo());
3767}
3768
3769SDValue PPCTargetLowering::LowerADJUST_TRAMPOLINE(SDValue Op,
3770 SelectionDAG &DAG) const {
3771 return Op.getOperand(0);
3772}
3773
// Scan an inline-asm node's register operands for the link register. If LR
// (or LR8) appears, record in PPCFunctionInfo that the LR must be saved.
// The node itself is always returned unchanged.
SDValue PPCTargetLowering::LowerINLINEASM(SDValue Op, SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  PPCFunctionInfo &MFI = *MF.getInfo<PPCFunctionInfo>();

  assert((Op.getOpcode() == ISD::INLINEASM ||
          Op.getOpcode() == ISD::INLINEASM_BR) &&
         "Expecting Inline ASM node.");

  // If an LR store is already known to be required then there is no point in
  // checking this ASM as well.
  if (MFI.isLRStoreRequired())
    return Op;

  // Inline ASM nodes have an optional last operand that is an incoming Flag of
  // type MVT::Glue. We want to ignore this last operand if that is the case.
  unsigned NumOps = Op.getNumOperands();
  if (Op.getOperand(NumOps - 1).getValueType() == MVT::Glue)
    --NumOps;

  // Check all operands that may contain the LR.
  for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
    // Each operand group starts with a flag word describing its kind and
    // the number of register operands that follow.
    const InlineAsm::Flag Flags(Op.getConstantOperandVal(i));
    unsigned NumVals = Flags.getNumOperandRegisters();
    ++i; // Skip the ID value.

    switch (Flags.getKind()) {
    default:
      llvm_unreachable("Bad flags!");
      i += NumVals;
      break;
      for (; NumVals; --NumVals, ++i) {
        Register Reg = cast<RegisterSDNode>(Op.getOperand(i))->getReg();
        if (Reg != PPC::LR && Reg != PPC::LR8)
          continue;
        // Found a use/def of the link register: record it and stop scanning.
        MFI.setLRStoreRequired();
        return Op;
      }
      break;
    }
    }
  }

  return Op;
}
3824
// Lower ISD::INIT_TRAMPOLINE. On AIX a function-descriptor-style trampoline
// is built inline (entry point + TOC from the target descriptor, nest value
// as environment pointer); elsewhere the work is delegated to the
// __trampoline_setup runtime helper.
SDValue PPCTargetLowering::LowerINIT_TRAMPOLINE(SDValue Op,
                                                SelectionDAG &DAG) const {
  SDValue Chain = Op.getOperand(0);
  SDValue Trmp = Op.getOperand(1); // trampoline
  SDValue FPtr = Op.getOperand(2); // nested function
  SDValue Nest = Op.getOperand(3); // 'nest' parameter value
  SDLoc dl(Op);

  EVT PtrVT = getPointerTy(DAG.getDataLayout());

  if (Subtarget.isAIXABI()) {
    // On AIX we create a trampoline descriptor by combining the
    // entry point and TOC from the global descriptor (FPtr) with the
    // nest argument as the environment pointer.
    uint64_t PointerSize = Subtarget.isPPC64() ? 8 : 4;
    MaybeAlign PointerAlign(PointerSize);
    auto MMOFlags = Subtarget.hasInvariantFunctionDescriptors()
                        : MachineMemOperand::MONone;

    // Descriptor layout: [entry point][TOC pointer][environment pointer],
    // one pointer each.
    uint64_t TOCPointerOffset = 1 * PointerSize;
    uint64_t EnvPointerOffset = 2 * PointerSize;
    SDValue SDTOCPtrOffset = DAG.getConstant(TOCPointerOffset, dl, PtrVT);
    SDValue SDEnvPtrOffset = DAG.getConstant(EnvPointerOffset, dl, PtrVT);

    // IR-level values for the trampoline buffer and the target function,
    // used to tag the memory operands below.
    const Value *TrampolineAddr =
        cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
    const Function *Func =
        cast<Function>(cast<SrcValueSDNode>(Op.getOperand(5))->getValue());

    SDValue OutChains[3];

    // Copy the entry point address from the global descriptor to the
    // trampoline buffer.
    SDValue LoadEntryPoint =
        DAG.getLoad(PtrVT, dl, Chain, FPtr, MachinePointerInfo(Func, 0),
                    PointerAlign, MMOFlags);
    SDValue EPLoadChain = LoadEntryPoint.getValue(1);
    OutChains[0] = DAG.getStore(EPLoadChain, dl, LoadEntryPoint, Trmp,
                                MachinePointerInfo(TrampolineAddr, 0));

    // Copy the TOC pointer from the global descriptor to the trampoline
    // buffer.
    SDValue TOCFromDescriptorPtr =
        DAG.getNode(ISD::ADD, dl, PtrVT, FPtr, SDTOCPtrOffset);
    SDValue TOCReg = DAG.getLoad(PtrVT, dl, Chain, TOCFromDescriptorPtr,
                                 MachinePointerInfo(Func, TOCPointerOffset),
                                 PointerAlign, MMOFlags);
    SDValue TrampolineTOCPointer =
        DAG.getNode(ISD::ADD, dl, PtrVT, Trmp, SDTOCPtrOffset);
    SDValue TOCLoadChain = TOCReg.getValue(1);
    OutChains[1] =
        DAG.getStore(TOCLoadChain, dl, TOCReg, TrampolineTOCPointer,
                     MachinePointerInfo(TrampolineAddr, TOCPointerOffset));

    // Store the nest argument into the environment pointer in the trampoline
    // buffer.
    SDValue EnvPointer = DAG.getNode(ISD::ADD, dl, PtrVT, Trmp, SDEnvPtrOffset);
    OutChains[2] =
        DAG.getStore(Chain, dl, Nest, EnvPointer,
                     MachinePointerInfo(TrampolineAddr, EnvPointerOffset));

    // Merge the three stores so all of them reach the DAG root.
        DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains);
    return TokenFactor;
  }

  bool isPPC64 = (PtrVT == MVT::i64);
  Type *IntPtrTy = DAG.getDataLayout().getIntPtrType(*DAG.getContext());

  Args.emplace_back(Trmp, IntPtrTy);
  // TrampSize == (isPPC64 ? 48 : 40);
  Args.emplace_back(
      DAG.getConstant(isPPC64 ? 48 : 40, dl, Subtarget.getScalarIntVT()),
      IntPtrTy);
  Args.emplace_back(FPtr, IntPtrTy);
  Args.emplace_back(Nest, IntPtrTy);

  // Lower to a call to __trampoline_setup(Trmp, TrampSize, FPtr, ctx_reg)
  TargetLowering::CallLoweringInfo CLI(DAG);
  CLI.setDebugLoc(dl).setChain(Chain).setLibCallee(
      DAG.getExternalSymbol("__trampoline_setup", PtrVT), std::move(Args));

  std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
  return CallResult.second;
}
3914
3915SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
3916 MachineFunction &MF = DAG.getMachineFunction();
3917 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
3918 EVT PtrVT = getPointerTy(MF.getDataLayout());
3919
3920 SDLoc dl(Op);
3921
3922 if (Subtarget.isPPC64() || Subtarget.isAIXABI()) {
3923 // vastart just stores the address of the VarArgsFrameIndex slot into the
3924 // memory location argument.
3925 SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
3926 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
3927 return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1),
3928 MachinePointerInfo(SV));
3929 }
3930
3931 // For the 32-bit SVR4 ABI we follow the layout of the va_list struct.
3932 // We suppose the given va_list is already allocated.
3933 //
3934 // typedef struct {
3935 // char gpr; /* index into the array of 8 GPRs
3936 // * stored in the register save area
3937 // * gpr=0 corresponds to r3,
3938 // * gpr=1 to r4, etc.
3939 // */
3940 // char fpr; /* index into the array of 8 FPRs
3941 // * stored in the register save area
3942 // * fpr=0 corresponds to f1,
3943 // * fpr=1 to f2, etc.
3944 // */
3945 // char *overflow_arg_area;
3946 // /* location on stack that holds
3947 // * the next overflow argument
3948 // */
3949 // char *reg_save_area;
3950 // /* where r3:r10 and f1:f8 (if saved)
3951 // * are stored
3952 // */
3953 // } va_list[1];
3954
3955 SDValue ArgGPR = DAG.getConstant(FuncInfo->getVarArgsNumGPR(), dl, MVT::i32);
3956 SDValue ArgFPR = DAG.getConstant(FuncInfo->getVarArgsNumFPR(), dl, MVT::i32);
3957 SDValue StackOffsetFI = DAG.getFrameIndex(FuncInfo->getVarArgsStackOffset(),
3958 PtrVT);
3959 SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
3960 PtrVT);
3961
3962 uint64_t FrameOffset = PtrVT.getSizeInBits()/8;
3963 SDValue ConstFrameOffset = DAG.getConstant(FrameOffset, dl, PtrVT);
3964
3965 uint64_t StackOffset = PtrVT.getSizeInBits()/8 - 1;
3966 SDValue ConstStackOffset = DAG.getConstant(StackOffset, dl, PtrVT);
3967
3968 uint64_t FPROffset = 1;
3969 SDValue ConstFPROffset = DAG.getConstant(FPROffset, dl, PtrVT);
3970
3971 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
3972
3973 // Store first byte : number of int regs
3974 SDValue firstStore =
3975 DAG.getTruncStore(Op.getOperand(0), dl, ArgGPR, Op.getOperand(1),
3976 MachinePointerInfo(SV), MVT::i8);
3977 uint64_t nextOffset = FPROffset;
3978 SDValue nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, Op.getOperand(1),
3979 ConstFPROffset);
3980
3981 // Store second byte : number of float regs
3982 SDValue secondStore =
3983 DAG.getTruncStore(firstStore, dl, ArgFPR, nextPtr,
3984 MachinePointerInfo(SV, nextOffset), MVT::i8);
3985 nextOffset += StackOffset;
3986 nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstStackOffset);
3987
3988 // Store second word : arguments given on stack
3989 SDValue thirdStore = DAG.getStore(secondStore, dl, StackOffsetFI, nextPtr,
3990 MachinePointerInfo(SV, nextOffset));
3991 nextOffset += FrameOffset;
3992 nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstFrameOffset);
3993
3994 // Store third word : arguments given in registers
3995 return DAG.getStore(thirdStore, dl, FR, nextPtr,
3996 MachinePointerInfo(SV, nextOffset));
3997}
3998
/// FPR - The set of FP registers that should be allocated for arguments
/// on Darwin and AIX: the thirteen parameter-passing FPRs, F1 through F13.
static const MCPhysReg FPR[] = {PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5,
                                PPC::F6, PPC::F7, PPC::F8, PPC::F9, PPC::F10,
                                PPC::F11, PPC::F12, PPC::F13};
4004
4005/// CalculateStackSlotSize - Calculates the size reserved for this argument on
4006/// the stack.
4007static unsigned CalculateStackSlotSize(EVT ArgVT, ISD::ArgFlagsTy Flags,
4008 unsigned PtrByteSize) {
4009 unsigned ArgSize = ArgVT.getStoreSize();
4010 if (Flags.isByVal())
4011 ArgSize = Flags.getByValSize();
4012
4013 // Round up to multiples of the pointer size, except for array members,
4014 // which are always packed.
4015 if (!Flags.isInConsecutiveRegs())
4016 ArgSize = ((ArgSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
4017
4018 return ArgSize;
4019}
4020
/// CalculateStackSlotAlignment - Calculates the alignment of this argument
/// on the stack: pointer-size by default, 16 bytes for vector types, the
/// requested alignment for by-val aggregates, and the packed type alignment
/// for array members.
                                   ISD::ArgFlagsTy Flags,
                                   unsigned PtrByteSize) {
  // Default: align to the pointer size.
  Align Alignment(PtrByteSize);

  // Altivec parameters are padded to a 16 byte boundary.
  if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
      ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
      ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64 ||
      ArgVT == MVT::v1i128 || ArgVT == MVT::f128)
    Alignment = Align(16);

  // ByVal parameters are aligned as requested.
  if (Flags.isByVal()) {
    auto BVAlign = Flags.getNonZeroByValAlign();
    if (BVAlign > PtrByteSize) {
      // Over-aligned by-val arguments must still be pointer-size multiples.
      if (BVAlign.value() % PtrByteSize != 0)
            "ByVal alignment is not a multiple of the pointer size");

      Alignment = BVAlign;
    }
  }

  // Array members are always packed to their original alignment.
  if (Flags.isInConsecutiveRegs()) {
    // If the array member was split into multiple registers, the first
    // needs to be aligned to the size of the full type. (Except for
    // ppcf128, which is only aligned as its f64 components.)
    if (Flags.isSplit() && OrigVT != MVT::ppcf128)
      Alignment = Align(OrigVT.getStoreSize());
    else
      Alignment = Align(ArgVT.getStoreSize());
  }

  return Alignment;
}
4060
4061/// CalculateStackSlotUsed - Return whether this argument will use its
4062/// stack slot (instead of being passed in registers). ArgOffset,
4063/// AvailableFPRs, and AvailableVRs must hold the current argument
4064/// position, and will be updated to account for this argument.
4065static bool CalculateStackSlotUsed(EVT ArgVT, EVT OrigVT, ISD::ArgFlagsTy Flags,
4066 unsigned PtrByteSize, unsigned LinkageSize,
4067 unsigned ParamAreaSize, unsigned &ArgOffset,
4068 unsigned &AvailableFPRs,
4069 unsigned &AvailableVRs) {
4070 bool UseMemory = false;
4071
4072 // Respect alignment of argument on the stack.
4073 Align Alignment =
4074 CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
4075 ArgOffset = alignTo(ArgOffset, Alignment);
4076 // If there's no space left in the argument save area, we must
4077 // use memory (this check also catches zero-sized arguments).
4078 if (ArgOffset >= LinkageSize + ParamAreaSize)
4079 UseMemory = true;
4080
4081 // Allocate argument on the stack.
4082 ArgOffset += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
4083 if (Flags.isInConsecutiveRegsLast())
4084 ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
4085 // If we overran the argument save area, we must use memory
4086 // (this check catches arguments passed partially in memory)
4087 if (ArgOffset > LinkageSize + ParamAreaSize)
4088 UseMemory = true;
4089
4090 // However, if the argument is actually passed in an FPR or a VR,
4091 // we don't use memory after all.
4092 if (!Flags.isByVal()) {
4093 if (ArgVT == MVT::f32 || ArgVT == MVT::f64)
4094 if (AvailableFPRs > 0) {
4095 --AvailableFPRs;
4096 return false;
4097 }
4098 if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
4099 ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
4100 ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64 ||
4101 ArgVT == MVT::v1i128 || ArgVT == MVT::f128)
4102 if (AvailableVRs > 0) {
4103 --AvailableVRs;
4104 return false;
4105 }
4106 }
4107
4108 return UseMemory;
4109}
4110
/// EnsureStackAlignment - Round stack frame size up from NumBytes to
/// ensure minimum alignment required for target, as reported by the
/// target's frame lowering.
                                     unsigned NumBytes) {
  return alignTo(NumBytes, Lowering->getStackAlign());
}
4117
// Dispatch formal-argument lowering to the ABI-specific implementation:
// AIX, 64-bit ELF (SVR4), or 32-bit ELF (SVR4).
SDValue PPCTargetLowering::LowerFormalArguments(
    SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
  if (Subtarget.isAIXABI())
    return LowerFormalArguments_AIX(Chain, CallConv, isVarArg, Ins, dl, DAG,
                                    InVals);
  if (Subtarget.is64BitELFABI())
    return LowerFormalArguments_64SVR4(Chain, CallConv, isVarArg, Ins, dl, DAG,
                                       InVals);
  // Only the three ABIs above exist for PPC; anything else is a bug.
  assert(Subtarget.is32BitELFABI());
  return LowerFormalArguments_32SVR4(Chain, CallConv, isVarArg, Ins, dl, DAG,
                                     InVals);
}
4132
4133SDValue PPCTargetLowering::LowerFormalArguments_32SVR4(
4134 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
4135 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
4136 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
4137
4138 // 32-bit SVR4 ABI Stack Frame Layout:
4139 // +-----------------------------------+
4140 // +--> | Back chain |
4141 // | +-----------------------------------+
4142 // | | Floating-point register save area |
4143 // | +-----------------------------------+
4144 // | | General register save area |
4145 // | +-----------------------------------+
4146 // | | CR save word |
4147 // | +-----------------------------------+
4148 // | | VRSAVE save word |
4149 // | +-----------------------------------+
4150 // | | Alignment padding |
4151 // | +-----------------------------------+
4152 // | | Vector register save area |
4153 // | +-----------------------------------+
4154 // | | Local variable space |
4155 // | +-----------------------------------+
4156 // | | Parameter list area |
4157 // | +-----------------------------------+
4158 // | | LR save word |
4159 // | +-----------------------------------+
4160 // SP--> +--- | Back chain |
4161 // +-----------------------------------+
4162 //
4163 // Specifications:
4164 // System V Application Binary Interface PowerPC Processor Supplement
4165 // AltiVec Technology Programming Interface Manual
4166
4167 MachineFunction &MF = DAG.getMachineFunction();
4168 MachineFrameInfo &MFI = MF.getFrameInfo();
4169 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
4170
4171 EVT PtrVT = getPointerTy(MF.getDataLayout());
4172 // Potential tail calls could cause overwriting of argument stack slots.
4173 bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
4174 (CallConv == CallingConv::Fast));
4175 const Align PtrAlign(4);
4176
4177 // Assign locations to all of the incoming arguments.
4179 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
4180 *DAG.getContext());
4181
4182 // Reserve space for the linkage area on the stack.
4183 unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
4184 CCInfo.AllocateStack(LinkageSize, PtrAlign);
4185 CCInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4);
4186
4187 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
4188 CCValAssign &VA = ArgLocs[i];
4189
4190 // Arguments stored in registers.
4191 if (VA.isRegLoc()) {
4192 const TargetRegisterClass *RC;
4193 EVT ValVT = VA.getValVT();
4194
4195 switch (ValVT.getSimpleVT().SimpleTy) {
4196 default:
4197 llvm_unreachable("ValVT not supported by formal arguments Lowering");
4198 case MVT::i1:
4199 case MVT::i32:
4200 RC = &PPC::GPRCRegClass;
4201 break;
4202 case MVT::f32:
4203 if (Subtarget.hasP8Vector())
4204 RC = &PPC::VSSRCRegClass;
4205 else if (Subtarget.hasSPE())
4206 RC = &PPC::GPRCRegClass;
4207 else
4208 RC = &PPC::F4RCRegClass;
4209 break;
4210 case MVT::f64:
4211 if (Subtarget.hasVSX())
4212 RC = &PPC::VSFRCRegClass;
4213 else if (Subtarget.hasSPE())
4214 // SPE passes doubles in GPR pairs.
4215 RC = &PPC::GPRCRegClass;
4216 else
4217 RC = &PPC::F8RCRegClass;
4218 break;
4219 case MVT::v16i8:
4220 case MVT::v8i16:
4221 case MVT::v4i32:
4222 RC = &PPC::VRRCRegClass;
4223 break;
4224 case MVT::v4f32:
4225 RC = &PPC::VRRCRegClass;
4226 break;
4227 case MVT::v2f64:
4228 case MVT::v2i64:
4229 RC = &PPC::VRRCRegClass;
4230 break;
4231 }
4232
4233 SDValue ArgValue;
4234 // Transform the arguments stored in physical registers into
4235 // virtual ones.
4236 if (VA.getLocVT() == MVT::f64 && Subtarget.hasSPE()) {
4237 assert(i + 1 < e && "No second half of double precision argument");
4238 Register RegLo = MF.addLiveIn(VA.getLocReg(), RC);
4239 Register RegHi = MF.addLiveIn(ArgLocs[++i].getLocReg(), RC);
4240 SDValue ArgValueLo = DAG.getCopyFromReg(Chain, dl, RegLo, MVT::i32);
4241 SDValue ArgValueHi = DAG.getCopyFromReg(Chain, dl, RegHi, MVT::i32);
4242 if (!Subtarget.isLittleEndian())
4243 std::swap (ArgValueLo, ArgValueHi);
4244 ArgValue = DAG.getNode(PPCISD::BUILD_SPE64, dl, MVT::f64, ArgValueLo,
4245 ArgValueHi);
4246 } else {
4247 Register Reg = MF.addLiveIn(VA.getLocReg(), RC);
4248 ArgValue = DAG.getCopyFromReg(Chain, dl, Reg,
4249 ValVT == MVT::i1 ? MVT::i32 : ValVT);
4250 if (ValVT == MVT::i1)
4251 ArgValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, ArgValue);
4252 }
4253
4254 InVals.push_back(ArgValue);
4255 } else {
4256 // Argument stored in memory.
4257 assert(VA.isMemLoc());
4258
4259 // Get the extended size of the argument type in stack
4260 unsigned ArgSize = VA.getLocVT().getStoreSize();
4261 // Get the actual size of the argument type
4262 unsigned ObjSize = VA.getValVT().getStoreSize();
4263 unsigned ArgOffset = VA.getLocMemOffset();
4264 // Stack objects in PPC32 are right justified.
4265 ArgOffset += ArgSize - ObjSize;
4266 int FI = MFI.CreateFixedObject(ArgSize, ArgOffset, isImmutable);
4267
4268 // Create load nodes to retrieve arguments from the stack.
4269 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4270 InVals.push_back(
4271 DAG.getLoad(VA.getValVT(), dl, Chain, FIN, MachinePointerInfo()));
4272 }
4273 }
4274
4275 // Assign locations to all of the incoming aggregate by value arguments.
4276 // Aggregates passed by value are stored in the local variable space of the
4277 // caller's stack frame, right above the parameter list area.
4278 SmallVector<CCValAssign, 16> ByValArgLocs;
4279 CCState CCByValInfo(CallConv, isVarArg, DAG.getMachineFunction(),
4280 ByValArgLocs, *DAG.getContext());
4281
4282 // Reserve stack space for the allocations in CCInfo.
4283 CCByValInfo.AllocateStack(CCInfo.getStackSize(), PtrAlign);
4284
4285 CCByValInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4_ByVal);
4286
4287 // Area that is at least reserved in the caller of this function.
4288 unsigned MinReservedArea = CCByValInfo.getStackSize();
4289 MinReservedArea = std::max(MinReservedArea, LinkageSize);
4290
4291 // Set the size that is at least reserved in caller of this function. Tail
4292 // call optimized function's reserved stack space needs to be aligned so that
4293 // taking the difference between two stack areas will result in an aligned
4294 // stack.
4295 MinReservedArea =
4296 EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
4297 FuncInfo->setMinReservedArea(MinReservedArea);
4298
4300
4301 // If the function takes variable number of arguments, make a frame index for
4302 // the start of the first vararg value... for expansion of llvm.va_start.
4303 if (isVarArg) {
4304 static const MCPhysReg GPArgRegs[] = {
4305 PPC::R3, PPC::R4, PPC::R5, PPC::R6,
4306 PPC::R7, PPC::R8, PPC::R9, PPC::R10,
4307 };
4308 const unsigned NumGPArgRegs = std::size(GPArgRegs);
4309
4310 static const MCPhysReg FPArgRegs[] = {
4311 PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
4312 PPC::F8
4313 };
4314 unsigned NumFPArgRegs = std::size(FPArgRegs);
4315
4316 if (useSoftFloat() || hasSPE())
4317 NumFPArgRegs = 0;
4318
4319 FuncInfo->setVarArgsNumGPR(CCInfo.getFirstUnallocated(GPArgRegs));
4320 FuncInfo->setVarArgsNumFPR(CCInfo.getFirstUnallocated(FPArgRegs));
4321
4322 // Make room for NumGPArgRegs and NumFPArgRegs.
4323 int Depth = NumGPArgRegs * PtrVT.getSizeInBits()/8 +
4324 NumFPArgRegs * MVT(MVT::f64).getSizeInBits()/8;
4325
4327 PtrVT.getSizeInBits() / 8, CCInfo.getStackSize(), true));
4328
4329 FuncInfo->setVarArgsFrameIndex(
4330 MFI.CreateStackObject(Depth, Align(8), false));
4331 SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
4332
4333 // The fixed integer arguments of a variadic function are stored to the
4334 // VarArgsFrameIndex on the stack so that they may be loaded by
4335 // dereferencing the result of va_next.
4336 for (MCPhysReg GPArgReg : GPArgRegs) {
4337 // Get an existing live-in vreg, or add a new one.
4338 Register VReg = MF.getRegInfo().getLiveInVirtReg(GPArgReg);
4339 if (!VReg)
4340 VReg = MF.addLiveIn(GPArgReg, &PPC::GPRCRegClass);
4341
4342 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4343 SDValue Store =
4344 DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
4345 MemOps.push_back(Store);
4346 // Increment the address by four for the next argument to store
4347 SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, dl, PtrVT);
4348 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
4349 }
4350
4351 // FIXME 32-bit SVR4: We only need to save FP argument registers if CR bit 6
4352 // is set.
4353 // The double arguments are stored to the VarArgsFrameIndex
4354 // on the stack.
4355 for (unsigned FPRIndex = 0; FPRIndex != NumFPArgRegs; ++FPRIndex) {
4356 // Get an existing live-in vreg, or add a new one.
4357 Register VReg = MF.getRegInfo().getLiveInVirtReg(FPArgRegs[FPRIndex]);
4358 if (!VReg)
4359 VReg = MF.addLiveIn(FPArgRegs[FPRIndex], &PPC::F8RCRegClass);
4360
4361 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::f64);
4362 SDValue Store =
4363 DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
4364 MemOps.push_back(Store);
4365 // Increment the address by eight for the next argument to store
4366 SDValue PtrOff = DAG.getConstant(MVT(MVT::f64).getSizeInBits()/8, dl,
4367 PtrVT);
4368 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
4369 }
4370 }
4371
4372 if (!MemOps.empty())
4373 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
4374
4375 return Chain;
4376}
4377
4378// PPC64 passes i8, i16, and i32 values in i64 registers. Promote
4379// value to MVT::i64 and then truncate to the correct register size.
4380SDValue PPCTargetLowering::extendArgForPPC64(ISD::ArgFlagsTy Flags,
4381 EVT ObjectVT, SelectionDAG &DAG,
4382 SDValue ArgVal,
4383 const SDLoc &dl) const {
4384 if (Flags.isSExt())
4385 ArgVal = DAG.getNode(ISD::AssertSext, dl, MVT::i64, ArgVal,
4386 DAG.getValueType(ObjectVT));
4387 else if (Flags.isZExt())
4388 ArgVal = DAG.getNode(ISD::AssertZext, dl, MVT::i64, ArgVal,
4389 DAG.getValueType(ObjectVT));
4390
4391 return DAG.getNode(ISD::TRUNCATE, dl, ObjectVT, ArgVal);
4392}
4393
4394SDValue PPCTargetLowering::LowerFormalArguments_64SVR4(
4395 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
4396 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
4397 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
4398 // TODO: add description of PPC stack frame format, or at least some docs.
4399 //
4400 bool isELFv2ABI = Subtarget.isELFv2ABI();
4401 bool isLittleEndian = Subtarget.isLittleEndian();
4402 MachineFunction &MF = DAG.getMachineFunction();
4403 MachineFrameInfo &MFI = MF.getFrameInfo();
4404 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
4405
4406 assert(!(CallConv == CallingConv::Fast && isVarArg) &&
4407 "fastcc not supported on varargs functions");
4408
4409 EVT PtrVT = getPointerTy(MF.getDataLayout());
4410 // Potential tail calls could cause overwriting of argument stack slots.
4411 bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
4412 (CallConv == CallingConv::Fast));
4413 unsigned PtrByteSize = 8;
4414 unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
4415
4416 static const MCPhysReg GPR[] = {
4417 PPC::X3, PPC::X4, PPC::X5, PPC::X6,
4418 PPC::X7, PPC::X8, PPC::X9, PPC::X10,
4419 };
4420 static const MCPhysReg VR[] = {
4421 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
4422 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
4423 };
4424
4425 const unsigned Num_GPR_Regs = std::size(GPR);
4426 const unsigned Num_FPR_Regs = useSoftFloat() ? 0 : 13;
4427 const unsigned Num_VR_Regs = std::size(VR);
4428
4429 // Do a first pass over the arguments to determine whether the ABI
4430 // guarantees that our caller has allocated the parameter save area
4431 // on its stack frame. In the ELFv1 ABI, this is always the case;
4432 // in the ELFv2 ABI, it is true if this is a vararg function or if
4433 // any parameter is located in a stack slot.
4434
4435 bool HasParameterArea = !isELFv2ABI || isVarArg;
4436 unsigned ParamAreaSize = Num_GPR_Regs * PtrByteSize;
4437 unsigned NumBytes = LinkageSize;
4438 unsigned AvailableFPRs = Num_FPR_Regs;
4439 unsigned AvailableVRs = Num_VR_Regs;
4440 for (const ISD::InputArg &In : Ins) {
4441 if (In.Flags.isNest())
4442 continue;
4443
4444 if (CalculateStackSlotUsed(In.VT, In.ArgVT, In.Flags, PtrByteSize,
4445 LinkageSize, ParamAreaSize, NumBytes,
4446 AvailableFPRs, AvailableVRs))
4447 HasParameterArea = true;
4448 }
4449
4450 // Add DAG nodes to load the arguments or copy them out of registers. On
4451 // entry to a function on PPC, the arguments start after the linkage area,
4452 // although the first ones are often in registers.
4453
4454 unsigned ArgOffset = LinkageSize;
4455 unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
4458 unsigned CurArgIdx = 0;
4459 for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
4460 SDValue ArgVal;
4461 bool needsLoad = false;
4462 EVT ObjectVT = Ins[ArgNo].VT;
4463 EVT OrigVT = Ins[ArgNo].ArgVT;
4464 unsigned ObjSize = ObjectVT.getStoreSize();
4465 unsigned ArgSize = ObjSize;
4466 ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
4467 if (Ins[ArgNo].isOrigArg()) {
4468 std::advance(FuncArg, Ins[ArgNo].getOrigArgIndex() - CurArgIdx);
4469 CurArgIdx = Ins[ArgNo].getOrigArgIndex();
4470 }
4471 // We re-align the argument offset for each argument, except when using the
4472 // fast calling convention, when we need to make sure we do that only when
4473 // we'll actually use a stack slot.
4474 unsigned CurArgOffset;
4475 Align Alignment;
4476 auto ComputeArgOffset = [&]() {
4477 /* Respect alignment of argument on the stack. */
4478 Alignment =
4479 CalculateStackSlotAlignment(ObjectVT, OrigVT, Flags, PtrByteSize);
4480 ArgOffset = alignTo(ArgOffset, Alignment);
4481 CurArgOffset = ArgOffset;
4482 };
4483
4484 if (CallConv != CallingConv::Fast) {
4485 ComputeArgOffset();
4486
4487 /* Compute GPR index associated with argument offset. */
4488 GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
4489 GPR_idx = std::min(GPR_idx, Num_GPR_Regs);
4490 }
4491
4492 // FIXME the codegen can be much improved in some cases.
4493 // We do not have to keep everything in memory.
4494 if (Flags.isByVal()) {
4495 assert(Ins[ArgNo].isOrigArg() && "Byval arguments cannot be implicit");
4496
4497 if (CallConv == CallingConv::Fast)
4498 ComputeArgOffset();
4499
4500 // ObjSize is the true size, ArgSize rounded up to multiple of registers.
4501 ObjSize = Flags.getByValSize();
4502 ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
4503 // Empty aggregate parameters do not take up registers. Examples:
4504 // struct { } a;
4505 // union { } b;
4506 // int c[0];
4507 // etc. However, we have to provide a place-holder in InVals, so
4508 // pretend we have an 8-byte item at the current address for that
4509 // purpose.
4510 if (!ObjSize) {
4511 int FI = MFI.CreateFixedObject(PtrByteSize, ArgOffset, true);
4512 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4513 InVals.push_back(FIN);
4514 continue;
4515 }
4516
4517 // Create a stack object covering all stack doublewords occupied
4518 // by the argument. If the argument is (fully or partially) on
4519 // the stack, or if the argument is fully in registers but the
4520 // caller has allocated the parameter save anyway, we can refer
4521 // directly to the caller's stack frame. Otherwise, create a
4522 // local copy in our own frame.
4523 int FI;
4524 if (HasParameterArea ||
4525 ArgSize + ArgOffset > LinkageSize + Num_GPR_Regs * PtrByteSize)
4526 FI = MFI.CreateFixedObject(ArgSize, ArgOffset, false, true);
4527 else
4528 FI = MFI.CreateStackObject(ArgSize, Alignment, false);
4529 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4530
4531 // Handle aggregates smaller than 8 bytes.
4532 if (ObjSize < PtrByteSize) {
4533 // The value of the object is its address, which differs from the
4534 // address of the enclosing doubleword on big-endian systems.
4535 SDValue Arg = FIN;
4536 if (!isLittleEndian) {
4537 SDValue ArgOff = DAG.getConstant(PtrByteSize - ObjSize, dl, PtrVT);
4538 Arg = DAG.getNode(ISD::ADD, dl, ArgOff.getValueType(), Arg, ArgOff);
4539 }
4540 InVals.push_back(Arg);
4541
4542 if (GPR_idx != Num_GPR_Regs) {
4543 Register VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
4544 FuncInfo->addLiveInAttr(VReg, Flags);
4545 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4546 EVT ObjType = EVT::getIntegerVT(*DAG.getContext(), ObjSize * 8);
4547 SDValue Store =
4548 DAG.getTruncStore(Val.getValue(1), dl, Val, Arg,
4549 MachinePointerInfo(&*FuncArg), ObjType);
4550 MemOps.push_back(Store);
4551 }
4552 // Whether we copied from a register or not, advance the offset
4553 // into the parameter save area by a full doubleword.
4554 ArgOffset += PtrByteSize;
4555 continue;
4556 }
4557
4558 // The value of the object is its address, which is the address of
4559 // its first stack doubleword.
4560 InVals.push_back(FIN);
4561
4562 // Store whatever pieces of the object are in registers to memory.
4563 for (unsigned j = 0; j < ArgSize; j += PtrByteSize) {
4564 if (GPR_idx == Num_GPR_Regs)
4565 break;
4566
4567 Register VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
4568 FuncInfo->addLiveInAttr(VReg, Flags);
4569 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4570 SDValue Addr = FIN;
4571 if (j) {
4572 SDValue Off = DAG.getConstant(j, dl, PtrVT);
4573 Addr = DAG.getNode(ISD::ADD, dl, Off.getValueType(), Addr, Off);
4574 }
4575 unsigned StoreSizeInBits = std::min(PtrByteSize, (ObjSize - j)) * 8;
4576 EVT ObjType = EVT::getIntegerVT(*DAG.getContext(), StoreSizeInBits);
4577 SDValue Store =
4578 DAG.getTruncStore(Val.getValue(1), dl, Val, Addr,
4579 MachinePointerInfo(&*FuncArg, j), ObjType);
4580 MemOps.push_back(Store);
4581 ++GPR_idx;
4582 }
4583 ArgOffset += ArgSize;
4584 continue;
4585 }
4586
4587 switch (ObjectVT.getSimpleVT().SimpleTy) {
4588 default: llvm_unreachable("Unhandled argument type!");
4589 case MVT::i1:
4590 case MVT::i32:
4591 case MVT::i64:
4592 if (Flags.isNest()) {
4593 // The 'nest' parameter, if any, is passed in R11.
4594 Register VReg = MF.addLiveIn(PPC::X11, &PPC::G8RCRegClass);
4595 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
4596
4597 if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
4598 ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
4599
4600 break;
4601 }
4602
4603 // These can be scalar arguments or elements of an integer array type
4604 // passed directly. Clang may use those instead of "byval" aggregate
4605 // types to avoid forcing arguments to memory unnecessarily.
4606 if (GPR_idx != Num_GPR_Regs) {
4607 Register VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
4608 FuncInfo->addLiveInAttr(VReg, Flags);
4609 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
4610
4611 if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
4612 // PPC64 passes i8, i16, and i32 values in i64 registers. Promote
4613 // value to MVT::i64 and then truncate to the correct register size.
4614 ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
4615 } else {
4616 if (CallConv == CallingConv::Fast)
4617 ComputeArgOffset();
4618
4619 needsLoad = true;
4620 ArgSize = PtrByteSize;
4621 }
4622 if (CallConv != CallingConv::Fast || needsLoad)
4623 ArgOffset += 8;
4624 break;
4625
4626 case MVT::f32:
4627 case MVT::f64:
4628 // These can be scalar arguments or elements of a float array type
4629 // passed directly. The latter are used to implement ELFv2 homogenous
4630 // float aggregates.
4631 if (FPR_idx != Num_FPR_Regs) {
4632 unsigned VReg;
4633
4634 if (ObjectVT == MVT::f32)
4635 VReg = MF.addLiveIn(FPR[FPR_idx],
4636 Subtarget.hasP8Vector()
4637 ? &PPC::VSSRCRegClass
4638 : &PPC::F4RCRegClass);
4639 else
4640 VReg = MF.addLiveIn(FPR[FPR_idx], Subtarget.hasVSX()
4641 ? &PPC::VSFRCRegClass
4642 : &PPC::F8RCRegClass);
4643
4644 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
4645 ++FPR_idx;
4646 } else if (GPR_idx != Num_GPR_Regs && CallConv != CallingConv::Fast) {
4647 // FIXME: We may want to re-enable this for CallingConv::Fast on the P8
4648 // once we support fp <-> gpr moves.
4649
4650 // This can only ever happen in the presence of f32 array types,
4651 // since otherwise we never run out of FPRs before running out
4652 // of GPRs.
4653 Register VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
4654 FuncInfo->addLiveInAttr(VReg, Flags);
4655 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
4656
4657 if (ObjectVT == MVT::f32) {
4658 if ((ArgOffset % PtrByteSize) == (isLittleEndian ? 4 : 0))
4659 ArgVal = DAG.getNode(ISD::SRL, dl, MVT::i64, ArgVal,
4660 DAG.getConstant(32, dl, MVT::i32));
4661 ArgVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, ArgVal);
4662 }
4663
4664 ArgVal = DAG.getNode(ISD::BITCAST, dl, ObjectVT, ArgVal);
4665 } else {
4666 if (CallConv == CallingConv::Fast)
4667 ComputeArgOffset();
4668
4669 needsLoad = true;
4670 }
4671
4672 // When passing an array of floats, the array occupies consecutive
4673 // space in the argument area; only round up to the next doubleword
4674 // at the end of the array. Otherwise, each float takes 8 bytes.
4675 if (CallConv != CallingConv::Fast || needsLoad) {
4676 ArgSize = Flags.isInConsecutiveRegs() ? ObjSize : PtrByteSize;
4677 ArgOffset += ArgSize;
4678 if (Flags.isInConsecutiveRegsLast())
4679 ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
4680 }
4681 break;
4682 case MVT::v4f32:
4683 case MVT::v4i32:
4684 case MVT::v8i16:
4685 case MVT::v16i8:
4686 case MVT::v2f64:
4687 case MVT::v2i64:
4688 case MVT::v1i128:
4689 case MVT::f128:
4690 // These can be scalar arguments or elements of a vector array type
4691 // passed directly. The latter are used to implement ELFv2 homogenous
4692 // vector aggregates.
4693 if (VR_idx != Num_VR_Regs) {
4694 Register VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass);
4695 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
4696 ++VR_idx;
4697 } else {
4698 if (CallConv == CallingConv::Fast)
4699 ComputeArgOffset();
4700 needsLoad = true;
4701 }
4702 if (CallConv != CallingConv::Fast || needsLoad)
4703 ArgOffset += 16;
4704 break;
4705 }
4706
4707 // We need to load the argument to a virtual register if we determined
4708 // above that we ran out of physical registers of the appropriate type.
4709 if (needsLoad) {
4710 if (ObjSize < ArgSize && !isLittleEndian)
4711 CurArgOffset += ArgSize - ObjSize;
4712 int FI = MFI.CreateFixedObject(ObjSize, CurArgOffset, isImmutable);
4713 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4714 ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo());
4715 }
4716
4717 InVals.push_back(ArgVal);
4718 }
4719
4720 // Area that is at least reserved in the caller of this function.
4721 unsigned MinReservedArea;
4722 if (HasParameterArea)
4723 MinReservedArea = std::max(ArgOffset, LinkageSize + 8 * PtrByteSize);
4724 else
4725 MinReservedArea = LinkageSize;
4726
4727 // Set the size that is at least reserved in caller of this function. Tail
4728 // call optimized functions' reserved stack space needs to be aligned so that
4729 // taking the difference between two stack areas will result in an aligned
4730 // stack.
4731 MinReservedArea =
4732 EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
4733 FuncInfo->setMinReservedArea(MinReservedArea);
4734
4735 // If the function takes variable number of arguments, make a frame index for
4736 // the start of the first vararg value... for expansion of llvm.va_start.
4737 // On ELFv2ABI spec, it writes:
4738 // C programs that are intended to be *portable* across different compilers
4739 // and architectures must use the header file <stdarg.h> to deal with variable
4740 // argument lists.
4741 if (isVarArg && MFI.hasVAStart()) {
4742 int Depth = ArgOffset;
4743
4744 FuncInfo->setVarArgsFrameIndex(
4745 MFI.CreateFixedObject(PtrByteSize, Depth, true));
4746 SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
4747
4748 // If this function is vararg, store any remaining integer argument regs
4749 // to their spots on the stack so that they may be loaded by dereferencing
4750 // the result of va_next.
4751 for (GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
4752 GPR_idx < Num_GPR_Regs; ++GPR_idx) {
4753 Register VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
4754 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4755 SDValue Store =
4756 DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
4757 MemOps.push_back(Store);
4758 // Increment the address by four for the next argument to store
4759 SDValue PtrOff = DAG.getConstant(PtrByteSize, dl, PtrVT);
4760 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
4761 }
4762 }
4763
4764 if (!MemOps.empty())
4765 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
4766
4767 return Chain;
4768}
4769
4770/// CalculateTailCallSPDiff - Get the amount the stack pointer has to be
4771/// adjusted to accommodate the arguments for the tailcall.
4772static int CalculateTailCallSPDiff(SelectionDAG& DAG, bool isTailCall,
4773 unsigned ParamSize) {
4774
4775 if (!isTailCall) return 0;
4776
4778 unsigned CallerMinReservedArea = FI->getMinReservedArea();
4779 int SPDiff = (int)CallerMinReservedArea - (int)ParamSize;
4780 // Remember only if the new adjustment is bigger.
4781 if (SPDiff < FI->getTailCallSPDelta())
4782 FI->setTailCallSPDelta(SPDiff);
4783
4784 return SPDiff;
4785}
4786
4787static bool isFunctionGlobalAddress(const GlobalValue *CalleeGV);
4788
4789static bool callsShareTOCBase(const Function *Caller,
4790 const GlobalValue *CalleeGV,
4791 const TargetMachine &TM) {
4792 // It does not make sense to call callsShareTOCBase() with a caller that
4793 // is PC Relative since PC Relative callers do not have a TOC.
4794#ifndef NDEBUG
4795 const PPCSubtarget *STICaller = &TM.getSubtarget<PPCSubtarget>(*Caller);
4796 assert(!STICaller->isUsingPCRelativeCalls() &&
4797 "PC Relative callers do not have a TOC and cannot share a TOC Base");
4798#endif
4799
4800 // Callee is either a GlobalAddress or an ExternalSymbol. ExternalSymbols
4801 // don't have enough information to determine if the caller and callee share
4802 // the same TOC base, so we have to pessimistically assume they don't for
4803 // correctness.
4804 if (!CalleeGV)
4805 return false;
4806
4807 // If the callee is preemptable, then the static linker will use a plt-stub
4808 // which saves the toc to the stack, and needs a nop after the call
4809 // instruction to convert to a toc-restore.
4810 if (!TM.shouldAssumeDSOLocal(CalleeGV))
4811 return false;
4812
4813 // Functions with PC Relative enabled may clobber the TOC in the same DSO.
4814 // We may need a TOC restore in the situation where the caller requires a
4815 // valid TOC but the callee is PC Relative and does not.
4816 const Function *F = dyn_cast<Function>(CalleeGV);
4817 const GlobalAlias *Alias = dyn_cast<GlobalAlias>(CalleeGV);
4818
4819 // If we have an Alias we can try to get the function from there.
4820 if (Alias) {
4821 const GlobalObject *GlobalObj = Alias->getAliaseeObject();
4822 F = dyn_cast<Function>(GlobalObj);
4823 }
4824
4825 // If we still have no valid function pointer we do not have enough
4826 // information to determine if the callee uses PC Relative calls so we must
4827 // assume that it does.
4828 if (!F)
4829 return false;
4830
4831 // If the callee uses PC Relative we cannot guarantee that the callee won't
4832 // clobber the TOC of the caller and so we must assume that the two
4833 // functions do not share a TOC base.
4834 const PPCSubtarget *STICallee = &TM.getSubtarget<PPCSubtarget>(*F);
4835 if (STICallee->isUsingPCRelativeCalls())
4836 return false;
4837
4838 // If the GV is not a strong definition then we need to assume it can be
4839 // replaced by another function at link time. The function that replaces
4840 // it may not share the same TOC as the caller since the callee may be
4841 // replaced by a PC Relative version of the same function.
4842 if (!CalleeGV->isStrongDefinitionForLinker())
4843 return false;
4844
4845 // The medium and large code models are expected to provide a sufficiently
4846 // large TOC to provide all data addressing needs of a module with a
4847 // single TOC.
4848 if (CodeModel::Medium == TM.getCodeModel() ||
4850 return true;
4851
4852 // Any explicitly-specified sections and section prefixes must also match.
4853 // Also, if we're using -ffunction-sections, then each function is always in
4854 // a different section (the same is true for COMDAT functions).
4855 if (TM.getFunctionSections() || CalleeGV->hasComdat() ||
4856 Caller->hasComdat() || CalleeGV->getSection() != Caller->getSection())
4857 return false;
4858 if (const auto *F = dyn_cast<Function>(CalleeGV)) {
4859 if (F->getSectionPrefix() != Caller->getSectionPrefix())
4860 return false;
4861 }
4862
4863 return true;
4864}
4865
4866static bool
4868 const SmallVectorImpl<ISD::OutputArg> &Outs) {
4869 assert(Subtarget.is64BitELFABI());
4870
4871 const unsigned PtrByteSize = 8;
4872 const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
4873
4874 static const MCPhysReg GPR[] = {
4875 PPC::X3, PPC::X4, PPC::X5, PPC::X6,
4876 PPC::X7, PPC::X8, PPC::X9, PPC::X10,
4877 };
4878 static const MCPhysReg VR[] = {
4879 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
4880 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
4881 };
4882
4883 const unsigned NumGPRs = std::size(GPR);
4884 const unsigned NumFPRs = 13;
4885 const unsigned NumVRs = std::size(VR);
4886 const unsigned ParamAreaSize = NumGPRs * PtrByteSize;
4887
4888 unsigned NumBytes = LinkageSize;
4889 unsigned AvailableFPRs = NumFPRs;
4890 unsigned AvailableVRs = NumVRs;
4891
4892 for (const ISD::OutputArg& Param : Outs) {
4893 if (Param.Flags.isNest()) continue;
4894
4895 if (CalculateStackSlotUsed(Param.VT, Param.ArgVT, Param.Flags, PtrByteSize,
4896 LinkageSize, ParamAreaSize, NumBytes,
4897 AvailableFPRs, AvailableVRs))
4898 return true;
4899 }
4900 return false;
4901}
4902
4903static bool hasSameArgumentList(const Function *CallerFn, const CallBase &CB) {
4904 if (CB.arg_size() != CallerFn->arg_size())
4905 return false;
4906
4907 auto CalleeArgIter = CB.arg_begin();
4908 auto CalleeArgEnd = CB.arg_end();
4909 Function::const_arg_iterator CallerArgIter = CallerFn->arg_begin();
4910
4911 for (; CalleeArgIter != CalleeArgEnd; ++CalleeArgIter, ++CallerArgIter) {
4912 const Value* CalleeArg = *CalleeArgIter;
4913 const Value* CallerArg = &(*CallerArgIter);
4914 if (CalleeArg == CallerArg)
4915 continue;
4916
4917 // e.g. @caller([4 x i64] %a, [4 x i64] %b) {
4918 // tail call @callee([4 x i64] undef, [4 x i64] %b)
4919 // }
4920 // 1st argument of callee is undef and has the same type as caller.
4921 if (CalleeArg->getType() == CallerArg->getType() &&
4922 isa<UndefValue>(CalleeArg))
4923 continue;
4924
4925 return false;
4926 }
4927
4928 return true;
4929}
4930
4931// Returns true if TCO is possible between the callers and callees
4932// calling conventions.
4933static bool
4935 CallingConv::ID CalleeCC) {
4936 // Tail calls are possible with fastcc and ccc.
4937 auto isTailCallableCC = [] (CallingConv::ID CC){
4938 return CC == CallingConv::C || CC == CallingConv::Fast;
4939 };
4940 if (!isTailCallableCC(CallerCC) || !isTailCallableCC(CalleeCC))
4941 return false;
4942
4943 // We can safely tail call both fastcc and ccc callees from a c calling
4944 // convention caller. If the caller is fastcc, we may have less stack space
4945 // than a non-fastcc caller with the same signature so disable tail-calls in
4946 // that case.
4947 return CallerCC == CallingConv::C || CallerCC == CalleeCC;
4948}
4949
// Decide whether a call can be lowered as a tail call / sibling call under
// the 64-bit ELF (SVR4) ABIs. CalleeGV may be null (indirect callee), CB may
// be null (PC-relative tail calls), and isCalleeExternalSymbol marks an
// ExternalSymbol callee node.
// NOTE(review): the parameter list below appears to be missing its
// `const SmallVectorImpl<ISD::OutputArg> &Outs` line (extraction artifact);
// the body references Outs — confirm against the original source.
bool PPCTargetLowering::IsEligibleForTailCallOptimization_64SVR4(
    const GlobalValue *CalleeGV, CallingConv::ID CalleeCC,
    CallingConv::ID CallerCC, const CallBase *CB, bool isVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const Function *CallerFunc,
    bool isCalleeExternalSymbol) const {
  bool TailCallOpt = getTargetMachine().Options.GuaranteedTailCallOpt;

  // Sibling-call optimization can be disabled by flag, unless tail calls are
  // guaranteed (-tailcallopt), which takes precedence.
  if (DisableSCO && !TailCallOpt) return false;

  // Variadic argument functions are not supported.
  if (isVarArg) return false;

  // Check that the calling conventions are compatible for tco.
  if (!areCallingConvEligibleForTCO_64SVR4(CallerCC, CalleeCC))
    return false;

  // Caller contains any byval parameter is not supported.
  if (any_of(Ins, [](const ISD::InputArg &IA) { return IA.Flags.isByVal(); }))
    return false;

  // Callee contains any byval parameter is not supported, too.
  // Note: This is a quick work around, because in some cases, e.g.
  // caller's stack size > callee's stack size, we are still able to apply
  // sibling call optimization. For example, gcc is able to do SCO for caller1
  // in the following example, but not for caller2.
  // struct test {
  // long int a;
  // char ary[56];
  // } gTest;
  // __attribute__((noinline)) int callee(struct test v, struct test *b) {
  // b->a = v.a;
  // return 0;
  // }
  // void caller1(struct test a, struct test c, struct test *b) {
  // callee(gTest, b); }
  // void caller2(struct test *b) { callee(gTest, b); }
  if (any_of(Outs, [](const ISD::OutputArg& OA) { return OA.Flags.isByVal(); }))
    return false;

  // If callee and caller use different calling conventions, we cannot pass
  // parameters on stack since offsets for the parameter area may be different.
  if (CallerCC != CalleeCC && needStackSlotPassParameters(Subtarget, Outs))
    return false;

  // All variants of 64-bit ELF ABIs without PC-Relative addressing require that
  // the caller and callee share the same TOC for TCO/SCO. If the caller and
  // callee potentially have different TOC bases then we cannot tail call since
  // we need to restore the TOC pointer after the call.
  // ref: https://bugzilla.mozilla.org/show_bug.cgi?id=973977
  // We cannot guarantee this for indirect calls or calls to external functions.
  // When PC-Relative addressing is used, the concept of the TOC is no longer
  // applicable so this check is not required.
  // Check first for indirect calls.
  if (!Subtarget.isUsingPCRelativeCalls() &&
      !isFunctionGlobalAddress(CalleeGV) && !isCalleeExternalSymbol)
    return false;

  // Check if we share the TOC base.
  if (!Subtarget.isUsingPCRelativeCalls() &&
      !callsShareTOCBase(CallerFunc, CalleeGV, getTargetMachine()))
    return false;

  // TCO allows altering callee ABI, so we don't have to check further.
  if (CalleeCC == CallingConv::Fast && TailCallOpt)
    return true;

  if (DisableSCO) return false;

  // If callee use the same argument list that caller is using, then we can
  // apply SCO on this case. If it is not, then we need to check if callee needs
  // stack for passing arguments.
  // PC Relative tail calls may not have a CallBase.
  // If there is no CallBase we cannot verify if we have the same argument
  // list so assume that we don't have the same argument list.
  if (CB && !hasSameArgumentList(CallerFunc, *CB) &&
      needStackSlotPassParameters(Subtarget, Outs))
    return false;
  else if (!CB && needStackSlotPassParameters(Subtarget, Outs))
    return false;

  return true;
}
5033
5034/// IsEligibleForTailCallOptimization - Check whether the call is eligible
5035/// for tail call optimization. Targets which want to do tail call
5036/// optimization should implement this function.
5037bool PPCTargetLowering::IsEligibleForTailCallOptimization(
5038 const GlobalValue *CalleeGV, CallingConv::ID CalleeCC,
5039 CallingConv::ID CallerCC, bool isVarArg,
5040 const SmallVectorImpl<ISD::InputArg> &Ins) const {
5041 if (!getTargetMachine().Options.GuaranteedTailCallOpt)
5042 return false;
5043
5044 // Variable argument functions are not supported.
5045 if (isVarArg)
5046 return false;
5047
5048 if (CalleeCC == CallingConv::Fast && CallerCC == CalleeCC) {
5049 // Functions containing by val parameters are not supported.
5050 if (any_of(Ins, [](const ISD::InputArg &IA) { return IA.Flags.isByVal(); }))
5051 return false;
5052
5053 // Non-PIC/GOT tail calls are supported.
5054 if (getTargetMachine().getRelocationModel() != Reloc::PIC_)
5055 return true;
5056
5057 // At the moment we can only do local tail calls (in same module, hidden
5058 // or protected) if we are generating PIC.
5059 if (CalleeGV)
5060 return CalleeGV->hasHiddenVisibility() ||
5061 CalleeGV->hasProtectedVisibility();
5062 }
5063
5064 return false;
5065}
5066
/// isCallCompatibleAddress - Return the immediate to use if the specified
/// 32-bit value is representable in the immediate field of a BxA instruction.
// NOTE(review): the signature line of this helper (and the dyn_cast that
// produces C) appears to have been dropped during extraction; callers refer
// to it as isBLACompatibleAddress(Op, DAG) — confirm against the original
// source.
  if (!C) return nullptr;

  int Addr = C->getZExtValue();
  // A BLA target must be word-aligned and fit in the 26-bit sign-extended
  // branch-immediate field.
  if ((Addr & 3) != 0 || // Low 2 bits are implicitly zero.
      SignExtend32<26>(Addr) != Addr)
    return nullptr; // Top 6 bits have to be sext of immediate.

  // NOTE(review): the operand lines of this node-construction call are
  // partially missing here (extraction artifact).
  return DAG
              (int)C->getZExtValue() >> 2, SDLoc(Op),
      .getNode();
}
5084
namespace {

/// Records one outgoing argument together with the fixed stack slot it must
/// be stored to when lowering a tail call; the actual stores are emitted
/// later by StoreTailCallArgumentsToStackSlot.
struct TailCallArgumentInfo {
  SDValue Arg;        // The argument value to store.
  SDValue FrameIdxOp; // Frame-index node addressing the destination slot.
  int FrameIdx = 0;   // Raw frame index of the destination slot.

  TailCallArgumentInfo() = default;
};

} // end anonymous namespace
5096
/// StoreTailCallArgumentsToStackSlot - Stores arguments to their stack slot.
// NOTE(review): the `static void StoreTailCallArgumentsToStackSlot(` line
// appears to have been dropped during extraction.
    SelectionDAG &DAG, SDValue Chain,
    const SmallVectorImpl<TailCallArgumentInfo> &TailCallArgs,
    SmallVectorImpl<SDValue> &MemOpChains, const SDLoc &dl) {
  // Emit one store per recorded tail-call argument into its fixed slot.
  for (unsigned i = 0, e = TailCallArgs.size(); i != e; ++i) {
    SDValue Arg = TailCallArgs[i].Arg;
    SDValue FIN = TailCallArgs[i].FrameIdxOp;
    int FI = TailCallArgs[i].FrameIdx;
    // Store relative to framepointer.
    MemOpChains.push_back(DAG.getStore(
        Chain, dl, Arg, FIN,
        // NOTE(review): the MachinePointerInfo::getFixedStack(...) operand
        // line (which uses FI) appears to be missing here.
  }
}
5112
/// EmitTailCallStoreFPAndRetAddr - Move the frame pointer and return address to
/// the appropriate stack slot for the tail call optimized function call.
// NOTE(review): the signature line of this static helper appears to be
// missing (extraction artifact); its parameters continue below.
                                   SDValue OldRetAddr, SDValue OldFP,
                                   int SPDiff, const SDLoc &dl) {
  // Only needed when the stack pointer actually moves between frames.
  if (SPDiff) {
    // Calculate the new stack slot for the return address.
    // NOTE(review): the line obtaining MF (presumably
    // DAG.getMachineFunction()) appears to be missing here.
    const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
    const PPCFrameLowering *FL = Subtarget.getFrameLowering();
    // Slot size matches the target pointer width.
    int SlotSize = Subtarget.isPPC64() ? 8 : 4;
    int NewRetAddrLoc = SPDiff + FL->getReturnSaveOffset();
    int NewRetAddr = MF.getFrameInfo().CreateFixedObject(SlotSize,
                                                         NewRetAddrLoc, true);
    SDValue NewRetAddrFrIdx =
        DAG.getFrameIndex(NewRetAddr, Subtarget.getScalarIntVT());
    // Store the old return address into the relocated save slot.
    Chain = DAG.getStore(Chain, dl, OldRetAddr, NewRetAddrFrIdx,
                         MachinePointerInfo::getFixedStack(MF, NewRetAddr));
  }
  return Chain;
}
5134
/// CalculateTailCallArgDest - Remember Argument for later processing. Calculate
/// the position of the argument.
// NOTE(review): the `static void CalculateTailCallArgDest(` line appears to
// have been dropped during extraction.
    SelectionDAG &DAG, MachineFunction &MF, bool IsPPC64, SDValue Arg,
    int SPDiff, unsigned ArgOffset,
    SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments) {
  // The final slot offset is the argument's offset plus the SP delta
  // between the caller's and callee's frames.
  int Offset = ArgOffset + SPDiff;
  // Round the value's bit width up to whole bytes.
  uint32_t OpSize = (Arg.getValueSizeInBits() + 7) / 8;
  int FI = MF.getFrameInfo().CreateFixedObject(OpSize, Offset, true);
  EVT VT = IsPPC64 ? MVT::i64 : MVT::i32;
  SDValue FIN = DAG.getFrameIndex(FI, VT);
  // Record the argument and its destination; the store itself is emitted
  // later by StoreTailCallArgumentsToStackSlot.
  TailCallArgumentInfo Info;
  Info.Arg = Arg;
  Info.FrameIdxOp = FIN;
  Info.FrameIdx = FI;
  TailCallArguments.push_back(Info);
}
5152
5153/// EmitTCFPAndRetAddrLoad - Emit load from frame pointer and return address
5154/// stack slot. Returns the chain as result and the loaded frame pointers in
5155/// LROpOut/FPOpout. Used when tail calling.
5156SDValue PPCTargetLowering::EmitTailCallLoadFPAndRetAddr(
5157 SelectionDAG &DAG, int SPDiff, SDValue Chain, SDValue &LROpOut,
5158 SDValue &FPOpOut, const SDLoc &dl) const {
5159 if (SPDiff) {
5160 // Load the LR and FP stack slot for later adjusting.
5161 LROpOut = getReturnAddrFrameIndex(DAG);
5162 LROpOut = DAG.getLoad(Subtarget.getScalarIntVT(), dl, Chain, LROpOut,
5163 MachinePointerInfo());
5164 Chain = SDValue(LROpOut.getNode(), 1);
5165 }
5166 return Chain;
5167}
5168
/// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified
/// by "Src" to address "Dst" of size "Size". Alignment information is
/// specified by the specific parameter attribute. The copy will be passed as
/// a byval function parameter.
/// Sometimes what we are copying is the end of a larger object, the part that
/// does not fit in registers.
// NOTE(review): the signature line (presumably `static SDValue
// CreateCopyOfByValArgument(SDValue Dst, SDValue Src,`) appears to be
// missing here (extraction artifact).
                                  SDValue Chain, ISD::ArgFlagsTy Flags,
                                  SelectionDAG &DAG, const SDLoc &dl) {
  // The byval size is a compile-time constant carried in the argument flags.
  SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), dl, MVT::i32);
  return DAG.getMemcpy(
      Chain, dl, Dst, Src, SizeNode, Flags.getNonZeroByValAlign(), false, false,
      /*CI=*/nullptr, std::nullopt, MachinePointerInfo(), MachinePointerInfo());
}
5183
/// LowerMemOpCallTo - Store the argument to the stack or remember it in case of
/// tail calls.
// NOTE(review): the `static void LowerMemOpCallTo(` line appears to have
// been dropped during extraction.
    SelectionDAG &DAG, MachineFunction &MF, SDValue Chain, SDValue Arg,
    SDValue PtrOff, int SPDiff, unsigned ArgOffset, bool isPPC64,
    bool isTailCall, bool isVector, SmallVectorImpl<SDValue> &MemOpChains,
    SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments, const SDLoc &dl) {
  // NOTE(review): the line computing PtrVT appears to be missing here
  // (extraction artifact).
  if (!isTailCall) {
    // Vector arguments are re-addressed relative to the stack pointer
    // instead of the incoming PtrOff.
    if (isVector) {
      SDValue StackPtr;
      if (isPPC64)
        StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
      else
        StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
      PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr,
                           DAG.getConstant(ArgOffset, dl, PtrVT));
    }
    MemOpChains.push_back(
        DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));
    // Calculate and remember argument location.
  } else
    CalculateTailCallArgDest(DAG, MF, isPPC64, Arg, SPDiff, ArgOffset,
                             TailCallArguments);
}
5209
// Finalizes the frame for an optimized tail call: flushes the recorded
// argument stores, relocates the return address, and closes the call
// sequence.
static void
// NOTE(review): the signature line naming this helper (PrepareTailCall, with
// leading SelectionDAG/InGlue/Chain parameters) appears to have been dropped
// during extraction.
                     const SDLoc &dl, int SPDiff, unsigned NumBytes, SDValue LROp,
                     SDValue FPOp,
                     SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments) {
  // Emit a sequence of copyto/copyfrom virtual registers for arguments that
  // might overwrite each other in case of tail call optimization.
  SmallVector<SDValue, 8> MemOpChains2;
  // Do not flag preceding copytoreg stuff together with the following stuff.
  InGlue = SDValue();
  StoreTailCallArgumentsToStackSlot(DAG, Chain, TailCallArguments,
                                    MemOpChains2, dl);
  // Join all the argument stores so they complete before the call.
  if (!MemOpChains2.empty())
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains2);

  // Store the return address to the appropriate stack slot.
  Chain = EmitTailCallStoreFPAndRetAddr(DAG, Chain, LROp, FPOp, SPDiff, dl);

  // Emit callseq_end just before tailcall node.
  Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, InGlue, dl);
  InGlue = Chain.getValue(1);
}
5232
5233// Is this global address that of a function that can be called by name? (as
5234// opposed to something that must hold a descriptor for an indirect call).
5235static bool isFunctionGlobalAddress(const GlobalValue *GV) {
5236 if (GV) {
5237 if (GV->isThreadLocal())
5238 return false;
5239
5240 return GV->getValueType()->isFunctionTy();
5241 }
5242
5243 return false;
5244}
5245
// Copies the physical result registers of a finished call back into virtual
// values (InVals), handling SPE f64 pairs and the AExt/ZExt/SExt location
// adjustments.
SDValue PPCTargetLowering::LowerCallResult(
    SDValue Chain, SDValue InGlue, CallingConv::ID CallConv, bool isVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
  // NOTE(review): the declaration of RVLocs (presumably a
  // SmallVector<CCValAssign, 16>) appears to be missing here (extraction
  // artifact).
  CCState CCRetInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
                    *DAG.getContext());

  // NOTE(review): the true-branch line of this ternary (the cold calling
  // convention's return CC) appears to be missing here.
  CCRetInfo.AnalyzeCallResult(
      Ins, (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold)
               : RetCC_PPC);

  // Copy all of the result registers out of their specified physreg.
  for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
    CCValAssign &VA = RVLocs[i];
    assert(VA.isRegLoc() && "Can only return in registers!");

    SDValue Val;

    // SPE returns f64 in a pair of i32 registers; combine them back into a
    // single f64 value.
    if (Subtarget.hasSPE() && VA.getLocVT() == MVT::f64) {
      SDValue Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
                                      InGlue);
      Chain = Lo.getValue(1);
      InGlue = Lo.getValue(2);
      VA = RVLocs[++i]; // skip ahead to next loc
      SDValue Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
                                      InGlue);
      Chain = Hi.getValue(1);
      InGlue = Hi.getValue(2);
      if (!Subtarget.isLittleEndian())
        std::swap (Lo, Hi);
      Val = DAG.getNode(PPCISD::BUILD_SPE64, dl, MVT::f64, Lo, Hi);
    } else {
      Val = DAG.getCopyFromReg(Chain, dl,
                               VA.getLocReg(), VA.getLocVT(), InGlue);
      Chain = Val.getValue(1);
      InGlue = Val.getValue(2);
    }

    // Undo any promotion the calling convention applied to the value.
    switch (VA.getLocInfo()) {
    default: llvm_unreachable("Unknown loc info!");
    case CCValAssign::Full: break;
    case CCValAssign::AExt:
      Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
      break;
    case CCValAssign::ZExt:
      Val = DAG.getNode(ISD::AssertZext, dl, VA.getLocVT(), Val,
                        DAG.getValueType(VA.getValVT()));
      Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
      break;
    case CCValAssign::SExt:
      Val = DAG.getNode(ISD::AssertSext, dl, VA.getLocVT(), Val,
                        DAG.getValueType(VA.getValVT()));
      Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
      break;
    }

    InVals.push_back(Val);
  }

  return Chain;
}
5309
// Returns true when the callee operand cannot be turned into a direct branch
// target and the call must go through the count register.
static bool isIndirectCall(const SDValue &Callee, SelectionDAG &DAG,
                           const PPCSubtarget &Subtarget, bool isPatchPoint) {
  auto *G = dyn_cast<GlobalAddressSDNode>(Callee);
  const GlobalValue *GV = G ? G->getGlobal() : nullptr;

  // PatchPoint calls are not indirect.
  if (isPatchPoint)
    return false;

  // NOTE(review): the condition line guarding this early return appears to
  // be missing (extraction artifact); presumably it tests for a directly
  // callable global or external symbol — confirm against the original
  // source.
    return false;

  // Darwin, and 32-bit ELF can use a BLA. The descriptor based ABIs can not
  // because the immediate function pointer points to a descriptor instead of
  // a function entry point. The ELFv2 ABI cannot use a BLA because the function
  // pointer immediate points to the global entry point, while the BLA would
  // need to jump to the local entry point (see rL211174).
  if (!Subtarget.usesFunctionDescriptors() && !Subtarget.isELFv2ABI() &&
      isBLACompatibleAddress(Callee, DAG))
    return false;

  return true;
}
5333
5334// AIX and 64-bit ELF ABIs w/o PCRel require a TOC save/restore around calls.
5335static inline bool isTOCSaveRestoreRequired(const PPCSubtarget &Subtarget) {
5336 return Subtarget.isAIXABI() ||
5337 (Subtarget.is64BitELFABI() && !Subtarget.isUsingPCRelativeCalls());
5338}
5339
                              const Function &Caller, const SDValue &Callee,
                              const PPCSubtarget &Subtarget,
                              const TargetMachine &TM,
                              bool IsStrictFPCall = false) {
  // NOTE(review): the first line of this helper's signature (presumably
  // `static unsigned getCallOpcode(PPCTargetLowering::CallFlags CFlags,`)
  // appears to be missing above (extraction artifact). Selects the PPCISD
  // call opcode for the given call flags, switching to the rounding-mode
  // (_RM) variants for strict-FP calls.
  if (CFlags.IsTailCall)
    return PPCISD::TC_RETURN;

  unsigned RetOpc = 0;
  // This is a call through a function pointer.
  if (CFlags.IsIndirect) {
    // AIX and the 64-bit ELF ABIs need to maintain the TOC pointer across
    // indirect calls. The save of the caller's TOC pointer to the stack will be
    // inserted into the DAG as part of call lowering. The restore of the TOC
    // pointer is modeled by using a pseudo instruction for the call opcode that
    // represents the 2 instruction sequence of an indirect branch and link,
    // immediately followed by a load of the TOC pointer from the stack save
    // slot into gpr2. For 64-bit ELFv2 ABI with PCRel, do not restore the TOC
    // as it is not saved or used.
    RetOpc = isTOCSaveRestoreRequired(Subtarget) ? PPCISD::BCTRL_LOAD_TOC
                                                 : PPCISD::BCTRL;
  } else if (Subtarget.isUsingPCRelativeCalls()) {
    assert(Subtarget.is64BitELFABI() && "PC Relative is only on ELF ABI.");
    RetOpc = PPCISD::CALL_NOTOC;
  } else if (Subtarget.isAIXABI() || Subtarget.is64BitELFABI()) {
    // The ABIs that maintain a TOC pointer across calls need to have a nop
    // immediately following the call instruction if the caller and callee may
    // have different TOC bases. At link time if the linker determines the calls
    // may not share a TOC base, the call is redirected to a trampoline inserted
    // by the linker. The trampoline will (among other things) save the callers
    // TOC pointer at an ABI designated offset in the linkage area and the
    // linker will rewrite the nop to be a load of the TOC pointer from the
    // linkage area into gpr2.
    auto *G = dyn_cast<GlobalAddressSDNode>(Callee);
    const GlobalValue *GV = G ? G->getGlobal() : nullptr;
    RetOpc =
        callsShareTOCBase(&Caller, GV, TM) ? PPCISD::CALL : PPCISD::CALL_NOP;
  } else
    RetOpc = PPCISD::CALL;
  // Strict-FP calls use variants that also model the rounding mode.
  if (IsStrictFPCall) {
    switch (RetOpc) {
    default:
      llvm_unreachable("Unknown call opcode");
    case PPCISD::BCTRL_LOAD_TOC:
      RetOpc = PPCISD::BCTRL_LOAD_TOC_RM;
      break;
    case PPCISD::BCTRL:
      RetOpc = PPCISD::BCTRL_RM;
      break;
    case PPCISD::CALL_NOTOC:
      RetOpc = PPCISD::CALL_NOTOC_RM;
      break;
    case PPCISD::CALL:
      RetOpc = PPCISD::CALL_RM;
      break;
    case PPCISD::CALL_NOP:
      RetOpc = PPCISD::CALL_NOP_RM;
      break;
    }
  }
  return RetOpc;
}
5402
// Rewrites the callee operand into the node form the selected ABI expects:
// an absolute BLA immediate, an AIX entry-point symbol, a target global
// address (optionally PLT-decorated), or a target external symbol.
// NOTE(review): several hyperlinked lines of this function were dropped
// during extraction (lambda captures, the ExternalSymbol guard, TLOF/PtrVT
// declarations, XCOFF section arguments); confirm the gaps marked below
// against the original source.
static SDValue transformCallee(const SDValue &Callee, SelectionDAG &DAG,
                               const SDLoc &dl, const PPCSubtarget &Subtarget) {
  if (!Subtarget.usesFunctionDescriptors() && !Subtarget.isELFv2ABI())
    if (SDNode *Dest = isBLACompatibleAddress(Callee, DAG))
      return SDValue(Dest, 0);

  // Returns true if the callee is local, and false otherwise.
  auto isLocalCallee = [&]() {
    // NOTE(review): the line producing G appears to be missing here.
    const GlobalValue *GV = G ? G->getGlobal() : nullptr;

    // NOTE(review): the second conjunct of this return is missing here.
    return DAG.getTarget().shouldAssumeDSOLocal(GV) &&
  };

  // The PLT is only used in 32-bit ELF PIC mode. Attempting to use the PLT in
  // a static relocation model causes some versions of GNU LD (2.17.50, at
  // least) to force BSS-PLT, instead of secure-PLT, even if all objects are
  // built with secure-PLT.
  // NOTE(review): the final conjunct (presumably the PIC_ relocation-model
  // test) is missing here.
  bool UsePlt =
      Subtarget.is32BitELFABI() && !isLocalCallee() &&

  const auto getAIXFuncEntryPointSymbolSDNode = [&](const GlobalValue *GV) {
    const TargetMachine &TM = Subtarget.getTargetMachine();
    // NOTE(review): the line obtaining TLOF appears to be missing here.
    auto *S =
        static_cast<MCSymbolXCOFF *>(TLOF->getFunctionEntryPointSymbol(GV, TM));

    // NOTE(review): the line computing PtrVT appears to be missing here.
    return DAG.getMCSymbol(S, PtrVT);
  };

  auto *G = dyn_cast<GlobalAddressSDNode>(Callee);
  const GlobalValue *GV = G ? G->getGlobal() : nullptr;
  if (isFunctionGlobalAddress(GV)) {
    const GlobalValue *GV = cast<GlobalAddressSDNode>(Callee)->getGlobal();

    if (Subtarget.isAIXABI()) {
      return getAIXFuncEntryPointSymbolSDNode(GV);
    }
    return DAG.getTargetGlobalAddress(GV, dl, Callee.getValueType(), 0,
                                      UsePlt ? PPCII::MO_PLT : 0);
  }

  // NOTE(review): the guard introducing S (presumably a dyn_cast to
  // ExternalSymbolSDNode) appears to be missing here.
    const char *SymName = S->getSymbol();
    if (Subtarget.isAIXABI()) {
      // If there exists a user-declared function whose name is the same as the
      // ExternalSymbol's, then we pick up the user-declared version.
      // NOTE(review): the line obtaining Mod appears to be missing here.
      if (const Function *F =
              dyn_cast_or_null<Function>(Mod->getNamedValue(SymName)))
        return getAIXFuncEntryPointSymbolSDNode(F);

      // On AIX, direct function calls reference the symbol for the function's
      // entry point, which is named by prepending a "." before the function's
      // C-linkage name. A Qualname is returned here because an external
      // function entry point is a csect with XTY_ER property.
      const auto getExternalFunctionEntryPointSymbol = [&](StringRef SymName) {
        auto &Context = DAG.getMachineFunction().getContext();
        // NOTE(review): the trailing XCOFF arguments of this call appear to
        // be missing here.
        MCSectionXCOFF *Sec = Context.getXCOFFSection(
            (Twine(".") + Twine(SymName)).str(), SectionKind::getMetadata(),
        return Sec->getQualNameSymbol();
      };

      SymName = getExternalFunctionEntryPointSymbol(SymName)->getName().data();
    }
    return DAG.getTargetExternalSymbol(SymName, Callee.getValueType(),
                                       UsePlt ? PPCII::MO_PLT : 0);
  }

  // No transformation needed.
  assert(Callee.getNode() && "What no callee?");
  return Callee;
}
5480
  // NOTE(review): the signature line of this helper (presumably
  // `static SDValue getOutputChainFromCallSeq(SDValue CallSeqStart) {`)
  // appears to be missing above (extraction artifact). It extracts the
  // output chain value of a CALLSEQ_START node.
  assert(CallSeqStart.getOpcode() == ISD::CALLSEQ_START &&
         "Expected a CALLSEQ_STARTSDNode.");

  // The last operand is the chain, except when the node has glue. If the node
  // has glue, then the last operand is the glue, and the chain is the second
  // last operand.
  SDValue LastValue = CallSeqStart.getValue(CallSeqStart->getNumValues() - 1);
  if (LastValue.getValueType() != MVT::Glue)
    return LastValue;

  return CallSeqStart.getValue(CallSeqStart->getNumValues() - 2);
}
5494
5495// Creates the node that moves a functions address into the count register
5496// to prepare for an indirect call instruction.
5497static void prepareIndirectCall(SelectionDAG &DAG, SDValue &Callee,
5498 SDValue &Glue, SDValue &Chain,
5499 const SDLoc &dl) {
5500 SDValue MTCTROps[] = {Chain, Callee, Glue};
5501 EVT ReturnTypes[] = {MVT::Other, MVT::Glue};
5502 Chain = DAG.getNode(PPCISD::MTCTR, dl, ReturnTypes,
5503 ArrayRef(MTCTROps, Glue.getNode() ? 3 : 2));
5504 // The glue is the second value produced.
5505 Glue = Chain.getValue(1);
5506}
5507
                                          SDValue &Glue, SDValue &Chain,
                                          SDValue CallSeqStart,
                                          const CallBase *CB, const SDLoc &dl,
                                          bool hasNest,
                                          const PPCSubtarget &Subtarget) {
  // NOTE(review): the first line of this helper's signature (presumably
  // `static void prepareDescriptorIndirectCall(SelectionDAG &DAG, SDValue
  // &Callee,`) appears to be missing above (extraction artifact).
  //
  // Function pointers in the 64-bit SVR4 ABI do not point to the function
  // entry point, but to the function descriptor (the function entry point
  // address is part of the function descriptor though).
  // The function descriptor is a three doubleword structure with the
  // following fields: function entry point, TOC base address and
  // environment pointer.
  // Thus for a call through a function pointer, the following actions need
  // to be performed:
  //   1. Save the TOC of the caller in the TOC save area of its stack
  //      frame (this is done in LowerCall_Darwin() or LowerCall_64SVR4()).
  //   2. Load the address of the function entry point from the function
  //      descriptor.
  //   3. Load the TOC of the callee from the function descriptor into r2.
  //   4. Load the environment pointer from the function descriptor into
  //      r11.
  //   5. Branch to the function entry point address.
  //   6. On return of the callee, the TOC of the caller needs to be
  //      restored (this is done in FinishCall()).
  //
  // The loads are scheduled at the beginning of the call sequence, and the
  // register copies are flagged together to ensure that no other
  // operations can be scheduled in between. E.g. without flagging the
  // copies together, a TOC access in the caller could be scheduled between
  // the assignment of the callee TOC and the branch to the callee, which leads
  // to incorrect code.

  // Start by loading the function address from the descriptor.
  SDValue LDChain = getOutputChainFromCallSeq(CallSeqStart);
  // NOTE(review): the ternary's result lines (the MachineMemOperand flag
  // values) appear to be missing here (extraction artifact).
  auto MMOFlags = Subtarget.hasInvariantFunctionDescriptors()

  MachinePointerInfo MPI(CB ? CB->getCalledOperand() : nullptr);

  // Registers used in building the DAG.
  const MCRegister EnvPtrReg = Subtarget.getEnvironmentPointerRegister();
  const MCRegister TOCReg = Subtarget.getTOCPointerRegister();

  // Offsets of descriptor members.
  const unsigned TOCAnchorOffset = Subtarget.descriptorTOCAnchorOffset();
  const unsigned EnvPtrOffset = Subtarget.descriptorEnvironmentPointerOffset();

  const MVT RegVT = Subtarget.getScalarIntVT();
  const Align Alignment = Subtarget.isPPC64() ? Align(8) : Align(4);

  // One load for the functions entry point address.
  SDValue LoadFuncPtr = DAG.getLoad(RegVT, dl, LDChain, Callee, MPI,
                                    Alignment, MMOFlags);

  // One for loading the TOC anchor for the module that contains the called
  // function.
  SDValue TOCOff = DAG.getIntPtrConstant(TOCAnchorOffset, dl);
  SDValue AddTOC = DAG.getNode(ISD::ADD, dl, RegVT, Callee, TOCOff);
  SDValue TOCPtr =
      DAG.getLoad(RegVT, dl, LDChain, AddTOC,
                  MPI.getWithOffset(TOCAnchorOffset), Alignment, MMOFlags);

  // One for loading the environment pointer.
  SDValue PtrOff = DAG.getIntPtrConstant(EnvPtrOffset, dl);
  SDValue AddPtr = DAG.getNode(ISD::ADD, dl, RegVT, Callee, PtrOff);
  SDValue LoadEnvPtr =
      DAG.getLoad(RegVT, dl, LDChain, AddPtr,
                  MPI.getWithOffset(EnvPtrOffset), Alignment, MMOFlags);


  // Then copy the newly loaded TOC anchor to the TOC pointer.
  SDValue TOCVal = DAG.getCopyToReg(Chain, dl, TOCReg, TOCPtr, Glue);
  Chain = TOCVal.getValue(0);
  Glue = TOCVal.getValue(1);

  // If the function call has an explicit 'nest' parameter, it takes the
  // place of the environment pointer.
  assert((!hasNest || !Subtarget.isAIXABI()) &&
         "Nest parameter is not supported on AIX.");
  if (!hasNest) {
    SDValue EnvVal = DAG.getCopyToReg(Chain, dl, EnvPtrReg, LoadEnvPtr, Glue);
    Chain = EnvVal.getValue(0);
    Glue = EnvVal.getValue(1);
  }

  // The rest of the indirect call sequence is the same as the non-descriptor
  // DAG.
  prepareIndirectCall(DAG, LoadFuncPtr, Glue, Chain, dl);
}
5599
// Assembles the full operand list for the final call node: chain, callee (or
// the TOC-restore/CTR operands for indirect calls), tail-call SP delta,
// argument registers, implicit register uses, the call-preserved register
// mask, and the trailing glue.
static void
// NOTE(review): the line naming this helper (presumably
// `buildCallOperands(SmallVectorImpl<SDValue> &Ops,`) appears to have been
// dropped during extraction.
                  PPCTargetLowering::CallFlags CFlags, const SDLoc &dl,
                  SelectionDAG &DAG,
                  SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass,
                  SDValue Glue, SDValue Chain, SDValue &Callee, int SPDiff,
                  const PPCSubtarget &Subtarget) {
  const bool IsPPC64 = Subtarget.isPPC64();
  // MVT for a general purpose register.
  const MVT RegVT = Subtarget.getScalarIntVT();

  // First operand is always the chain.
  Ops.push_back(Chain);

  // If it's a direct call pass the callee as the second operand.
  if (!CFlags.IsIndirect)
    Ops.push_back(Callee);
  else {
    assert(!CFlags.IsPatchPoint && "Patch point calls are not indirect.");

    // For the TOC based ABIs, we have saved the TOC pointer to the linkage area
    // on the stack (this would have been done in `LowerCall_64SVR4` or
    // `LowerCall_AIX`). The call instruction is a pseudo instruction that
    // represents both the indirect branch and a load that restores the TOC
    // pointer from the linkage area. The operand for the TOC restore is an add
    // of the TOC save offset to the stack pointer. This must be the second
    // operand: after the chain input but before any other variadic arguments.
    // For 64-bit ELFv2 ABI with PCRel, do not restore the TOC as it is not
    // saved or used.
    if (isTOCSaveRestoreRequired(Subtarget)) {
      const MCRegister StackPtrReg = Subtarget.getStackPointerRegister();

      SDValue StackPtr = DAG.getRegister(StackPtrReg, RegVT);
      unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
      SDValue TOCOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
      SDValue AddTOC = DAG.getNode(ISD::ADD, dl, RegVT, StackPtr, TOCOff);
      Ops.push_back(AddTOC);
    }

    // Add the register used for the environment pointer.
    if (Subtarget.usesFunctionDescriptors() && !CFlags.HasNest)
      Ops.push_back(DAG.getRegister(Subtarget.getEnvironmentPointerRegister(),
                                    RegVT));


    // Add CTR register as callee so a bctr can be emitted later.
    if (CFlags.IsTailCall)
      Ops.push_back(DAG.getRegister(IsPPC64 ? PPC::CTR8 : PPC::CTR, RegVT));
  }

  // If this is a tail call add stack pointer delta.
  if (CFlags.IsTailCall)
    Ops.push_back(DAG.getConstant(SPDiff, dl, MVT::i32));

  // Add argument registers to the end of the list so that they are known live
  // into the call.
  for (const auto &[Reg, N] : RegsToPass)
    Ops.push_back(DAG.getRegister(Reg, N.getValueType()));

  // We cannot add R2/X2 as an operand here for PATCHPOINT, because there is
  // no way to mark dependencies as implicit here.
  // We will add the R2/X2 dependency in EmitInstrWithCustomInserter.
  if ((Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) &&
      !CFlags.IsPatchPoint && !Subtarget.isUsingPCRelativeCalls())
    Ops.push_back(DAG.getRegister(Subtarget.getTOCPointerRegister(), RegVT));

  // Add implicit use of CR bit 6 for 32-bit SVR4 vararg calls
  if (CFlags.IsVarArg && Subtarget.is32BitELFABI())
    Ops.push_back(DAG.getRegister(PPC::CR1EQ, MVT::i32));

  // Add a register mask operand representing the call-preserved registers.
  const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
  const uint32_t *Mask =
      TRI->getCallPreservedMask(DAG.getMachineFunction(), CFlags.CallConv);
  assert(Mask && "Missing call preserved mask for calling convention");
  Ops.push_back(DAG.getRegisterMask(Mask));

  // If the glue is valid, it is the last operand.
  if (Glue.getNode())
    Ops.push_back(Glue);
}
5681
// Completes call lowering after the arguments have been placed: picks the
// call opcode, rewrites/prepares the callee, builds the final operand list,
// emits either a TC_RETURN (tail call) or a call node plus CALLSEQ_END, and
// copies the results out of their physical registers.
SDValue PPCTargetLowering::FinishCall(
    CallFlags CFlags, const SDLoc &dl, SelectionDAG &DAG,
    SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass, SDValue Glue,
    SDValue Chain, SDValue CallSeqStart, SDValue &Callee, int SPDiff,
    unsigned NumBytes, const SmallVectorImpl<ISD::InputArg> &Ins,
    SmallVectorImpl<SDValue> &InVals, const CallBase *CB) const {

  // TOC-based ABIs (and 64-bit ELF without PCRel) need the TOC base pointer
  // marked as used for this function.
  if ((Subtarget.is64BitELFABI() && !Subtarget.isUsingPCRelativeCalls()) ||
      Subtarget.isAIXABI())
    setUsesTOCBasePtr(DAG);

  unsigned CallOpc =
      getCallOpcode(CFlags, DAG.getMachineFunction().getFunction(), Callee,
                    Subtarget, DAG.getTarget(), CB ? CB->isStrictFP() : false);

  // Direct calls rewrite the callee node; indirect calls move the target
  // into the CTR (through the descriptor on descriptor-based ABIs).
  if (!CFlags.IsIndirect)
    Callee = transformCallee(Callee, DAG, dl, Subtarget);
  else if (Subtarget.usesFunctionDescriptors())
    prepareDescriptorIndirectCall(DAG, Callee, Glue, Chain, CallSeqStart, CB,
                                  dl, CFlags.HasNest, Subtarget);
  else
    prepareIndirectCall(DAG, Callee, Glue, Chain, dl);

  // Build the operand list for the call instruction.
  // NOTE(review): the declaration of Ops (presumably a SmallVector<SDValue>)
  // appears to be missing here (extraction artifact).
  buildCallOperands(Ops, CFlags, dl, DAG, RegsToPass, Glue, Chain, Callee,
                    SPDiff, Subtarget);

  // Emit tail call.
  if (CFlags.IsTailCall) {
    // Indirect tail call when using PC Relative calls do not have the same
    // constraints.
    assert(((Callee.getOpcode() == ISD::Register &&
             cast<RegisterSDNode>(Callee)->getReg() == PPC::CTR) ||
            Callee.getOpcode() == ISD::TargetExternalSymbol ||
            Callee.getOpcode() == ISD::TargetGlobalAddress ||
            isa<ConstantSDNode>(Callee) ||
            (CFlags.IsIndirect && Subtarget.isUsingPCRelativeCalls())) &&
           "Expecting a global address, external symbol, absolute value, "
           "register or an indirect tail call when PC Relative calls are "
           "used.");
    // PC Relative calls also use TC_RETURN as the way to mark tail calls.
    assert(CallOpc == PPCISD::TC_RETURN &&
           "Unexpected call opcode for a tail call.");
    // NOTE(review): a line appears to be missing here (extraction artifact),
    // presumably recording the tail-call flag on the function info.
    SDValue Ret = DAG.getNode(CallOpc, dl, MVT::Other, Ops);
    DAG.addNoMergeSiteInfo(Ret.getNode(), CFlags.NoMerge);
    return Ret;
  }

  std::array<EVT, 2> ReturnTypes = {{MVT::Other, MVT::Glue}};
  Chain = DAG.getNode(CallOpc, dl, ReturnTypes, Ops);
  DAG.addNoMergeSiteInfo(Chain.getNode(), CFlags.NoMerge);
  Glue = Chain.getValue(1);

  // When performing tail call optimization the callee pops its arguments off
  // the stack. Account for this here so these bytes can be pushed back on in
  // PPCFrameLowering::eliminateCallFramePseudoInstr.
  // NOTE(review): part of this condition (presumably the
  // GuaranteedTailCallOpt test) appears to be missing here.
  int BytesCalleePops = (CFlags.CallConv == CallingConv::Fast &&
                            ? NumBytes
                            : 0;

  Chain = DAG.getCALLSEQ_END(Chain, NumBytes, BytesCalleePops, Glue, dl);
  Glue = Chain.getValue(1);

  return LowerCallResult(Chain, Glue, CFlags.CallConv, CFlags.IsVarArg, Ins, dl,
                         DAG, InVals);
}
5751
  // NOTE(review): the signature line of this method (presumably
  // `bool PPCTargetLowering::supportsTailCallFor(const CallBase *CB) const`)
  // appears to be missing above (extraction artifact). It answers whether a
  // given IR call site could be tail-call optimized.
  CallingConv::ID CalleeCC = CB->getCallingConv();
  const Function *CallerFunc = CB->getCaller();
  CallingConv::ID CallerCC = CallerFunc->getCallingConv();
  // Calls without a statically known callee are rejected here.
  const Function *CalleeFunc = CB->getCalledFunction();
  if (!CalleeFunc)
    return false;
  const GlobalValue *CalleeGV = dyn_cast<GlobalValue>(CalleeFunc);

  // NOTE(review): the declarations of the Outs/Ins argument vectors appear
  // to be missing here (extraction artifact).

  // Reconstruct the callee's return-value info so isEligibleForTCO can check
  // byval/stack-passing constraints.
  GetReturnInfo(CalleeCC, CalleeFunc->getReturnType(),
                CalleeFunc->getAttributes(), Outs, *this,
                CalleeFunc->getDataLayout());

  return isEligibleForTCO(CalleeGV, CalleeCC, CallerCC, CB,
                          CalleeFunc->isVarArg(), Outs, Ins, CallerFunc,
                          false /*isCalleeExternalSymbol*/);
}
5772
// Decide whether this call site may be lowered as a tail call.  Dispatches
// to the 64-bit-SVR4-specific eligibility check when targeting 64-bit ELF,
// and to the generic check otherwise.
bool PPCTargetLowering::isEligibleForTCO(
    const GlobalValue *CalleeGV, CallingConv::ID CalleeCC,
    CallingConv::ID CallerCC, const CallBase *CB, bool isVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const Function *CallerFunc,
    bool isCalleeExternalSymbol) const {
  // Long-call mode routes every call through a function pointer, which is
  // incompatible with TCO unless the call site is explicitly musttail.
  if (Subtarget.useLongCalls() && !(CB && CB->isMustTailCall()))
    return false;

  if (Subtarget.isSVR4ABI() && Subtarget.isPPC64())
    return IsEligibleForTailCallOptimization_64SVR4(
        CalleeGV, CalleeCC, CallerCC, CB, isVarArg, Outs, Ins, CallerFunc,
        isCalleeExternalSymbol);
  else
    return IsEligibleForTailCallOptimization(CalleeGV, CalleeCC, CallerCC,
                                             isVarArg, Ins);
}
5790
// Top-level call lowering entry point.  Re-validates tail-call eligibility,
// enforces musttail, translates the callee to a pointer in long-call mode,
// packages the per-call flags, and dispatches to the ABI-specific
// LowerCall_AIX / LowerCall_64SVR4 / LowerCall_32SVR4 implementation.
SDValue
PPCTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
                             SmallVectorImpl<SDValue> &InVals) const {
  SelectionDAG &DAG = CLI.DAG;
  SDLoc &dl = CLI.DL;
  SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
  SDValue Chain = CLI.Chain;
  SDValue Callee = CLI.Callee;
  bool &isTailCall = CLI.IsTailCall;
  CallingConv::ID CallConv = CLI.CallConv;
  bool isVarArg = CLI.IsVarArg;
  bool isPatchPoint = CLI.IsPatchPoint;
  const CallBase *CB = CLI.CB;

  if (isTailCall) {
    // The incoming flag only reflects the front-end's request; confirm the
    // target can actually honor it (note: isTailCall is a reference into
    // CLI, so this writes the decision back to the caller).
    CallingConv::ID CallerCC = MF.getFunction().getCallingConv();
    auto *G = dyn_cast<GlobalAddressSDNode>(Callee);
    const GlobalValue *GV = G ? G->getGlobal() : nullptr;
    bool IsCalleeExternalSymbol = isa<ExternalSymbolSDNode>(Callee);

    isTailCall =
        isEligibleForTCO(GV, CallConv, CallerCC, CB, isVarArg, Outs, Ins,
                         &(MF.getFunction()), IsCalleeExternalSymbol);
    if (isTailCall) {
      ++NumTailCalls;
      if (!getTargetMachine().Options.GuaranteedTailCallOpt)
        ++NumSiblingCalls;

      // PC Relative calls no longer guarantee that the callee is a Global
      // Address Node. The callee could be an indirect tail call in which
      // case the SDValue for the callee could be a load (to load the address
      // of a function pointer) or it may be a register copy (to move the
      // address of the callee from a function parameter into a virtual
      // register). It may also be an ExternalSymbolSDNode (ex memcopy).
      assert((Subtarget.isUsingPCRelativeCalls() ||
              isa<GlobalAddressSDNode>(Callee)) &&
             "Callee should be an llvm::Function object.");

      LLVM_DEBUG(dbgs() << "TCO caller: " << DAG.getMachineFunction().getName()
                        << "\nTCO callee: ");
      LLVM_DEBUG(Callee.dump());
    }
  }

  // musttail sites must be tail-called; failing to do so is a hard error
  // rather than a silent fallback.
  if (!isTailCall && CB && CB->isMustTailCall())
    report_fatal_error("failed to perform tail call elimination on a call "
                       "site marked musttail");

  // When long calls (i.e. indirect calls) are always used, calls are always
  // made via function pointer. If we have a function name, first translate it
  // into a pointer.
  if (Subtarget.useLongCalls() && isa<GlobalAddressSDNode>(Callee) &&
      !isTailCall)
    Callee = LowerGlobalAddress(Callee, DAG);

  CallFlags CFlags(
      CallConv, isTailCall, isVarArg, isPatchPoint,
      isIndirectCall(Callee, DAG, Subtarget, isPatchPoint),
      // hasNest: only the 64-bit ELF ABI has a convention for 'nest'
      // parameters here (passed in X11 by LowerCall_64SVR4).
      Subtarget.is64BitELFABI() &&
          any_of(Outs, [](ISD::OutputArg Arg) { return Arg.Flags.isNest(); }),
      CLI.NoMerge);

  // Dispatch by ABI: AIX first, then 64-bit SVR4 (ELF), else 32-bit SVR4.
  if (Subtarget.isAIXABI())
    return LowerCall_AIX(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
                         InVals, CB);

  assert(Subtarget.isSVR4ABI());
  if (Subtarget.isPPC64())
    return LowerCall_64SVR4(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
                            InVals, CB);
  return LowerCall_32SVR4(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
                          InVals, CB);
}
5868
// Lower an outgoing call under the 32-bit SVR4 (PowerPC ELF) ABI: assign
// argument locations, copy by-value aggregates into the caller's local
// area, move arguments into registers or parameter-area stack slots, set
// CR6 for varargs float handling, and hand off to FinishCall.
SDValue PPCTargetLowering::LowerCall_32SVR4(
    SDValue Chain, SDValue Callee, CallFlags CFlags,
    const SmallVectorImpl<SDValue> &OutVals,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
    const CallBase *CB) const {
  // See PPCTargetLowering::LowerFormalArguments_32SVR4() for a description
  // of the 32-bit SVR4 ABI stack frame layout.

  const CallingConv::ID CallConv = CFlags.CallConv;
  const bool IsVarArg = CFlags.IsVarArg;
  const bool IsTailCall = CFlags.IsTailCall;

  assert((CallConv == CallingConv::C ||
          CallConv == CallingConv::Cold ||
          CallConv == CallingConv::Fast) && "Unknown calling convention!");

  // 32-bit SVR4 pointers are 4 bytes, 4-byte aligned.
  const Align PtrAlign(4);

  MachineFunction &MF = DAG.getMachineFunction();

  // Mark this function as potentially containing a function that contains a
  // tail call. As a consequence the frame pointer will be used for dynamicalloc
  // and restoring the callers stack pointer in this functions epilog. This is
  // done because by tail calling the called function might overwrite the value
  // in this function's (MF) stack pointer stack slot 0(SP).
  if (getTargetMachine().Options.GuaranteedTailCallOpt &&
      CallConv == CallingConv::Fast)
    MF.getInfo<PPCFunctionInfo>()->setHasFastCall();

  // Count how many bytes are to be pushed on the stack, including the linkage
  // area, parameter list area and the part of the local variable space which
  // contains copies of aggregates which are passed by value.

  // Assign locations to all of the outgoing arguments.
  CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());

  // Reserve space for the linkage area on the stack.
  CCInfo.AllocateStack(Subtarget.getFrameLowering()->getLinkageSize(),
                       PtrAlign);

  if (IsVarArg) {
    // Handle fixed and variable vector arguments differently.
    // Fixed vector arguments go into registers as long as registers are
    // available. Variable vector arguments always go into memory.
    unsigned NumArgs = Outs.size();

    for (unsigned i = 0; i != NumArgs; ++i) {
      MVT ArgVT = Outs[i].VT;
      ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
      bool Result;

      if (!ArgFlags.isVarArg()) {
        Result = CC_PPC32_SVR4(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags,
                               Outs[i].OrigTy, CCInfo);
      } else {
                                     ArgFlags, Outs[i].OrigTy, CCInfo);
      }

      // A true result means the calling-convention function could not
      // allocate this argument, which should never happen here.
      if (Result) {
#ifndef NDEBUG
        errs() << "Call operand #" << i << " has unhandled type "
               << ArgVT << "\n";
#endif
        llvm_unreachable(nullptr);
      }
    }
  } else {
    // All arguments are treated the same.
    CCInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4);
  }

  // Assign locations to all of the outgoing aggregate by value arguments.
  SmallVector<CCValAssign, 16> ByValArgLocs;
  CCState CCByValInfo(CallConv, IsVarArg, MF, ByValArgLocs, *DAG.getContext());

  // Reserve stack space for the allocations in CCInfo.
  CCByValInfo.AllocateStack(CCInfo.getStackSize(), PtrAlign);

  CCByValInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4_ByVal);

  // Size of the linkage area, parameter list area and the part of the local
  // space variable where copies of aggregates which are passed by value are
  // stored.
  unsigned NumBytes = CCByValInfo.getStackSize();

  // Calculate by how many bytes the stack has to be adjusted in case of tail
  // call optimization.
  int SPDiff = CalculateTailCallSPDiff(DAG, IsTailCall, NumBytes);

  // Adjust the stack pointer for the new arguments...
  // These operations are automatically eliminated by the prolog/epilog pass
  Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
  SDValue CallSeqStart = Chain;

  // Load the return address and frame pointer so it can be moved somewhere else
  // later.
  SDValue LROp, FPOp;
  Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl);

  // Set up a copy of the stack pointer for use loading and storing any
  // arguments that may not fit in the registers available for argument
  // passing.
  SDValue StackPtr = DAG.getRegister(PPC::R1, MVT::i32);

  SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
  SmallVector<SDValue, 8> MemOpChains;

  bool seenFloatArg = false;
  // Walk the register/memloc assignments, inserting copies/loads.
  // i - Tracks the index into the list of registers allocated for the call
  // RealArgIdx - Tracks the index into the list of actual function arguments
  // j - Tracks the index into the list of byval arguments
  for (unsigned i = 0, RealArgIdx = 0, j = 0, e = ArgLocs.size();
       i != e;
       ++i, ++RealArgIdx) {
    CCValAssign &VA = ArgLocs[i];
    SDValue Arg = OutVals[RealArgIdx];
    ISD::ArgFlagsTy Flags = Outs[RealArgIdx].Flags;

    if (Flags.isByVal()) {
      // Argument is an aggregate which is passed by value, thus we need to
      // create a copy of it in the local variable space of the current stack
      // frame (which is the stack frame of the caller) and pass the address of
      // this copy to the callee.
      assert((j < ByValArgLocs.size()) && "Index out of bounds!");
      CCValAssign &ByValVA = ByValArgLocs[j++];
      assert((VA.getValNo() == ByValVA.getValNo()) && "ValNo mismatch!");

      // Memory reserved in the local variable space of the callers stack frame.
      unsigned LocMemOffset = ByValVA.getLocMemOffset();

      SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
      PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(MF.getDataLayout()),
                           StackPtr, PtrOff);

      // Create a copy of the argument in the local area of the current
      // stack frame.
      SDValue MemcpyCall =
        CreateCopyOfByValArgument(Arg, PtrOff,
                                  CallSeqStart.getNode()->getOperand(0),
                                  Flags, DAG, dl);

      // This must go outside the CALLSEQ_START..END.
      SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall, NumBytes, 0,
                                                     SDLoc(MemcpyCall));
      DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
                             NewCallSeqStart.getNode());
      Chain = CallSeqStart = NewCallSeqStart;

      // Pass the address of the aggregate copy on the stack either in a
      // physical register or in the parameter list area of the current stack
      // frame to the callee.
      Arg = PtrOff;
    }

    // When useCRBits() is true, there can be i1 arguments.
    // It is because getRegisterType(MVT::i1) => MVT::i1,
    // and for other integer types getRegisterType() => MVT::i32.
    // Extend i1 and ensure callee will get i32.
    if (Arg.getValueType() == MVT::i1)
      Arg = DAG.getNode(Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND,
                        dl, MVT::i32, Arg);

    if (VA.isRegLoc()) {
      seenFloatArg |= VA.getLocVT().isFloatingPoint();
      // Put argument in a physical register.
      if (Subtarget.hasSPE() && Arg.getValueType() == MVT::f64) {
        // SPE splits an f64 across two consecutive GPR locations; which
        // half goes first depends on endianness.
        bool IsLE = Subtarget.isLittleEndian();
        SDValue SVal = DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
                                   DAG.getIntPtrConstant(IsLE ? 0 : 1, dl));
        RegsToPass.push_back(std::make_pair(VA.getLocReg(), SVal.getValue(0)));
        SVal = DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
                           DAG.getIntPtrConstant(IsLE ? 1 : 0, dl));
        RegsToPass.push_back(std::make_pair(ArgLocs[++i].getLocReg(),
                                            SVal.getValue(0)));
      } else
        RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
    } else {
      // Put argument in the parameter list area of the current stack frame.
      assert(VA.isMemLoc());
      unsigned LocMemOffset = VA.getLocMemOffset();

      if (!IsTailCall) {
        SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
        PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(MF.getDataLayout()),
                             StackPtr, PtrOff);

        MemOpChains.push_back(
            DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));
      } else {
        // Calculate and remember argument location.
        CalculateTailCallArgDest(DAG, MF, false, Arg, SPDiff, LocMemOffset,
                                 TailCallArguments);
      }
    }
  }

  if (!MemOpChains.empty())
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);

  // Build a sequence of copy-to-reg nodes chained together with token chain
  // and flag operands which copy the outgoing args into the appropriate regs.
  SDValue InGlue;
  for (const auto &[Reg, N] : RegsToPass) {
    Chain = DAG.getCopyToReg(Chain, dl, Reg, N, InGlue);
    InGlue = Chain.getValue(1);
  }

  // Set CR bit 6 to true if this is a vararg call with floating args passed in
  // registers.
  if (IsVarArg) {
    SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
    SDValue Ops[] = { Chain, InGlue };

    // Omit the glue operand when no copies were emitted above.
    Chain = DAG.getNode(seenFloatArg ? PPCISD::CR6SET : PPCISD::CR6UNSET, dl,
                        VTs, ArrayRef(Ops, InGlue.getNode() ? 2 : 1));

    InGlue = Chain.getValue(1);
  }

  if (IsTailCall)
    PrepareTailCall(DAG, InGlue, Chain, dl, SPDiff, NumBytes, LROp, FPOp,
                    TailCallArguments);

  return FinishCall(CFlags, dl, DAG, RegsToPass, InGlue, Chain, CallSeqStart,
                    Callee, SPDiff, NumBytes, Ins, InVals, CB);
}
6101
6102// Copy an argument into memory, being careful to do this outside the
6103// call sequence for the call to which the argument belongs.
6104SDValue PPCTargetLowering::createMemcpyOutsideCallSeq(
6105 SDValue Arg, SDValue PtrOff, SDValue CallSeqStart, ISD::ArgFlagsTy Flags,
6106 SelectionDAG &DAG, const SDLoc &dl) const {
6107 SDValue MemcpyCall = CreateCopyOfByValArgument(Arg, PtrOff,
6108 CallSeqStart.getNode()->getOperand(0),
6109 Flags, DAG, dl);
6110 // The MEMCPY must go outside the CALLSEQ_START..END.
6111 int64_t FrameSize = CallSeqStart.getConstantOperandVal(1);
6112 SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall, FrameSize, 0,
6113 SDLoc(MemcpyCall));
6114 DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
6115 NewCallSeqStart.getNode());
6116 return NewCallSeqStart;
6117}
6118
6119SDValue PPCTargetLowering::LowerCall_64SVR4(
6120 SDValue Chain, SDValue Callee, CallFlags CFlags,
6122 const SmallVectorImpl<SDValue> &OutVals,
6123 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
6125 const CallBase *CB) const {
6126 bool isELFv2ABI = Subtarget.isELFv2ABI();
6127 bool isLittleEndian = Subtarget.isLittleEndian();
6128 unsigned NumOps = Outs.size();
6129 bool IsSibCall = false;
6130 bool IsFastCall = CFlags.CallConv == CallingConv::Fast;
6131
6132 EVT PtrVT = getPointerTy(DAG.getDataLayout());
6133 unsigned PtrByteSize = 8;
6134
6135 MachineFunction &MF = DAG.getMachineFunction();
6136
6137 if (CFlags.IsTailCall && !getTargetMachine().Options.GuaranteedTailCallOpt)
6138 IsSibCall = true;
6139
6140 // Mark this function as potentially containing a function that contains a
6141 // tail call. As a consequence the frame pointer will be used for dynamicalloc
6142 // and restoring the callers stack pointer in this functions epilog. This is
6143 // done because by tail calling the called function might overwrite the value
6144 // in this function's (MF) stack pointer stack slot 0(SP).
6145 if (getTargetMachine().Options.GuaranteedTailCallOpt && IsFastCall)
6146 MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
6147
6148 assert(!(IsFastCall && CFlags.IsVarArg) &&
6149 "fastcc not supported on varargs functions");
6150
6151 // Count how many bytes are to be pushed on the stack, including the linkage
6152 // area, and parameter passing area. On ELFv1, the linkage area is 48 bytes
6153 // reserved space for [SP][CR][LR][2 x unused][TOC]; on ELFv2, the linkage
6154 // area is 32 bytes reserved space for [SP][CR][LR][TOC].
6155 unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
6156 unsigned NumBytes = LinkageSize;
6157 unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
6158
6159 static const MCPhysReg GPR[] = {
6160 PPC::X3, PPC::X4, PPC::X5, PPC::X6,
6161 PPC::X7, PPC::X8, PPC::X9, PPC::X10,
6162 };
6163 static const MCPhysReg VR[] = {
6164 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
6165 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
6166 };
6167
6168 const unsigned NumGPRs = std::size(GPR);
6169 const unsigned NumFPRs = useSoftFloat() ? 0 : 13;
6170 const unsigned NumVRs = std::size(VR);
6171
6172 // On ELFv2, we can avoid allocating the parameter area if all the arguments
6173 // can be passed to the callee in registers.
6174 // For the fast calling convention, there is another check below.
6175 // Note: We should keep consistent with LowerFormalArguments_64SVR4()
6176 bool HasParameterArea = !isELFv2ABI || CFlags.IsVarArg || IsFastCall;
6177 if (!HasParameterArea) {
6178 unsigned ParamAreaSize = NumGPRs * PtrByteSize;
6179 unsigned AvailableFPRs = NumFPRs;
6180 unsigned AvailableVRs = NumVRs;
6181 unsigned NumBytesTmp = NumBytes;
6182 for (unsigned i = 0; i != NumOps; ++i) {
6183 if (Outs[i].Flags.isNest()) continue;
6184 if (CalculateStackSlotUsed(Outs[i].VT, Outs[i].ArgVT, Outs[i].Flags,
6185 PtrByteSize, LinkageSize, ParamAreaSize,
6186 NumBytesTmp, AvailableFPRs, AvailableVRs))
6187 HasParameterArea = true;
6188 }
6189 }
6190
6191 // When using the fast calling convention, we don't provide backing for
6192 // arguments that will be in registers.
6193 unsigned NumGPRsUsed = 0, NumFPRsUsed = 0, NumVRsUsed = 0;
6194
6195 // Avoid allocating parameter area for fastcc functions if all the arguments
6196 // can be passed in the registers.
6197 if (IsFastCall)
6198 HasParameterArea = false;
6199
6200 // Add up all the space actually used.
6201 for (unsigned i = 0; i != NumOps; ++i) {
6202 ISD::ArgFlagsTy Flags = Outs[i].Flags;
6203 EVT ArgVT = Outs[i].VT;
6204 EVT OrigVT = Outs[i].ArgVT;
6205
6206 if (Flags.isNest())
6207 continue;
6208
6209 if (IsFastCall) {
6210 if (Flags.isByVal()) {
6211 NumGPRsUsed += (Flags.getByValSize()+7)/8;
6212 if (NumGPRsUsed > NumGPRs)
6213 HasParameterArea = true;
6214 } else {
6215 switch (ArgVT.getSimpleVT().SimpleTy) {
6216 default: llvm_unreachable("Unexpected ValueType for argument!");
6217 case MVT::i1:
6218 case MVT::i32:
6219 case MVT::i64:
6220 if (++NumGPRsUsed <= NumGPRs)
6221 continue;
6222 break;
6223 case MVT::v4i32:
6224 case MVT::v8i16:
6225 case MVT::v16i8:
6226 case MVT::v2f64:
6227 case MVT::v2i64:
6228 case MVT::v1i128:
6229 case MVT::f128:
6230 if (++NumVRsUsed <= NumVRs)
6231 continue;
6232 break;
6233 case MVT::v4f32:
6234 if (++NumVRsUsed <= NumVRs)
6235 continue;
6236 break;
6237 case MVT::f32:
6238 case MVT::f64:
6239 if (++NumFPRsUsed <= NumFPRs)
6240 continue;
6241 break;
6242 }
6243 HasParameterArea = true;
6244 }
6245 }
6246
6247 /* Respect alignment of argument on the stack. */
6248 auto Alignement =
6249 CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
6250 NumBytes = alignTo(NumBytes, Alignement);
6251
6252 NumBytes += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
6253 if (Flags.isInConsecutiveRegsLast())
6254 NumBytes = ((NumBytes + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
6255 }
6256
6257 unsigned NumBytesActuallyUsed = NumBytes;
6258
6259 // In the old ELFv1 ABI,
6260 // the prolog code of the callee may store up to 8 GPR argument registers to
6261 // the stack, allowing va_start to index over them in memory if its varargs.
6262 // Because we cannot tell if this is needed on the caller side, we have to
6263 // conservatively assume that it is needed. As such, make sure we have at
6264 // least enough stack space for the caller to store the 8 GPRs.
6265 // In the ELFv2 ABI, we allocate the parameter area iff a callee
6266 // really requires memory operands, e.g. a vararg function.
6267 if (HasParameterArea)
6268 NumBytes = std::max(NumBytes, LinkageSize + 8 * PtrByteSize);
6269 else
6270 NumBytes = LinkageSize;
6271
6272 // Tail call needs the stack to be aligned.
6273 if (getTargetMachine().Options.GuaranteedTailCallOpt && IsFastCall)
6274 NumBytes = EnsureStackAlignment(Subtarget.getFrameLowering(), NumBytes);
6275
6276 int SPDiff = 0;
6277
6278 // Calculate by how many bytes the stack has to be adjusted in case of tail
6279 // call optimization.
6280 if (!IsSibCall)
6281 SPDiff = CalculateTailCallSPDiff(DAG, CFlags.IsTailCall, NumBytes);
6282
6283 // To protect arguments on the stack from being clobbered in a tail call,
6284 // force all the loads to happen before doing any other lowering.
6285 if (CFlags.IsTailCall)
6286 Chain = DAG.getStackArgumentTokenFactor(Chain);
6287
6288 // Adjust the stack pointer for the new arguments...
6289 // These operations are automatically eliminated by the prolog/epilog pass
6290 if (!IsSibCall)
6291 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
6292 SDValue CallSeqStart = Chain;
6293
6294 // Load the return address and frame pointer so it can be move somewhere else
6295 // later.
6296 SDValue LROp, FPOp;
6297 Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl);
6298
6299 // Set up a copy of the stack pointer for use loading and storing any
6300 // arguments that may not fit in the registers available for argument
6301 // passing.
6302 SDValue StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
6303
6304 // Figure out which arguments are going to go in registers, and which in
6305 // memory. Also, if this is a vararg function, floating point operations
6306 // must be stored to our stack, and loaded into integer regs as well, if
6307 // any integer regs are available for argument passing.
6308 unsigned ArgOffset = LinkageSize;
6309
6311 SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
6312
6313 SmallVector<SDValue, 8> MemOpChains;
6314 for (unsigned i = 0; i != NumOps; ++i) {
6315 SDValue Arg = OutVals[i];
6316 ISD::ArgFlagsTy Flags = Outs[i].Flags;
6317 EVT ArgVT = Outs[i].VT;
6318 EVT OrigVT = Outs[i].ArgVT;
6319
6320 // PtrOff will be used to store the current argument to the stack if a
6321 // register cannot be found for it.
6322 SDValue PtrOff;
6323
6324 // We re-align the argument offset for each argument, except when using the
6325 // fast calling convention, when we need to make sure we do that only when
6326 // we'll actually use a stack slot.
6327 auto ComputePtrOff = [&]() {
6328 /* Respect alignment of argument on the stack. */
6329 auto Alignment =
6330 CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
6331 ArgOffset = alignTo(ArgOffset, Alignment);
6332
6333 PtrOff = DAG.getConstant(ArgOffset, dl, StackPtr.getValueType());
6334
6335 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
6336 };
6337
6338 if (!IsFastCall) {
6339 ComputePtrOff();
6340
6341 /* Compute GPR index associated with argument offset. */
6342 GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
6343 GPR_idx = std::min(GPR_idx, NumGPRs);
6344 }
6345
6346 // Promote integers to 64-bit values.
6347 if (Arg.getValueType() == MVT::i32 || Arg.getValueType() == MVT::i1) {
6348 // FIXME: Should this use ANY_EXTEND if neither sext nor zext?
6349 unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
6350 Arg = DAG.getNode(ExtOp, dl, MVT::i64, Arg);
6351 }
6352
6353 // FIXME memcpy is used way more than necessary. Correctness first.
6354 // Note: "by value" is code for passing a structure by value, not
6355 // basic types.
6356 if (Flags.isByVal()) {
6357 // Note: Size includes alignment padding, so
6358 // struct x { short a; char b; }
6359 // will have Size = 4. With #pragma pack(1), it will have Size = 3.
6360 // These are the proper values we need for right-justifying the
6361 // aggregate in a parameter register.
6362 unsigned Size = Flags.getByValSize();
6363
6364 // An empty aggregate parameter takes up no storage and no
6365 // registers.
6366 if (Size == 0)
6367 continue;
6368
6369 if (IsFastCall)
6370 ComputePtrOff();
6371
6372 // All aggregates smaller than 8 bytes must be passed right-justified.
6373 if (Size==1 || Size==2 || Size==4) {
6374 EVT VT = (Size==1) ? MVT::i8 : ((Size==2) ? MVT::i16 : MVT::i32);
6375 if (GPR_idx != NumGPRs) {
6376 SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg,
6377 MachinePointerInfo(), VT);
6378 MemOpChains.push_back(Load.getValue(1));
6379 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6380
6381 ArgOffset += PtrByteSize;
6382 continue;
6383 }
6384 }
6385
6386 if (GPR_idx == NumGPRs && Size < 8) {
6387 SDValue AddPtr = PtrOff;
6388 if (!isLittleEndian) {
6389 SDValue Const = DAG.getConstant(PtrByteSize - Size, dl,
6390 PtrOff.getValueType());
6391 AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
6392 }
6393 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
6394 CallSeqStart,
6395 Flags, DAG, dl);
6396 ArgOffset += PtrByteSize;
6397 continue;
6398 }
6399 // Copy the object to parameter save area if it can not be entirely passed
6400 // by registers.
6401 // FIXME: we only need to copy the parts which need to be passed in
6402 // parameter save area. For the parts passed by registers, we don't need
6403 // to copy them to the stack although we need to allocate space for them
6404 // in parameter save area.
6405 if ((NumGPRs - GPR_idx) * PtrByteSize < Size)
6406 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, PtrOff,
6407 CallSeqStart,
6408 Flags, DAG, dl);
6409
6410 // When a register is available, pass a small aggregate right-justified.
6411 if (Size < 8 && GPR_idx != NumGPRs) {
6412 // The easiest way to get this right-justified in a register
6413 // is to copy the structure into the rightmost portion of a
6414 // local variable slot, then load the whole slot into the
6415 // register.
6416 // FIXME: The memcpy seems to produce pretty awful code for
6417 // small aggregates, particularly for packed ones.
6418 // FIXME: It would be preferable to use the slot in the
6419 // parameter save area instead of a new local variable.
6420 SDValue AddPtr = PtrOff;
6421 if (!isLittleEndian) {
6422 SDValue Const = DAG.getConstant(8 - Size, dl, PtrOff.getValueType());
6423 AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
6424 }
6425 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
6426 CallSeqStart,
6427 Flags, DAG, dl);
6428
6429 // Load the slot into the register.
6430 SDValue Load =
6431 DAG.getLoad(PtrVT, dl, Chain, PtrOff, MachinePointerInfo());
6432 MemOpChains.push_back(Load.getValue(1));
6433 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6434
6435 // Done with this argument.
6436 ArgOffset += PtrByteSize;
6437 continue;
6438 }
6439
6440 // For aggregates larger than PtrByteSize, copy the pieces of the
6441 // object that fit into registers from the parameter save area.
6442 for (unsigned j=0; j<Size; j+=PtrByteSize) {
6443 SDValue Const = DAG.getConstant(j, dl, PtrOff.getValueType());
6444 SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
6445 if (GPR_idx != NumGPRs) {
6446 unsigned LoadSizeInBits = std::min(PtrByteSize, (Size - j)) * 8;
6447 EVT ObjType = EVT::getIntegerVT(*DAG.getContext(), LoadSizeInBits);
6448 SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, AddArg,
6449 MachinePointerInfo(), ObjType);
6450
6451 MemOpChains.push_back(Load.getValue(1));
6452 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6453 ArgOffset += PtrByteSize;
6454 } else {
6455 ArgOffset += ((Size - j + PtrByteSize-1)/PtrByteSize)*PtrByteSize;
6456 break;
6457 }
6458 }
6459 continue;
6460 }
6461
6462 switch (Arg.getSimpleValueType().SimpleTy) {
6463 default: llvm_unreachable("Unexpected ValueType for argument!");
6464 case MVT::i1:
6465 case MVT::i32:
6466 case MVT::i64:
6467 if (Flags.isNest()) {
6468 // The 'nest' parameter, if any, is passed in R11.
6469 RegsToPass.push_back(std::make_pair(PPC::X11, Arg));
6470 break;
6471 }
6472
6473 // These can be scalar arguments or elements of an integer array type
6474 // passed directly. Clang may use those instead of "byval" aggregate
6475 // types to avoid forcing arguments to memory unnecessarily.
6476 if (GPR_idx != NumGPRs) {
6477 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg));
6478 } else {
6479 if (IsFastCall)
6480 ComputePtrOff();
6481
6482 assert(HasParameterArea &&
6483 "Parameter area must exist to pass an argument in memory.");
6484 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6485 true, CFlags.IsTailCall, false, MemOpChains,
6486 TailCallArguments, dl);
6487 if (IsFastCall)
6488 ArgOffset += PtrByteSize;
6489 }
6490 if (!IsFastCall)
6491 ArgOffset += PtrByteSize;
6492 break;
6493 case MVT::f32:
6494 case MVT::f64: {
6495 // These can be scalar arguments or elements of a float array type
6496 // passed directly. The latter are used to implement ELFv2 homogenous
6497 // float aggregates.
6498
6499 // Named arguments go into FPRs first, and once they overflow, the
6500 // remaining arguments go into GPRs and then the parameter save area.
6501 // Unnamed arguments for vararg functions always go to GPRs and
6502 // then the parameter save area. For now, put all arguments to vararg
6503 // routines always in both locations (FPR *and* GPR or stack slot).
6504 bool NeedGPROrStack = CFlags.IsVarArg || FPR_idx == NumFPRs;
6505 bool NeededLoad = false;
6506
6507 // First load the argument into the next available FPR.
6508 if (FPR_idx != NumFPRs)
6509 RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg));
6510
6511 // Next, load the argument into GPR or stack slot if needed.
6512 if (!NeedGPROrStack)
6513 ;
6514 else if (GPR_idx != NumGPRs && !IsFastCall) {
6515 // FIXME: We may want to re-enable this for CallingConv::Fast on the P8
6516 // once we support fp <-> gpr moves.
6517
6518 // In the non-vararg case, this can only ever happen in the
6519 // presence of f32 array types, since otherwise we never run
6520 // out of FPRs before running out of GPRs.
6521 SDValue ArgVal;
6522
6523 // Double values are always passed in a single GPR.
6524 if (Arg.getValueType() != MVT::f32) {
6525 ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i64, Arg);
6526
6527 // Non-array float values are extended and passed in a GPR.
6528 } else if (!Flags.isInConsecutiveRegs()) {
6529 ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
6530 ArgVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, ArgVal);
6531
6532 // If we have an array of floats, we collect every odd element
6533 // together with its predecessor into one GPR.
6534 } else if (ArgOffset % PtrByteSize != 0) {
6535 SDValue Lo, Hi;
6536 Lo = DAG.getNode(ISD::BITCAST, dl, MVT::i32, OutVals[i - 1]);
6537 Hi = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
6538 if (!isLittleEndian)
6539 std::swap(Lo, Hi);
6540 ArgVal = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
6541
6542 // The final element, if even, goes into the first half of a GPR.
6543 } else if (Flags.isInConsecutiveRegsLast()) {
6544 ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
6545 ArgVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, ArgVal);
6546 if (!isLittleEndian)
6547 ArgVal = DAG.getNode(ISD::SHL, dl, MVT::i64, ArgVal,
6548 DAG.getConstant(32, dl, MVT::i32));
6549
6550 // Non-final even elements are skipped; they will be handled
6551 // together the with subsequent argument on the next go-around.
6552 } else
6553 ArgVal = SDValue();
6554
6555 if (ArgVal.getNode())
6556 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], ArgVal));
6557 } else {
6558 if (IsFastCall)
6559 ComputePtrOff();
6560
6561 // Single-precision floating-point values are mapped to the
6562 // second (rightmost) word of the stack doubleword.
6563 if (Arg.getValueType() == MVT::f32 &&
6564 !isLittleEndian && !Flags.isInConsecutiveRegs()) {
6565 SDValue ConstFour = DAG.getConstant(4, dl, PtrOff.getValueType());
6566 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour);
6567 }
6568
6569 assert(HasParameterArea &&
6570 "Parameter area must exist to pass an argument in memory.");
6571 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6572 true, CFlags.IsTailCall, false, MemOpChains,
6573 TailCallArguments, dl);
6574
6575 NeededLoad = true;
6576 }
6577 // When passing an array of floats, the array occupies consecutive
6578 // space in the argument area; only round up to the next doubleword
6579 // at the end of the array. Otherwise, each float takes 8 bytes.
6580 if (!IsFastCall || NeededLoad) {
6581 ArgOffset += (Arg.getValueType() == MVT::f32 &&
6582 Flags.isInConsecutiveRegs()) ? 4 : 8;
6583 if (Flags.isInConsecutiveRegsLast())
6584 ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
6585 }
6586 break;
6587 }
6588 case MVT::v4f32:
6589 case MVT::v4i32:
6590 case MVT::v8i16:
6591 case MVT::v16i8:
6592 case MVT::v2f64:
6593 case MVT::v2i64:
6594 case MVT::v1i128:
6595 case MVT::f128:
6596 // These can be scalar arguments or elements of a vector array type
6597 // passed directly. The latter are used to implement ELFv2 homogenous
6598 // vector aggregates.
6599
6600 // For a varargs call, named arguments go into VRs or on the stack as
6601 // usual; unnamed arguments always go to the stack or the corresponding
6602 // GPRs when within range. For now, we always put the value in both
6603 // locations (or even all three).
6604 if (CFlags.IsVarArg) {
6605 assert(HasParameterArea &&
6606 "Parameter area must exist if we have a varargs call.");
6607 // We could elide this store in the case where the object fits
6608 // entirely in R registers. Maybe later.
6609 SDValue Store =
6610 DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo());
6611 MemOpChains.push_back(Store);
6612 if (VR_idx != NumVRs) {
6613 SDValue Load =
6614 DAG.getLoad(MVT::v4f32, dl, Store, PtrOff, MachinePointerInfo());
6615 MemOpChains.push_back(Load.getValue(1));
6616 RegsToPass.push_back(std::make_pair(VR[VR_idx++], Load));
6617 }
6618 ArgOffset += 16;
6619 for (unsigned i=0; i<16; i+=PtrByteSize) {
6620 if (GPR_idx == NumGPRs)
6621 break;
6622 SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
6623 DAG.getConstant(i, dl, PtrVT));
6624 SDValue Load =
6625 DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo());
6626 MemOpChains.push_back(Load.getValue(1));
6627 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6628 }
6629 break;
6630 }
6631
6632 // Non-varargs Altivec params go into VRs or on the stack.
6633 if (VR_idx != NumVRs) {
6634 RegsToPass.push_back(std::make_pair(VR[VR_idx++], Arg));
6635 } else {
6636 if (IsFastCall)
6637 ComputePtrOff();
6638
6639 assert(HasParameterArea &&
6640 "Parameter area must exist to pass an argument in memory.");
6641 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6642 true, CFlags.IsTailCall, true, MemOpChains,
6643 TailCallArguments, dl);
6644 if (IsFastCall)
6645 ArgOffset += 16;
6646 }
6647
6648 if (!IsFastCall)
6649 ArgOffset += 16;
6650 break;
6651 }
6652 }
6653
6654 assert((!HasParameterArea || NumBytesActuallyUsed == ArgOffset) &&
6655 "mismatch in size of parameter area");
6656 (void)NumBytesActuallyUsed;
6657
6658 if (!MemOpChains.empty())
6659 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
6660
6661 // Check if this is an indirect call (MTCTR/BCTRL).
6662 // See prepareDescriptorIndirectCall and buildCallOperands for more
6663 // information about calls through function pointers in the 64-bit SVR4 ABI.
6664 if (CFlags.IsIndirect) {
6665 // For 64-bit ELFv2 ABI with PCRel, do not save the TOC of the
6666 // caller in the TOC save area.
6667 if (isTOCSaveRestoreRequired(Subtarget)) {
6668 assert(!CFlags.IsTailCall && "Indirect tails calls not supported");
6669 // Load r2 into a virtual register and store it to the TOC save area.
6670 setUsesTOCBasePtr(DAG);
6671 SDValue Val = DAG.getCopyFromReg(Chain, dl, PPC::X2, MVT::i64);
6672 // TOC save area offset.
6673 unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
6674 SDValue PtrOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
6675 SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
6676 Chain = DAG.getStore(Val.getValue(1), dl, Val, AddPtr,
6678 DAG.getMachineFunction(), TOCSaveOffset));
6679 }
6680 // In the ELFv2 ABI, R12 must contain the address of an indirect callee.
6681 // This does not mean the MTCTR instruction must use R12; it's easier
6682 // to model this as an extra parameter, so do that.
6683 if (isELFv2ABI && !CFlags.IsPatchPoint)
6684 RegsToPass.push_back(std::make_pair((unsigned)PPC::X12, Callee));
6685 }
6686
6687 // Build a sequence of copy-to-reg nodes chained together with token chain
6688 // and flag operands which copy the outgoing args into the appropriate regs.
6689 SDValue InGlue;
6690 for (const auto &[Reg, N] : RegsToPass) {
6691 Chain = DAG.getCopyToReg(Chain, dl, Reg, N, InGlue);
6692 InGlue = Chain.getValue(1);
6693 }
6694
6695 if (CFlags.IsTailCall && !IsSibCall)
6696 PrepareTailCall(DAG, InGlue, Chain, dl, SPDiff, NumBytes, LROp, FPOp,
6697 TailCallArguments);
6698
6699 return FinishCall(CFlags, dl, DAG, RegsToPass, InGlue, Chain, CallSeqStart,
6700 Callee, SPDiff, NumBytes, Ins, InVals, CB);
6701}
6702
6703// Returns true when the shadow of a general purpose argument register
6704// in the parameter save area is aligned to at least 'RequiredAlign'.
6705static bool isGPRShadowAligned(MCPhysReg Reg, Align RequiredAlign) {
6706 assert(RequiredAlign.value() <= 16 &&
6707 "Required alignment greater than stack alignment.");
6708 switch (Reg) {
6709 default:
6710 report_fatal_error("called on invalid register.");
6711 case PPC::R5:
6712 case PPC::R9:
6713 case PPC::X3:
6714 case PPC::X5:
6715 case PPC::X7:
6716 case PPC::X9:
6717 // These registers are 16 byte aligned which is the most strict aligment
6718 // we can support.
6719 return true;
6720 case PPC::R3:
6721 case PPC::R7:
6722 case PPC::X4:
6723 case PPC::X6:
6724 case PPC::X8:
6725 case PPC::X10:
6726 // The shadow of these registers in the PSA is 8 byte aligned.
6727 return RequiredAlign <= 8;
6728 case PPC::R4:
6729 case PPC::R6:
6730 case PPC::R8:
6731 case PPC::R10:
6732 return RequiredAlign <= 4;
6733 }
6734}
6735
6736static bool CC_AIX(unsigned ValNo, MVT ValVT, MVT LocVT,
6737 CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
6738 Type *OrigTy, CCState &State) {
6739 const PPCSubtarget &Subtarget = static_cast<const PPCSubtarget &>(
6740 State.getMachineFunction().getSubtarget());
6741 const bool IsPPC64 = Subtarget.isPPC64();
6742 const unsigned PtrSize = IsPPC64 ? 8 : 4;
6743 const Align PtrAlign(PtrSize);
6744 const Align StackAlign(16);
6745 const MVT RegVT = Subtarget.getScalarIntVT();
6746
6747 if (ValVT == MVT::f128)
6748 report_fatal_error("f128 is unimplemented on AIX.");
6749
6750 static const MCPhysReg GPR_32[] = {// 32-bit registers.
6751 PPC::R3, PPC::R4, PPC::R5, PPC::R6,
6752 PPC::R7, PPC::R8, PPC::R9, PPC::R10};
6753 static const MCPhysReg GPR_64[] = {// 64-bit registers.
6754 PPC::X3, PPC::X4, PPC::X5, PPC::X6,
6755 PPC::X7, PPC::X8, PPC::X9, PPC::X10};
6756
6757 static const MCPhysReg VR[] = {// Vector registers.
6758 PPC::V2, PPC::V3, PPC::V4, PPC::V5,
6759 PPC::V6, PPC::V7, PPC::V8, PPC::V9,
6760 PPC::V10, PPC::V11, PPC::V12, PPC::V13};
6761
6762 const ArrayRef<MCPhysReg> GPRs = IsPPC64 ? GPR_64 : GPR_32;
6763
6764 if (ArgFlags.isNest()) {
6765 MCRegister EnvReg = State.AllocateReg(IsPPC64 ? PPC::X11 : PPC::R11);
6766 if (!EnvReg)
6767 report_fatal_error("More then one nest argument.");
6768 State.addLoc(CCValAssign::getReg(ValNo, ValVT, EnvReg, RegVT, LocInfo));
6769 return false;
6770 }
6771
6772 if (ArgFlags.isByVal()) {
6773 const Align ByValAlign(ArgFlags.getNonZeroByValAlign());
6774 if (ByValAlign > StackAlign)
6775 report_fatal_error("Pass-by-value arguments with alignment greater than "
6776 "16 are not supported.");
6777
6778 const unsigned ByValSize = ArgFlags.getByValSize();
6779 const Align ObjAlign = ByValAlign > PtrAlign ? ByValAlign : PtrAlign;
6780
6781 // An empty aggregate parameter takes up no storage and no registers,
6782 // but needs a MemLoc for a stack slot for the formal arguments side.
6783 if (ByValSize == 0) {
6785 State.getStackSize(), RegVT, LocInfo));
6786 return false;
6787 }
6788
6789 // Shadow allocate any registers that are not properly aligned.
6790 unsigned NextReg = State.getFirstUnallocated(GPRs);
6791 while (NextReg != GPRs.size() &&
6792 !isGPRShadowAligned(GPRs[NextReg], ObjAlign)) {
6793 // Shadow allocate next registers since its aligment is not strict enough.
6794 MCRegister Reg = State.AllocateReg(GPRs);
6795 // Allocate the stack space shadowed by said register.
6796 State.AllocateStack(PtrSize, PtrAlign);
6797 assert(Reg && "Alocating register unexpectedly failed.");
6798 (void)Reg;
6799 NextReg = State.getFirstUnallocated(GPRs);
6800 }
6801
6802 const unsigned StackSize = alignTo(ByValSize, ObjAlign);
6803 unsigned Offset = State.AllocateStack(StackSize, ObjAlign);
6804 for (const unsigned E = Offset + StackSize; Offset < E; Offset += PtrSize) {
6805 if (MCRegister Reg = State.AllocateReg(GPRs))
6806 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, RegVT, LocInfo));
6807 else {
6810 LocInfo));
6811 break;
6812 }
6813 }
6814 return false;
6815 }
6816
6817 // Arguments always reserve parameter save area.
6818 switch (ValVT.SimpleTy) {
6819 default:
6820 report_fatal_error("Unhandled value type for argument.");
6821 case MVT::i64:
6822 // i64 arguments should have been split to i32 for PPC32.
6823 assert(IsPPC64 && "PPC32 should have split i64 values.");
6824 [[fallthrough]];
6825 case MVT::i1:
6826 case MVT::i32: {
6827 const unsigned Offset = State.AllocateStack(PtrSize, PtrAlign);
6828 // AIX integer arguments are always passed in register width.
6829 if (ValVT.getFixedSizeInBits() < RegVT.getFixedSizeInBits())
6830 LocInfo = ArgFlags.isSExt() ? CCValAssign::LocInfo::SExt
6832 if (MCRegister Reg = State.AllocateReg(GPRs))
6833 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, RegVT, LocInfo));
6834 else
6835 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, RegVT, LocInfo));
6836
6837 return false;
6838 }
6839 case MVT::f32:
6840 case MVT::f64: {
6841 // Parameter save area (PSA) is reserved even if the float passes in fpr.
6842 const unsigned StoreSize = LocVT.getStoreSize();
6843 // Floats are always 4-byte aligned in the PSA on AIX.
6844 // This includes f64 in 64-bit mode for ABI compatibility.
6845 const unsigned Offset =
6846 State.AllocateStack(IsPPC64 ? 8 : StoreSize, Align(4));
6847 MCRegister FReg = State.AllocateReg(FPR);
6848 if (FReg)
6849 State.addLoc(CCValAssign::getReg(ValNo, ValVT, FReg, LocVT, LocInfo));
6850
6851 // Reserve and initialize GPRs or initialize the PSA as required.
6852 for (unsigned I = 0; I < StoreSize; I += PtrSize) {
6853 if (MCRegister Reg = State.AllocateReg(GPRs)) {
6854 assert(FReg && "An FPR should be available when a GPR is reserved.");
6855 if (State.isVarArg()) {
6856 // Successfully reserved GPRs are only initialized for vararg calls.
6857 // Custom handling is required for:
6858 // f64 in PPC32 needs to be split into 2 GPRs.
6859 // f32 in PPC64 needs to occupy only lower 32 bits of 64-bit GPR.
6860 State.addLoc(
6861 CCValAssign::getCustomReg(ValNo, ValVT, Reg, RegVT, LocInfo));
6862 }
6863 } else {
6864 // If there are insufficient GPRs, the PSA needs to be initialized.
6865 // Initialization occurs even if an FPR was initialized for
6866 // compatibility with the AIX XL compiler. The full memory for the
6867 // argument will be initialized even if a prior word is saved in GPR.
6868 // A custom memLoc is used when the argument also passes in FPR so
6869 // that the callee handling can skip over it easily.
6870 State.addLoc(
6871 FReg ? CCValAssign::getCustomMem(ValNo, ValVT, Offset, LocVT,
6872 LocInfo)
6873 : CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
6874 break;
6875 }
6876 }
6877
6878 return false;
6879 }
6880 case MVT::v4f32:
6881 case MVT::v4i32:
6882 case MVT::v8i16:
6883 case MVT::v16i8:
6884 case MVT::v2i64:
6885 case MVT::v2f64:
6886 case MVT::v1i128: {
6887 const unsigned VecSize = 16;
6888 const Align VecAlign(VecSize);
6889
6890 if (!State.isVarArg()) {
6891 // If there are vector registers remaining we don't consume any stack
6892 // space.
6893 if (MCRegister VReg = State.AllocateReg(VR)) {
6894 State.addLoc(CCValAssign::getReg(ValNo, ValVT, VReg, LocVT, LocInfo));
6895 return false;
6896 }
6897 // Vectors passed on the stack do not shadow GPRs or FPRs even though they
6898 // might be allocated in the portion of the PSA that is shadowed by the
6899 // GPRs.
6900 const unsigned Offset = State.AllocateStack(VecSize, VecAlign);
6901 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
6902 return false;
6903 }
6904
6905 unsigned NextRegIndex = State.getFirstUnallocated(GPRs);
6906 // Burn any underaligned registers and their shadowed stack space until
6907 // we reach the required alignment.
6908 while (NextRegIndex != GPRs.size() &&
6909 !isGPRShadowAligned(GPRs[NextRegIndex], VecAlign)) {
6910 // Shadow allocate register and its stack shadow.
6911 MCRegister Reg = State.AllocateReg(GPRs);
6912 State.AllocateStack(PtrSize, PtrAlign);
6913 assert(Reg && "Allocating register unexpectedly failed.");
6914 (void)Reg;
6915 NextRegIndex = State.getFirstUnallocated(GPRs);
6916 }
6917
6918 // Vectors that are passed as fixed arguments are handled differently.
6919 // They are passed in VRs if any are available (unlike arguments passed
6920 // through ellipses) and shadow GPRs (unlike arguments to non-vaarg
6921 // functions)
6922 if (!ArgFlags.isVarArg()) {
6923 if (MCRegister VReg = State.AllocateReg(VR)) {
6924 State.addLoc(CCValAssign::getReg(ValNo, ValVT, VReg, LocVT, LocInfo));
6925 // Shadow allocate GPRs and stack space even though we pass in a VR.
6926 for (unsigned I = 0; I != VecSize; I += PtrSize)
6927 State.AllocateReg(GPRs);
6928 State.AllocateStack(VecSize, VecAlign);
6929 return false;
6930 }
6931 // No vector registers remain so pass on the stack.
6932 const unsigned Offset = State.AllocateStack(VecSize, VecAlign);
6933 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
6934 return false;
6935 }
6936
6937 // If all GPRS are consumed then we pass the argument fully on the stack.
6938 if (NextRegIndex == GPRs.size()) {
6939 const unsigned Offset = State.AllocateStack(VecSize, VecAlign);
6940 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
6941 return false;
6942 }
6943
6944 // Corner case for 32-bit codegen. We have 2 registers to pass the first
6945 // half of the argument, and then need to pass the remaining half on the
6946 // stack.
6947 if (GPRs[NextRegIndex] == PPC::R9) {
6948 const unsigned Offset = State.AllocateStack(VecSize, VecAlign);
6949 State.addLoc(
6950 CCValAssign::getCustomMem(ValNo, ValVT, Offset, LocVT, LocInfo));
6951
6952 const MCRegister FirstReg = State.AllocateReg(PPC::R9);
6953 const MCRegister SecondReg = State.AllocateReg(PPC::R10);
6954 assert(FirstReg && SecondReg &&
6955 "Allocating R9 or R10 unexpectedly failed.");
6956 State.addLoc(
6957 CCValAssign::getCustomReg(ValNo, ValVT, FirstReg, RegVT, LocInfo));
6958 State.addLoc(
6959 CCValAssign::getCustomReg(ValNo, ValVT, SecondReg, RegVT, LocInfo));
6960 return false;
6961 }
6962
6963 // We have enough GPRs to fully pass the vector argument, and we have
6964 // already consumed any underaligned registers. Start with the custom
6965 // MemLoc and then the custom RegLocs.
6966 const unsigned Offset = State.AllocateStack(VecSize, VecAlign);
6967 State.addLoc(
6968 CCValAssign::getCustomMem(ValNo, ValVT, Offset, LocVT, LocInfo));
6969 for (unsigned I = 0; I != VecSize; I += PtrSize) {
6970 const MCRegister Reg = State.AllocateReg(GPRs);
6971 assert(Reg && "Failed to allocated register for vararg vector argument");
6972 State.addLoc(
6973 CCValAssign::getCustomReg(ValNo, ValVT, Reg, RegVT, LocInfo));
6974 }
6975 return false;
6976 }
6977 }
6978 return true;
6979}
6980
6981// So far, this function is only used by LowerFormalArguments_AIX()
6983 bool IsPPC64,
6984 bool HasP8Vector,
6985 bool HasVSX) {
6986 assert((IsPPC64 || SVT != MVT::i64) &&
6987 "i64 should have been split for 32-bit codegen.");
6988
6989 switch (SVT) {
6990 default:
6991 report_fatal_error("Unexpected value type for formal argument");
6992 case MVT::i1:
6993 case MVT::i32:
6994 case MVT::i64:
6995 return IsPPC64 ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
6996 case MVT::f32:
6997 return HasP8Vector ? &PPC::VSSRCRegClass : &PPC::F4RCRegClass;
6998 case MVT::f64:
6999 return HasVSX ? &PPC::VSFRCRegClass : &PPC::F8RCRegClass;
7000 case MVT::v4f32:
7001 case MVT::v4i32:
7002 case MVT::v8i16:
7003 case MVT::v16i8:
7004 case MVT::v2i64:
7005 case MVT::v2f64:
7006 case MVT::v1i128:
7007 return &PPC::VRRCRegClass;
7008 }
7009}
7010
7012 SelectionDAG &DAG, SDValue ArgValue,
7013 MVT LocVT, const SDLoc &dl) {
7014 assert(ValVT.isScalarInteger() && LocVT.isScalarInteger());
7015 assert(ValVT.getFixedSizeInBits() < LocVT.getFixedSizeInBits());
7016
7017 if (Flags.isSExt())
7018 ArgValue = DAG.getNode(ISD::AssertSext, dl, LocVT, ArgValue,
7019 DAG.getValueType(ValVT));
7020 else if (Flags.isZExt())
7021 ArgValue = DAG.getNode(ISD::AssertZext, dl, LocVT, ArgValue,
7022 DAG.getValueType(ValVT));
7023
7024 return DAG.getNode(ISD::TRUNCATE, dl, ValVT, ArgValue);
7025}
7026
7027static unsigned mapArgRegToOffsetAIX(unsigned Reg, const PPCFrameLowering *FL) {
7028 const unsigned LASize = FL->getLinkageSize();
7029
7030 if (PPC::GPRCRegClass.contains(Reg)) {
7031 assert(Reg >= PPC::R3 && Reg <= PPC::R10 &&
7032 "Reg must be a valid argument register!");
7033 return LASize + 4 * (Reg - PPC::R3);
7034 }
7035
7036 if (PPC::G8RCRegClass.contains(Reg)) {
7037 assert(Reg >= PPC::X3 && Reg <= PPC::X10 &&
7038 "Reg must be a valid argument register!");
7039 return LASize + 8 * (Reg - PPC::X3);
7040 }
7041
7042 llvm_unreachable("Only general purpose registers expected.");
7043}
7044
7045// AIX ABI Stack Frame Layout:
7046//
7047// Low Memory +--------------------------------------------+
7048// SP +---> | Back chain | ---+
7049// | +--------------------------------------------+ |
7050// | | Saved Condition Register | |
7051// | +--------------------------------------------+ |
7052// | | Saved Linkage Register | |
7053// | +--------------------------------------------+ | Linkage Area
7054// | | Reserved for compilers | |
7055// | +--------------------------------------------+ |
7056// | | Reserved for binders | |
7057// | +--------------------------------------------+ |
7058// | | Saved TOC pointer | ---+
7059// | +--------------------------------------------+
7060// | | Parameter save area |
7061// | +--------------------------------------------+
7062// | | Alloca space |
7063// | +--------------------------------------------+
7064// | | Local variable space |
7065// | +--------------------------------------------+
7066// | | Float/int conversion temporary |
7067// | +--------------------------------------------+
7068// | | Save area for AltiVec registers |
7069// | +--------------------------------------------+
7070// | | AltiVec alignment padding |
7071// | +--------------------------------------------+
7072// | | Save area for VRSAVE register |
7073// | +--------------------------------------------+
7074// | | Save area for General Purpose registers |
7075// | +--------------------------------------------+
7076// | | Save area for Floating Point registers |
7077// | +--------------------------------------------+
7078// +---- | Back chain |
7079// High Memory +--------------------------------------------+
7080//
7081// Specifications:
7082// AIX 7.2 Assembler Language Reference
7083// Subroutine linkage convention
7084
7085SDValue PPCTargetLowering::LowerFormalArguments_AIX(
7086 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
7087 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
7088 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
7089
7090 assert((CallConv == CallingConv::C || CallConv == CallingConv::Cold ||
7091 CallConv == CallingConv::Fast) &&
7092 "Unexpected calling convention!");
7093
7094 if (getTargetMachine().Options.GuaranteedTailCallOpt)
7095 report_fatal_error("Tail call support is unimplemented on AIX.");
7096
7097 if (useSoftFloat())
7098 report_fatal_error("Soft float support is unimplemented on AIX.");
7099
7100 const PPCSubtarget &Subtarget = DAG.getSubtarget<PPCSubtarget>();
7101
7102 const bool IsPPC64 = Subtarget.isPPC64();
7103 const unsigned PtrByteSize = IsPPC64 ? 8 : 4;
7104
7105 // Assign locations to all of the incoming arguments.
7107 MachineFunction &MF = DAG.getMachineFunction();
7108 MachineFrameInfo &MFI = MF.getFrameInfo();
7109 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
7110 CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());
7111
7112 const EVT PtrVT = getPointerTy(MF.getDataLayout());
7113 // Reserve space for the linkage area on the stack.
7114 const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
7115 CCInfo.AllocateStack(LinkageSize, Align(PtrByteSize));
7116 uint64_t SaveStackPos = CCInfo.getStackSize();
7117 bool SaveParams = MF.getFunction().hasFnAttribute("save-reg-params");
7118 CCInfo.AnalyzeFormalArguments(Ins, CC_AIX);
7119
7121
7122 for (size_t I = 0, End = ArgLocs.size(); I != End; /* No increment here */) {
7123 CCValAssign &VA = ArgLocs[I++];
7124 MVT LocVT = VA.getLocVT();
7125 MVT ValVT = VA.getValVT();
7126 ISD::ArgFlagsTy Flags = Ins[VA.getValNo()].Flags;
7127
7128 EVT ArgVT = Ins[VA.getValNo()].ArgVT;
7129 bool ArgSignExt = Ins[VA.getValNo()].Flags.isSExt();
7130 // For compatibility with the AIX XL compiler, the float args in the
7131 // parameter save area are initialized even if the argument is available
7132 // in register. The caller is required to initialize both the register
7133 // and memory, however, the callee can choose to expect it in either.
7134 // The memloc is dismissed here because the argument is retrieved from
7135 // the register.
7136 if (VA.isMemLoc() && VA.needsCustom() && ValVT.isFloatingPoint())
7137 continue;
7138
7139 if (SaveParams && VA.isRegLoc() && !Flags.isByVal() && !VA.needsCustom()) {
7140 const TargetRegisterClass *RegClass = getRegClassForSVT(
7141 LocVT.SimpleTy, IsPPC64, Subtarget.hasP8Vector(), Subtarget.hasVSX());
7142 // On PPC64, debugger assumes extended 8-byte values are stored from GPR.
7143 MVT SaveVT = RegClass == &PPC::G8RCRegClass ? MVT::i64 : LocVT;
7144 const Register VReg = MF.addLiveIn(VA.getLocReg(), RegClass);
7145 SDValue Parm = DAG.getCopyFromReg(Chain, dl, VReg, SaveVT);
7146 int FI = MFI.CreateFixedObject(SaveVT.getStoreSize(), SaveStackPos, true);
7147 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
7148 SDValue StoreReg = DAG.getStore(Chain, dl, Parm, FIN,
7149 MachinePointerInfo(), Align(PtrByteSize));
7150 SaveStackPos = alignTo(SaveStackPos + SaveVT.getStoreSize(), PtrByteSize);
7151 MemOps.push_back(StoreReg);
7152 }
7153
7154 if (SaveParams && (VA.isMemLoc() || Flags.isByVal()) && !VA.needsCustom()) {
7155 unsigned StoreSize =
7156 Flags.isByVal() ? Flags.getByValSize() : LocVT.getStoreSize();
7157 SaveStackPos = alignTo(SaveStackPos + StoreSize, PtrByteSize);
7158 }
7159
7160 auto HandleMemLoc = [&]() {
7161 const unsigned LocSize = LocVT.getStoreSize();
7162 const unsigned ValSize = ValVT.getStoreSize();
7163 assert((ValSize <= LocSize) &&
7164 "Object size is larger than size of MemLoc");
7165 int CurArgOffset = VA.getLocMemOffset();
7166 // Objects are right-justified because AIX is big-endian.
7167 if (LocSize > ValSize)
7168 CurArgOffset += LocSize - ValSize;
7169 // Potential tail calls could cause overwriting of argument stack slots.
7170 const bool IsImmutable =
7172 (CallConv == CallingConv::Fast));
7173 int FI = MFI.CreateFixedObject(ValSize, CurArgOffset, IsImmutable);
7174 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
7175 SDValue ArgValue =
7176 DAG.getLoad(ValVT, dl, Chain, FIN, MachinePointerInfo());
7177
7178 // While the ABI specifies the argument type is (sign or zero) extended
7179 // out to register width, not all code is compliant. We truncate and
7180 // re-extend to be more forgiving of these callers when the argument type
7181 // is smaller than register width.
7182 if (!ArgVT.isVector() && !ValVT.isVector() && ArgVT.isInteger() &&
7183 ValVT.isInteger() &&
7184 ArgVT.getScalarSizeInBits() < ValVT.getScalarSizeInBits()) {
7185 // It is possible to have either real integer values
7186 // or integers that were not originally integers.
7187 // In the latter case, these could have came from structs,
7188 // and these integers would not have an extend on the parameter.
7189 // Since these types of integers do not have an extend specified
7190 // in the first place, the type of extend that we do should not matter.
7191 EVT TruncatedArgVT = ArgVT.isSimple() && ArgVT.getSimpleVT() == MVT::i1
7192 ? MVT::i8
7193 : ArgVT;
7194 SDValue ArgValueTrunc =
7195 DAG.getNode(ISD::TRUNCATE, dl, TruncatedArgVT, ArgValue);
7196 SDValue ArgValueExt =
7197 ArgSignExt ? DAG.getSExtOrTrunc(ArgValueTrunc, dl, ValVT)
7198 : DAG.getZExtOrTrunc(ArgValueTrunc, dl, ValVT);
7199 InVals.push_back(ArgValueExt);
7200 } else {
7201 InVals.push_back(ArgValue);
7202 }
7203 };
7204
7205 // Vector arguments to VaArg functions are passed both on the stack, and
7206 // in any available GPRs. Load the value from the stack and add the GPRs
7207 // as live ins.
7208 if (VA.isMemLoc() && VA.needsCustom()) {
7209 assert(ValVT.isVector() && "Unexpected Custom MemLoc type.");
7210 assert(isVarArg && "Only use custom memloc for vararg.");
7211 // ValNo of the custom MemLoc, so we can compare it to the ValNo of the
7212 // matching custom RegLocs.
7213 const unsigned OriginalValNo = VA.getValNo();
7214 (void)OriginalValNo;
7215
7216 auto HandleCustomVecRegLoc = [&]() {
7217 assert(I != End && ArgLocs[I].isRegLoc() && ArgLocs[I].needsCustom() &&
7218 "Missing custom RegLoc.");
7219 VA = ArgLocs[I++];
7220 assert(VA.getValVT().isVector() &&
7221 "Unexpected Val type for custom RegLoc.");
7222 assert(VA.getValNo() == OriginalValNo &&
7223 "ValNo mismatch between custom MemLoc and RegLoc.");
7225 MF.addLiveIn(VA.getLocReg(),
7226 getRegClassForSVT(SVT, IsPPC64, Subtarget.hasP8Vector(),
7227 Subtarget.hasVSX()));
7228 };
7229
7230 HandleMemLoc();
7231 // In 64-bit there will be exactly 2 custom RegLocs that follow, and in
7232 // in 32-bit there will be 2 custom RegLocs if we are passing in R9 and
7233 // R10.
7234 HandleCustomVecRegLoc();
7235 HandleCustomVecRegLoc();
7236
7237 // If we are targeting 32-bit, there might be 2 extra custom RegLocs if
7238 // we passed the vector in R5, R6, R7 and R8.
7239 if (I != End && ArgLocs[I].isRegLoc() && ArgLocs[I].needsCustom()) {
7240 assert(!IsPPC64 &&
7241 "Only 2 custom RegLocs expected for 64-bit codegen.");
7242 HandleCustomVecRegLoc();
7243 HandleCustomVecRegLoc();
7244 }
7245
7246 continue;
7247 }
7248
7249 if (VA.isRegLoc()) {
7250 if (VA.getValVT().isScalarInteger())
7252 else if (VA.getValVT().isFloatingPoint() && !VA.getValVT().isVector()) {
7253 switch (VA.getValVT().SimpleTy) {
7254 default:
7255 report_fatal_error("Unhandled value type for argument.");
7256 case MVT::f32:
7258 break;
7259 case MVT::f64:
7261 break;
7262 }
7263 } else if (VA.getValVT().isVector()) {
7264 switch (VA.getValVT().SimpleTy) {
7265 default:
7266 report_fatal_error("Unhandled value type for argument.");
7267 case MVT::v16i8:
7269 break;
7270 case MVT::v8i16:
7272 break;
7273 case MVT::v4i32:
7274 case MVT::v2i64:
7275 case MVT::v1i128:
7277 break;
7278 case MVT::v4f32:
7279 case MVT::v2f64:
7281 break;
7282 }
7283 }
7284 }
7285
7286 if (Flags.isByVal() && VA.isMemLoc()) {
7287 const unsigned Size =
7288 alignTo(Flags.getByValSize() ? Flags.getByValSize() : PtrByteSize,
7289 PtrByteSize);
7290 const int FI = MF.getFrameInfo().CreateFixedObject(
7291 Size, VA.getLocMemOffset(), /* IsImmutable */ false,
7292 /* IsAliased */ true);
7293 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
7294 InVals.push_back(FIN);
7295
7296 continue;
7297 }
7298
7299 if (Flags.isByVal()) {
7300 assert(VA.isRegLoc() && "MemLocs should already be handled.");
7301
7302 const MCPhysReg ArgReg = VA.getLocReg();
7303 const PPCFrameLowering *FL = Subtarget.getFrameLowering();
7304
7305 const unsigned StackSize = alignTo(Flags.getByValSize(), PtrByteSize);
7306 const int FI = MF.getFrameInfo().CreateFixedObject(
7307 StackSize, mapArgRegToOffsetAIX(ArgReg, FL), /* IsImmutable */ false,
7308 /* IsAliased */ true);
7309 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
7310 InVals.push_back(FIN);
7311
7312 // Add live ins for all the RegLocs for the same ByVal.
7313 const TargetRegisterClass *RegClass =
7314 IsPPC64 ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
7315
7316 auto HandleRegLoc = [&, RegClass, LocVT](const MCPhysReg PhysReg,
7317 unsigned Offset) {
7318 const Register VReg = MF.addLiveIn(PhysReg, RegClass);
7319 // Since the callers side has left justified the aggregate in the
7320 // register, we can simply store the entire register into the stack
7321 // slot.
7322 SDValue CopyFrom = DAG.getCopyFromReg(Chain, dl, VReg, LocVT);
 7323        // The store to the fixedstack object is needed because accessing a
7324 // field of the ByVal will use a gep and load. Ideally we will optimize
7325 // to extracting the value from the register directly, and elide the
7326 // stores when the arguments address is not taken, but that will need to
7327 // be future work.
7328 SDValue Store = DAG.getStore(
7329 CopyFrom.getValue(1), dl, CopyFrom,
7332
7333 MemOps.push_back(Store);
7334 };
7335
7336 unsigned Offset = 0;
7337 HandleRegLoc(VA.getLocReg(), Offset);
7338 Offset += PtrByteSize;
7339 for (; Offset != StackSize && ArgLocs[I].isRegLoc();
7340 Offset += PtrByteSize) {
7341 assert(ArgLocs[I].getValNo() == VA.getValNo() &&
7342 "RegLocs should be for ByVal argument.");
7343
7344 const CCValAssign RL = ArgLocs[I++];
7345 HandleRegLoc(RL.getLocReg(), Offset);
7347 }
7348
7349 if (Offset != StackSize) {
7350 assert(ArgLocs[I].getValNo() == VA.getValNo() &&
7351 "Expected MemLoc for remaining bytes.");
7352 assert(ArgLocs[I].isMemLoc() && "Expected MemLoc for remaining bytes.");
7353 // Consume the MemLoc.The InVal has already been emitted, so nothing
7354 // more needs to be done.
7355 ++I;
7356 }
7357
7358 continue;
7359 }
7360
7361 if (VA.isRegLoc() && !VA.needsCustom()) {
7362 MVT::SimpleValueType SVT = ValVT.SimpleTy;
7363 Register VReg =
7364 MF.addLiveIn(VA.getLocReg(),
7365 getRegClassForSVT(SVT, IsPPC64, Subtarget.hasP8Vector(),
7366 Subtarget.hasVSX()));
7367 SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, VReg, LocVT);
7368 if (ValVT.isScalarInteger() &&
7369 (ValVT.getFixedSizeInBits() < LocVT.getFixedSizeInBits())) {
7370 ArgValue =
7371 truncateScalarIntegerArg(Flags, ValVT, DAG, ArgValue, LocVT, dl);
7372 }
7373 InVals.push_back(ArgValue);
7374 continue;
7375 }
7376 if (VA.isMemLoc()) {
7377 HandleMemLoc();
7378 continue;
7379 }
7380 }
7381
7382 // On AIX a minimum of 8 words is saved to the parameter save area.
7383 const unsigned MinParameterSaveArea = 8 * PtrByteSize;
7384 // Area that is at least reserved in the caller of this function.
7385 unsigned CallerReservedArea = std::max<unsigned>(
7386 CCInfo.getStackSize(), LinkageSize + MinParameterSaveArea);
7387
7388 // Set the size that is at least reserved in caller of this function. Tail
7389 // call optimized function's reserved stack space needs to be aligned so
7390 // that taking the difference between two stack areas will result in an
7391 // aligned stack.
7392 CallerReservedArea =
7393 EnsureStackAlignment(Subtarget.getFrameLowering(), CallerReservedArea);
7394 FuncInfo->setMinReservedArea(CallerReservedArea);
7395
7396 if (isVarArg) {
7397 int VAListIndex = 0;
7398 // If any of the optional arguments are passed in register then the fixed
7399 // stack object we spill into is not immutable. Create a fixed stack object
7400 // that overlaps the remainder of the parameter save area.
7401 if (CCInfo.getStackSize() < (LinkageSize + MinParameterSaveArea)) {
7402 unsigned FixedStackSize =
7403 LinkageSize + MinParameterSaveArea - CCInfo.getStackSize();
7404 VAListIndex =
7405 MFI.CreateFixedObject(FixedStackSize, CCInfo.getStackSize(),
7406 /* IsImmutable */ false, /* IsAliased */ true);
7407 } else {
7408 // All the arguments passed through ellipses are on the stack. Create a
7409 // dummy fixed stack object the same size as a pointer since we don't
7410 // know the actual size.
7411 VAListIndex =
7412 MFI.CreateFixedObject(PtrByteSize, CCInfo.getStackSize(),
7413 /* IsImmutable */ true, /* IsAliased */ true);
7414 }
7415
7416 FuncInfo->setVarArgsFrameIndex(VAListIndex);
7417 SDValue FIN = DAG.getFrameIndex(VAListIndex, PtrVT);
7418
7419 static const MCPhysReg GPR_32[] = {PPC::R3, PPC::R4, PPC::R5, PPC::R6,
7420 PPC::R7, PPC::R8, PPC::R9, PPC::R10};
7421
7422 static const MCPhysReg GPR_64[] = {PPC::X3, PPC::X4, PPC::X5, PPC::X6,
7423 PPC::X7, PPC::X8, PPC::X9, PPC::X10};
7424 const unsigned NumGPArgRegs = std::size(IsPPC64 ? GPR_64 : GPR_32);
7425
7426 // The fixed integer arguments of a variadic function are stored to the
7427 // VarArgsFrameIndex on the stack so that they may be loaded by
7428 // dereferencing the result of va_next.
7429 for (unsigned
7430 GPRIndex = (CCInfo.getStackSize() - LinkageSize) / PtrByteSize,
7431 Offset = 0;
7432 GPRIndex < NumGPArgRegs; ++GPRIndex, Offset += PtrByteSize) {
7433
7434 const Register VReg =
7435 IsPPC64 ? MF.addLiveIn(GPR_64[GPRIndex], &PPC::G8RCRegClass)
7436 : MF.addLiveIn(GPR_32[GPRIndex], &PPC::GPRCRegClass);
7437
7438 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
7439 MachinePointerInfo MPI =
7440 MachinePointerInfo::getFixedStack(MF, VAListIndex, Offset);
7441 SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, MPI);
7442 MemOps.push_back(Store);
7443 // Increment the address for the next argument to store.
7444 SDValue PtrOff = DAG.getConstant(PtrByteSize, dl, PtrVT);
7445 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
7446 }
7447 }
7448
7449 if (!MemOps.empty())
7450 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
7451
7452 return Chain;
7453}
7454
/// Lower an outgoing call for the AIX ABI: analyze operands with CC_AIX,
/// materialize register arguments (including the by-value and vararg-vector
/// special cases), store stack-based arguments relative to R1/X1, save the
/// TOC base across indirect calls, and complete lowering via FinishCall().
SDValue PPCTargetLowering::LowerCall_AIX(
    SDValue Chain, SDValue Callee, CallFlags CFlags,
    const SmallVectorImpl<SDValue> &OutVals,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
    const CallBase *CB) const {
  // See PPCTargetLowering::LowerFormalArguments_AIX() for a description of the
  // AIX ABI stack frame layout.

  assert((CFlags.CallConv == CallingConv::C ||
          CFlags.CallConv == CallingConv::Cold ||
          CFlags.CallConv == CallingConv::Fast) &&
         "Unexpected calling convention!");

  if (CFlags.IsPatchPoint)
    report_fatal_error("This call type is unimplemented on AIX.");

  const PPCSubtarget &Subtarget = DAG.getSubtarget<PPCSubtarget>();

  MachineFunction &MF = DAG.getMachineFunction();
  CCState CCInfo(CFlags.CallConv, CFlags.IsVarArg, MF, ArgLocs,
                 *DAG.getContext());

  // Reserve space for the linkage save area (LSA) on the stack.
  // In both PPC32 and PPC64 there are 6 reserved slots in the LSA:
  //   [SP][CR][LR][2 x reserved][TOC].
  // The LSA is 24 bytes (6x4) in PPC32 and 48 bytes (6x8) in PPC64.
  const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
  const bool IsPPC64 = Subtarget.isPPC64();
  const EVT PtrVT = getPointerTy(DAG.getDataLayout());
  const unsigned PtrByteSize = IsPPC64 ? 8 : 4;
  CCInfo.AllocateStack(LinkageSize, Align(PtrByteSize));
  CCInfo.AnalyzeCallOperands(Outs, CC_AIX);

  // The prolog code of the callee may store up to 8 GPR argument registers to
  // the stack, allowing va_start to index over them in memory if the callee
  // is variadic.
  // Because we cannot tell if this is needed on the caller side, we have to
  // conservatively assume that it is needed. As such, make sure we have at
  // least enough stack space for the caller to store the 8 GPRs.
  const unsigned MinParameterSaveAreaSize = 8 * PtrByteSize;
  const unsigned NumBytes = std::max<unsigned>(
      LinkageSize + MinParameterSaveAreaSize, CCInfo.getStackSize());

  // Adjust the stack pointer for the new arguments...
  // These operations are automatically eliminated by the prolog/epilog pass.
  Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
  SDValue CallSeqStart = Chain;

  SmallVector<SDValue, 8> MemOpChains;

  // Set up a copy of the stack pointer for loading and storing any
  // arguments that may not fit in the registers available for argument
  // passing.
  const SDValue StackPtr = IsPPC64 ? DAG.getRegister(PPC::X1, MVT::i64)
                                   : DAG.getRegister(PPC::R1, MVT::i32);

  // Walk the CCValAssign list; a single by-val or custom-handled argument
  // may consume several consecutive locations, so I is advanced manually.
  for (unsigned I = 0, E = ArgLocs.size(); I != E;) {
    const unsigned ValNo = ArgLocs[I].getValNo();
    SDValue Arg = OutVals[ValNo];
    ISD::ArgFlagsTy Flags = Outs[ValNo].Flags;

    if (Flags.isByVal()) {
      const unsigned ByValSize = Flags.getByValSize();

      // Nothing to do for zero-sized ByVals on the caller side.
      if (!ByValSize) {
        ++I;
        continue;
      }

      // Loads one chunk of the by-val aggregate (zero-extended to the
      // pointer width) from the given byte offset.
      auto GetLoad = [&](EVT VT, unsigned LoadOffset) {
        return DAG.getExtLoad(ISD::ZEXTLOAD, dl, PtrVT, Chain,
                              (LoadOffset != 0)
                                  ? DAG.getObjectPtrOffset(
                                        dl, Arg, TypeSize::getFixed(LoadOffset))
                                  : Arg,
                              MachinePointerInfo(), VT);
      };

      unsigned LoadOffset = 0;

      // Initialize registers, which are fully occupied by the by-val argument.
      while (LoadOffset + PtrByteSize <= ByValSize && ArgLocs[I].isRegLoc()) {
        SDValue Load = GetLoad(PtrVT, LoadOffset);
        MemOpChains.push_back(Load.getValue(1));
        LoadOffset += PtrByteSize;
        const CCValAssign &ByValVA = ArgLocs[I++];
        assert(ByValVA.getValNo() == ValNo &&
               "Unexpected location for pass-by-value argument.");
        RegsToPass.push_back(std::make_pair(ByValVA.getLocReg(), Load));
      }

      if (LoadOffset == ByValSize)
        continue;

      // There must be one more loc to handle the remainder.
      assert(ArgLocs[I].getValNo() == ValNo &&
             "Expected additional location for by-value argument.");

      if (ArgLocs[I].isMemLoc()) {
        assert(LoadOffset < ByValSize && "Unexpected memloc for by-val arg.");
        const CCValAssign &ByValVA = ArgLocs[I++];
        ISD::ArgFlagsTy MemcpyFlags = Flags;
        // Only memcpy the bytes that don't pass in register.
        MemcpyFlags.setByValSize(ByValSize - LoadOffset);
        Chain = CallSeqStart = createMemcpyOutsideCallSeq(
            (LoadOffset != 0) ? DAG.getObjectPtrOffset(
                                    dl, Arg, TypeSize::getFixed(LoadOffset))
                              : Arg,
                dl, StackPtr, TypeSize::getFixed(ByValVA.getLocMemOffset())),
            CallSeqStart, MemcpyFlags, DAG, dl);
        continue;
      }

      // Initialize the final register residue.
      // Any residue that occupies the final by-val arg register must be
      // left-justified on AIX. Loads must be a power-of-2 size and cannot be
      // larger than the ByValSize. For example: a 7 byte by-val arg requires 4,
      // 2 and 1 byte loads.
      const unsigned ResidueBytes = ByValSize % PtrByteSize;
      assert(ResidueBytes != 0 && LoadOffset + PtrByteSize > ByValSize &&
             "Unexpected register residue for by-value argument.");
      SDValue ResidueVal;
      for (unsigned Bytes = 0; Bytes != ResidueBytes;) {
        // Largest power-of-2 chunk that still fits in the residue.
        const unsigned N = llvm::bit_floor(ResidueBytes - Bytes);
        const MVT VT =
            N == 1 ? MVT::i8
                   : ((N == 2) ? MVT::i16 : (N == 4 ? MVT::i32 : MVT::i64));
        SDValue Load = GetLoad(VT, LoadOffset);
        MemOpChains.push_back(Load.getValue(1));
        LoadOffset += N;
        Bytes += N;

        // By-val arguments are passed left-justified in register.
        // Every load here needs to be shifted, otherwise a full register load
        // should have been used.
        assert(PtrVT.getSimpleVT().getSizeInBits() > (Bytes * 8) &&
               "Unexpected load emitted during handling of pass-by-value "
               "argument.");
        unsigned NumSHLBits = PtrVT.getSimpleVT().getSizeInBits() - (Bytes * 8);
        EVT ShiftAmountTy =
            getShiftAmountTy(Load->getValueType(0), DAG.getDataLayout());
        SDValue SHLAmt = DAG.getConstant(NumSHLBits, dl, ShiftAmountTy);
        SDValue ShiftedLoad =
            DAG.getNode(ISD::SHL, dl, Load.getValueType(), Load, SHLAmt);
        // OR each shifted chunk into the accumulated residue value.
        ResidueVal = ResidueVal ? DAG.getNode(ISD::OR, dl, PtrVT, ResidueVal,
                                              ShiftedLoad)
                                : ShiftedLoad;
      }

      const CCValAssign &ByValVA = ArgLocs[I++];
      RegsToPass.push_back(std::make_pair(ByValVA.getLocReg(), ResidueVal));
      continue;
    }

    CCValAssign &VA = ArgLocs[I++];
    const MVT LocVT = VA.getLocVT();
    const MVT ValVT = VA.getValVT();

    // Apply the extension the calling convention selected for this value.
    switch (VA.getLocInfo()) {
    default:
      report_fatal_error("Unexpected argument extension type.");
    case CCValAssign::Full:
      break;
    case CCValAssign::ZExt:
      Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
      break;
    case CCValAssign::SExt:
      Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
      break;
    }

    if (VA.isRegLoc() && !VA.needsCustom()) {
      RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
      continue;
    }

    // Vector arguments passed to VarArg functions need custom handling when
    // they are passed (at least partially) in GPRs.
    if (VA.isMemLoc() && VA.needsCustom() && ValVT.isVector()) {
      assert(CFlags.IsVarArg && "Custom MemLocs only used for Vector args.");
      // Store value to its stack slot.
      SDValue PtrOff =
          DAG.getConstant(VA.getLocMemOffset(), dl, StackPtr.getValueType());
      PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
      SDValue Store =
          DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo());
      MemOpChains.push_back(Store);
      const unsigned OriginalValNo = VA.getValNo();
      // Then load the GPRs from the stack
      unsigned LoadOffset = 0;
      // Consumes one custom RegLoc: re-loads a GPR-sized piece of the vector
      // from the stack slot just written and queues it for register passing.
      auto HandleCustomVecRegLoc = [&]() {
        assert(I != E && "Unexpected end of CCvalAssigns.");
        assert(ArgLocs[I].isRegLoc() && ArgLocs[I].needsCustom() &&
               "Expected custom RegLoc.");
        CCValAssign RegVA = ArgLocs[I++];
        assert(RegVA.getValNo() == OriginalValNo &&
               "Custom MemLoc ValNo and custom RegLoc ValNo must match.");
        SDValue Add = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
                                  DAG.getConstant(LoadOffset, dl, PtrVT));
        SDValue Load = DAG.getLoad(PtrVT, dl, Store, Add, MachinePointerInfo());
        MemOpChains.push_back(Load.getValue(1));
        RegsToPass.push_back(std::make_pair(RegVA.getLocReg(), Load));
        LoadOffset += PtrByteSize;
      };

      // In 64-bit there will be exactly 2 custom RegLocs that follow, and
      // in 32-bit there will be 2 custom RegLocs if we are passing in R9 and
      // R10.
      HandleCustomVecRegLoc();
      HandleCustomVecRegLoc();

      if (I != E && ArgLocs[I].isRegLoc() && ArgLocs[I].needsCustom() &&
          ArgLocs[I].getValNo() == OriginalValNo) {
        assert(!IsPPC64 &&
               "Only 2 custom RegLocs expected for 64-bit codegen.");
        HandleCustomVecRegLoc();
        HandleCustomVecRegLoc();
      }

      continue;
    }

    if (VA.isMemLoc()) {
      SDValue PtrOff =
          DAG.getConstant(VA.getLocMemOffset(), dl, StackPtr.getValueType());
      PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
      MemOpChains.push_back(
          DAG.getStore(Chain, dl, Arg, PtrOff,
                       Subtarget.getFrameLowering()->getStackAlign()));

      continue;
    }

    if (!ValVT.isFloatingPoint())
          "Unexpected register handling for calling convention.");

    // Custom handling is used for GPR initializations for vararg float
    // arguments.
    assert(VA.isRegLoc() && VA.needsCustom() && CFlags.IsVarArg &&
           LocVT.isInteger() &&
           "Custom register handling only expected for VarArg.");

    // Reinterpret the float bits as an integer of the same width so they can
    // travel in GPR(s).
    SDValue ArgAsInt =
        DAG.getBitcast(MVT::getIntegerVT(ValVT.getSizeInBits()), Arg);

    if (Arg.getValueType().getStoreSize() == LocVT.getStoreSize())
      // f32 in 32-bit GPR
      // f64 in 64-bit GPR
      RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgAsInt));
    else if (Arg.getValueType().getFixedSizeInBits() <
             LocVT.getFixedSizeInBits())
      // f32 in 64-bit GPR.
      RegsToPass.push_back(std::make_pair(
          VA.getLocReg(), DAG.getZExtOrTrunc(ArgAsInt, dl, LocVT)));
    else {
      // f64 in two 32-bit GPRs
      // The 2 GPRs are marked custom and expected to be adjacent in ArgLocs.
      assert(Arg.getValueType() == MVT::f64 && CFlags.IsVarArg && !IsPPC64 &&
             "Unexpected custom register for argument!");
      CCValAssign &GPR1 = VA;
      // High 32 bits of the f64 go in the first GPR.
      SDValue MSWAsI64 = DAG.getNode(ISD::SRL, dl, MVT::i64, ArgAsInt,
                                     DAG.getConstant(32, dl, MVT::i8));
      RegsToPass.push_back(std::make_pair(
          GPR1.getLocReg(), DAG.getZExtOrTrunc(MSWAsI64, dl, MVT::i32)));

      if (I != E) {
        // If only 1 GPR was available, there will only be one custom GPR and
        // the argument will also pass in memory.
        CCValAssign &PeekArg = ArgLocs[I];
        // NOTE(review): `PeekArg.getValNo() == PeekArg.getValNo()` compares a
        // value with itself and is always true; presumably it was meant to be
        // compared against VA.getValNo() — confirm against upstream.
        if (PeekArg.isRegLoc() && PeekArg.getValNo() == PeekArg.getValNo()) {
          assert(PeekArg.needsCustom() && "A second custom GPR is expected.");
          CCValAssign &GPR2 = ArgLocs[I++];
          // Low 32 bits of the f64 go in the second GPR.
          RegsToPass.push_back(std::make_pair(
              GPR2.getLocReg(), DAG.getZExtOrTrunc(ArgAsInt, dl, MVT::i32)));
        }
      }
    }
  }

  if (!MemOpChains.empty())
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);

  // For indirect calls, we need to save the TOC base to the stack for
  // restoration after the call.
  if (CFlags.IsIndirect) {
    assert(!CFlags.IsTailCall && "Indirect tail-calls not supported.");
    const MCRegister TOCBaseReg = Subtarget.getTOCPointerRegister();
    const MCRegister StackPtrReg = Subtarget.getStackPointerRegister();
    const MVT PtrVT = Subtarget.getScalarIntVT();
    const unsigned TOCSaveOffset =
        Subtarget.getFrameLowering()->getTOCSaveOffset();

    setUsesTOCBasePtr(DAG);
    SDValue Val = DAG.getCopyFromReg(Chain, dl, TOCBaseReg, PtrVT);
    SDValue PtrOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
    SDValue StackPtr = DAG.getRegister(StackPtrReg, PtrVT);
    SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
    Chain = DAG.getStore(
        Val.getValue(1), dl, Val, AddPtr,
        MachinePointerInfo::getStack(DAG.getMachineFunction(), TOCSaveOffset));
  }

  // Build a sequence of copy-to-reg nodes chained together with token chain
  // and flag operands which copy the outgoing args into the appropriate regs.
  SDValue InGlue;
  for (auto Reg : RegsToPass) {
    Chain = DAG.getCopyToReg(Chain, dl, Reg.first, Reg.second, InGlue);
    InGlue = Chain.getValue(1);
  }

  const int SPDiff = 0;
  return FinishCall(CFlags, dl, DAG, RegsToPass, InGlue, Chain, CallSeqStart,
                    Callee, SPDiff, NumBytes, Ins, InVals, CB);
}
7777
/// Returns true if the return values described by Outs can be passed back in
/// registers under the selected return calling convention.
bool
PPCTargetLowering::CanLowerReturn(CallingConv::ID CallConv,
                                  MachineFunction &MF, bool isVarArg,
                                  const Type *RetTy) const {
  CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
  // Cold calls on SVR4 use a dedicated return convention; all other cases
  // use RetCC_PPC.
  return CCInfo.CheckReturn(
      Outs, (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold)
                : RetCC_PPC);
}
7791
/// Lower a function return: analyze the outgoing values with the matching
/// return calling convention, copy each into its assigned physical register
/// (glued so they stay adjacent to the return), and emit PPCISD::RET_GLUE.
SDValue
PPCTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
                               bool isVarArg,
                               const SmallVectorImpl<SDValue> &OutVals,
                               const SDLoc &dl, SelectionDAG &DAG) const {
  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
                 *DAG.getContext());
  // Cold calls on SVR4 use a dedicated return convention.
  CCInfo.AnalyzeReturn(Outs,
                       (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold)
                           : RetCC_PPC);

  SDValue Glue;
  SmallVector<SDValue, 4> RetOps(1, Chain);

  // Copy the result values into the output registers.
  // RealResIdx tracks the index into OutVals; with SPE an f64 consumes two
  // RVLocs but only one OutVals entry, so the counters can diverge.
  for (unsigned i = 0, RealResIdx = 0; i != RVLocs.size(); ++i, ++RealResIdx) {
    CCValAssign &VA = RVLocs[i];
    assert(VA.isRegLoc() && "Can only return in registers!");

    SDValue Arg = OutVals[RealResIdx];

    // Apply the extension the convention selected for this value.
    switch (VA.getLocInfo()) {
    default: llvm_unreachable("Unknown loc info!");
    case CCValAssign::Full: break;
    case CCValAssign::AExt:
      Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
      break;
    case CCValAssign::ZExt:
      Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
      break;
    case CCValAssign::SExt:
      Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
      break;
    }
    if (Subtarget.hasSPE() && VA.getLocVT() == MVT::f64) {
      bool isLittleEndian = Subtarget.isLittleEndian();
      // Legalize ret f64 -> ret 2 x i32.
      SDValue SVal =
          DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
                      DAG.getIntPtrConstant(isLittleEndian ? 0 : 1, dl));
      Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), SVal, Glue);
      RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
      SVal = DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
                         DAG.getIntPtrConstant(isLittleEndian ? 1 : 0, dl));
      Glue = Chain.getValue(1);
      VA = RVLocs[++i]; // skip ahead to next loc
      Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), SVal, Glue);
    } else
      Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Glue);
    Glue = Chain.getValue(1);
    RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
  }

  RetOps[0] = Chain; // Update chain.

  // Add the glue if we have it.
  if (Glue.getNode())
    RetOps.push_back(Glue);

  return DAG.getNode(PPCISD::RET_GLUE, dl, MVT::Other, RetOps);
}
7856
7857SDValue
7858PPCTargetLowering::LowerGET_DYNAMIC_AREA_OFFSET(SDValue Op,
7859 SelectionDAG &DAG) const {
7860 SDLoc dl(Op);
7861
7862 // Get the correct type for integers.
7863 EVT IntVT = Op.getValueType();
7864
7865 // Get the inputs.
7866 SDValue Chain = Op.getOperand(0);
7867 SDValue FPSIdx = getFramePointerFrameIndex(DAG);
7868 // Build a DYNAREAOFFSET node.
7869 SDValue Ops[2] = {Chain, FPSIdx};
7870 SDVTList VTs = DAG.getVTList(IntVT);
7871 return DAG.getNode(PPCISD::DYNAREAOFFSET, dl, VTs, Ops);
7872}
7873
7874SDValue PPCTargetLowering::LowerSTACKRESTORE(SDValue Op,
7875 SelectionDAG &DAG) const {
7876 // When we pop the dynamic allocation we need to restore the SP link.
7877 SDLoc dl(Op);
7878
7879 // Get the correct type for pointers.
7880 EVT PtrVT = getPointerTy(DAG.getDataLayout());
7881
7882 // Construct the stack pointer operand.
7883 bool isPPC64 = Subtarget.isPPC64();
7884 unsigned SP = isPPC64 ? PPC::X1 : PPC::R1;
7885 SDValue StackPtr = DAG.getRegister(SP, PtrVT);
7886
7887 // Get the operands for the STACKRESTORE.
7888 SDValue Chain = Op.getOperand(0);
7889 SDValue SaveSP = Op.getOperand(1);
7890
7891 // Load the old link SP.
7892 SDValue LoadLinkSP =
7893 DAG.getLoad(PtrVT, dl, Chain, StackPtr, MachinePointerInfo());
7894
7895 // Restore the stack pointer.
7896 Chain = DAG.getCopyToReg(LoadLinkSP.getValue(1), dl, SP, SaveSP);
7897
7898 // Store the old link SP.
7899 return DAG.getStore(Chain, dl, LoadLinkSP, StackPtr, MachinePointerInfo());
7900}
7901
7902SDValue PPCTargetLowering::getReturnAddrFrameIndex(SelectionDAG &DAG) const {
7903 MachineFunction &MF = DAG.getMachineFunction();
7904 bool isPPC64 = Subtarget.isPPC64();
7905 EVT PtrVT = getPointerTy(MF.getDataLayout());
7906
7907 // Get current frame pointer save index. The users of this index will be
7908 // primarily DYNALLOC instructions.
7909 PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
7910 int RASI = FI->getReturnAddrSaveIndex();
7911
7912 // If the frame pointer save index hasn't been defined yet.
7913 if (!RASI) {
7914 // Find out what the fix offset of the frame pointer save area.
7915 int LROffset = Subtarget.getFrameLowering()->getReturnSaveOffset();
7916 // Allocate the frame index for frame pointer save area.
7917 RASI = MF.getFrameInfo().CreateFixedObject(isPPC64? 8 : 4, LROffset, false);
7918 // Save the result.
7919 FI->setReturnAddrSaveIndex(RASI);
7920 }
7921 return DAG.getFrameIndex(RASI, PtrVT);
7922}
7923
7924SDValue
7925PPCTargetLowering::getFramePointerFrameIndex(SelectionDAG & DAG) const {
7926 MachineFunction &MF = DAG.getMachineFunction();
7927 bool isPPC64 = Subtarget.isPPC64();
7928 EVT PtrVT = getPointerTy(MF.getDataLayout());
7929
7930 // Get current frame pointer save index. The users of this index will be
7931 // primarily DYNALLOC instructions.
7932 PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
7933 int FPSI = FI->getFramePointerSaveIndex();
7934
7935 // If the frame pointer save index hasn't been defined yet.
7936 if (!FPSI) {
7937 // Find out what the fix offset of the frame pointer save area.
7938 int FPOffset = Subtarget.getFrameLowering()->getFramePointerSaveOffset();
7939 // Allocate the frame index for frame pointer save area.
7940 FPSI = MF.getFrameInfo().CreateFixedObject(isPPC64? 8 : 4, FPOffset, true);
7941 // Save the result.
7942 FI->setFramePointerSaveIndex(FPSI);
7943 }
7944 return DAG.getFrameIndex(FPSI, PtrVT);
7945}
7946
7947SDValue PPCTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
7948 SelectionDAG &DAG) const {
7949 MachineFunction &MF = DAG.getMachineFunction();
7950 // Get the inputs.
7951 SDValue Chain = Op.getOperand(0);
7952 SDValue Size = Op.getOperand(1);
7953 SDLoc dl(Op);
7954
7955 // Get the correct type for pointers.
7956 EVT PtrVT = getPointerTy(DAG.getDataLayout());
7957 // Negate the size.
7958 SDValue NegSize = DAG.getNode(ISD::SUB, dl, PtrVT,
7959 DAG.getConstant(0, dl, PtrVT), Size);
7960 // Construct a node for the frame pointer save index.
7961 SDValue FPSIdx = getFramePointerFrameIndex(DAG);
7962 SDValue Ops[3] = { Chain, NegSize, FPSIdx };
7963 SDVTList VTs = DAG.getVTList(PtrVT, MVT::Other);
7964 if (hasInlineStackProbe(MF))
7965 return DAG.getNode(PPCISD::PROBED_ALLOCA, dl, VTs, Ops);
7966 return DAG.getNode(PPCISD::DYNALLOC, dl, VTs, Ops);
7967}
7968
7969SDValue PPCTargetLowering::LowerEH_DWARF_CFA(SDValue Op,
7970 SelectionDAG &DAG) const {
7971 MachineFunction &MF = DAG.getMachineFunction();
7972
7973 bool isPPC64 = Subtarget.isPPC64();
7974 EVT PtrVT = getPointerTy(DAG.getDataLayout());
7975
7976 int FI = MF.getFrameInfo().CreateFixedObject(isPPC64 ? 8 : 4, 0, false);
7977 return DAG.getFrameIndex(FI, PtrVT);
7978}
7979
7980SDValue PPCTargetLowering::lowerEH_SJLJ_SETJMP(SDValue Op,
7981 SelectionDAG &DAG) const {
7982 SDLoc DL(Op);
7983 return DAG.getNode(PPCISD::EH_SJLJ_SETJMP, DL,
7984 DAG.getVTList(MVT::i32, MVT::Other),
7985 Op.getOperand(0), Op.getOperand(1));
7986}
7987
7988SDValue PPCTargetLowering::lowerEH_SJLJ_LONGJMP(SDValue Op,
7989 SelectionDAG &DAG) const {
7990 SDLoc DL(Op);
7991 return DAG.getNode(PPCISD::EH_SJLJ_LONGJMP, DL, MVT::Other,
7992 Op.getOperand(0), Op.getOperand(1));
7993}
7994
7995SDValue PPCTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
7996 if (Op.getValueType().isVector())
7997 return LowerVectorLoad(Op, DAG);
7998
7999 assert(Op.getValueType() == MVT::i1 &&
8000 "Custom lowering only for i1 loads");
8001
8002 // First, load 8 bits into 32 bits, then truncate to 1 bit.
8003
8004 SDLoc dl(Op);
8005 LoadSDNode *LD = cast<LoadSDNode>(Op);
8006
8007 SDValue Chain = LD->getChain();
8008 SDValue BasePtr = LD->getBasePtr();
8009 MachineMemOperand *MMO = LD->getMemOperand();
8010
8011 SDValue NewLD =
8012 DAG.getExtLoad(ISD::EXTLOAD, dl, getPointerTy(DAG.getDataLayout()), Chain,
8013 BasePtr, MVT::i8, MMO);
8014 SDValue Result = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, NewLD);
8015
8016 SDValue Ops[] = { Result, SDValue(NewLD.getNode(), 1) };
8017 return DAG.getMergeValues(Ops, dl);
8018}
8019
/// Custom lowering for stores: vector stores take a dedicated path; i1 stores
/// are widened and emitted as a truncating i8 store.
SDValue PPCTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
  if (Op.getOperand(1).getValueType().isVector())
    return LowerVectorStore(Op, DAG);

  assert(Op.getOperand(1).getValueType() == MVT::i1 &&
         "Custom lowering only for i1 stores");

  // First, zero extend to 32 bits, then use a truncating store to 8 bits.

  SDLoc dl(Op);
  StoreSDNode *ST = cast<StoreSDNode>(Op);

  // Pull the pieces of the original store apart.
  SDValue Chain = ST->getChain();
  SDValue BasePtr = ST->getBasePtr();
  SDValue Value = ST->getValue();
  MachineMemOperand *MMO = ST->getMemOperand();

                     Value);
  // Reuse the original memory operand for the byte-sized truncating store.
  return DAG.getTruncStore(Chain, dl, Value, BasePtr, MVT::i8, MMO);
}
8041
8042// FIXME: Remove this once the ANDI glue bug is fixed:
8043SDValue PPCTargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
8044 assert(Op.getValueType() == MVT::i1 &&
8045 "Custom lowering only for i1 results");
8046
8047 SDLoc DL(Op);
8048 return DAG.getNode(PPCISD::ANDI_rec_1_GT_BIT, DL, MVT::i1, Op.getOperand(0));
8049}
8050
/// Lower a sub-legal vector truncate as a single vector shuffle, provided the
/// source fits in at most two vector registers and element counts/sizes are
/// powers of two; otherwise return SDValue() to fall back to default handling.
SDValue PPCTargetLowering::LowerTRUNCATEVector(SDValue Op,
                                               SelectionDAG &DAG) const {

  // Implements a vector truncate that fits in a vector register as a shuffle.
  // We want to legalize vector truncates down to where the source fits in
  // a vector register (and target is therefore smaller than vector register
  // size). At that point legalization will try to custom lower the sub-legal
  // result and get here - where we can contain the truncate as a single target
  // operation.

  // For example a trunc <2 x i16> to <2 x i8> could be visualized as follows:
  //   <MSB1|LSB1, MSB2|LSB2> to <LSB1, LSB2>
  //
  // We will implement it for big-endian ordering as this (where x denotes
  // undefined):
  //   < MSB1|LSB1, MSB2|LSB2, uu, uu, uu, uu, uu, uu> to
  //   < LSB1, LSB2, u, u, u, u, u, u, u, u, u, u, u, u, u, u>
  //
  // The same operation in little-endian ordering will be:
  //   <uu, uu, uu, uu, uu, uu, LSB2|MSB2, LSB1|MSB1> to
  //   <u, u, u, u, u, u, u, u, u, u, u, u, u, u, LSB2, LSB1>

  EVT TrgVT = Op.getValueType();
  assert(TrgVT.isVector() && "Vector type expected.");
  unsigned TrgNumElts = TrgVT.getVectorNumElements();
  EVT EltVT = TrgVT.getVectorElementType();
  // Bail out unless this truncate is custom-lowered for the target type and
  // the target fits in one vector register with power-of-2 element count.
  if (!isOperationCustom(Op.getOpcode(), TrgVT) ||
      TrgVT.getSizeInBits() > 128 || !isPowerOf2_32(TrgNumElts) ||
    return SDValue();

  SDValue N1 = Op.getOperand(0);
  EVT SrcVT = N1.getValueType();
  unsigned SrcSize = SrcVT.getSizeInBits();
  // The source must fit in at most two vector registers.
  if (SrcSize > 256 || !isPowerOf2_32(SrcVT.getVectorNumElements()) ||
    return SDValue();
  if (SrcSize == 256 && SrcVT.getVectorNumElements() < 2)
    return SDValue();

  // Width of the shuffle result: a full 128-bit register of target elements.
  unsigned WideNumElts = 128 / EltVT.getSizeInBits();
  EVT WideVT = EVT::getVectorVT(*DAG.getContext(), EltVT, WideNumElts);

  SDLoc DL(Op);
  SDValue Op1, Op2;
  if (SrcSize == 256) {
    // Split a two-register source into its low and high halves.
    EVT VecIdxTy = getVectorIdxTy(DAG.getDataLayout());
    EVT SplitVT =
    unsigned SplitNumElts = SplitVT.getVectorNumElements();
    Op1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, N1,
                      DAG.getConstant(0, DL, VecIdxTy));
    Op2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, N1,
                      DAG.getConstant(SplitNumElts, DL, VecIdxTy));
  }
  else {
    // Single-register source: widen if narrower than 128 bits and pair it
    // with an undef second operand.
    Op1 = SrcSize == 128 ? N1 : widenVec(DAG, N1, DL);
    Op2 = DAG.getUNDEF(WideVT);
  }

  // First list the elements we want to keep.
  unsigned SizeMult = SrcVT.getSizeInBits() / TrgVT.getSizeInBits() == 0
                          ? 0
                          : SrcSize / TrgVT.getSizeInBits();
  SmallVector<int, 16> ShuffV;
  if (Subtarget.isLittleEndian())
    for (unsigned i = 0; i < TrgNumElts; ++i)
      ShuffV.push_back(i * SizeMult);
  else
    for (unsigned i = 1; i <= TrgNumElts; ++i)
      ShuffV.push_back(i * SizeMult - 1);

  // Populate the remaining elements with undefs.
  // NOTE(review): pushing WideNumElts + 1 pins these "don't care" lanes to
  // element 1 of Op2 instead of marking them undef; the commented-out line
  // and upstream use an undef (-1 style) index here — confirm intent.
  for (unsigned i = TrgNumElts; i < WideNumElts; ++i)
    // ShuffV.push_back(i + WideNumElts);
    ShuffV.push_back(WideNumElts + 1);

  Op1 = DAG.getNode(ISD::BITCAST, DL, WideVT, Op1);
  Op2 = DAG.getNode(ISD::BITCAST, DL, WideVT, Op2);
  return DAG.getVectorShuffle(WideVT, DL, Op1, Op2, ShuffV);
}
8131
/// LowerSELECT_CC - Lower floating point select_cc's into fsel instruction when
/// possible.
SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
  // Operands of SELECT_CC: (LHS, RHS, TrueVal, FalseVal, CondCode).
  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
  EVT ResVT = Op.getValueType();
  EVT CmpVT = Op.getOperand(0).getValueType();
  SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
  SDValue TV = Op.getOperand(2), FV = Op.getOperand(3);
  SDLoc dl(Op);

  // Without power9-vector, we don't have native instruction for f128 comparison.
  // Following transformation to libcall is needed for setcc:
  // select_cc lhs, rhs, tv, fv, cc -> select_cc (setcc cc, x, y), 0, tv, fv, NE
  if (!Subtarget.hasP9Vector() && CmpVT == MVT::f128) {
    SDValue Z = DAG.getSetCC(
        dl, getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), CmpVT),
        LHS, RHS, CC);
    SDValue Zero = DAG.getConstant(0, dl, Z.getValueType());
    return DAG.getSelectCC(dl, Z, Zero, TV, FV, ISD::SETNE);
  }

  // Not FP, or using SPE? Not a fsel.
  if (!CmpVT.isFloatingPoint() || !TV.getValueType().isFloatingPoint() ||
      Subtarget.hasSPE())
    return Op;

  SDNodeFlags Flags = Op.getNode()->getFlags();

  // We have xsmaxc[dq]p/xsminc[dq]p which are OK to emit even in the
  // presence of infinities.
  if (Subtarget.hasP9Vector() && LHS == TV && RHS == FV) {
    switch (CC) {
    default:
      break;
    case ISD::SETOGT:
    case ISD::SETGT:
      return DAG.getNode(PPCISD::XSMAXC, dl, Op.getValueType(), LHS, RHS);
    case ISD::SETOLT:
    case ISD::SETLT:
      return DAG.getNode(PPCISD::XSMINC, dl, Op.getValueType(), LHS, RHS);
    }
  }

  // We might be able to do better than this under some circumstances, but in
  // general, fsel-based lowering of select is a finite-math-only optimization.
  // For more information, see section F.3 of the 2.06 ISA specification.
  // With ISA 3.0
  if (!Flags.hasNoInfs() || !Flags.hasNoNaNs() || ResVT == MVT::f128)
    return Op;

  // If the RHS of the comparison is a 0.0, we don't need to do the
  // subtraction at all.
  SDValue Sel1;
  switch (CC) {
  default: break; // SETUO etc aren't handled by fsel.
  case ISD::SETNE:
    std::swap(TV, FV);
    [[fallthrough]];
  case ISD::SETEQ:
    if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits
      LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
    // EQ is built from two fsels: (LHS >= 0 ? TV : FV) then (-LHS >= 0 ? .. : FV).
    Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV);
    if (Sel1.getValueType() == MVT::f32) // Comparison is always 64-bits
      Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1);
    return DAG.getNode(PPCISD::FSEL, dl, ResVT,
                       DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), Sel1, FV);
  case ISD::SETULT:
  case ISD::SETLT:
    std::swap(TV, FV); // fsel is natively setge, swap operands for setlt
    [[fallthrough]];
  case ISD::SETOGE:
  case ISD::SETGE:
    if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits
      LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
    return DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV);
  case ISD::SETUGT:
  case ISD::SETGT:
    std::swap(TV, FV); // fsel is natively setge, swap operands for setlt
    [[fallthrough]];
  case ISD::SETOLE:
  case ISD::SETLE:
    if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits
      LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
    return DAG.getNode(PPCISD::FSEL, dl, ResVT,
                       DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), TV, FV);
  }

  // General case: compare via an explicit FSUB and feed its sign to fsel.
  SDValue Cmp;
  switch (CC) {
  default: break; // SETUO etc aren't handled by fsel.
  case ISD::SETNE:
    std::swap(TV, FV);
    [[fallthrough]];
  case ISD::SETEQ:
    Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags);
    if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
      Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
    Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
    if (Sel1.getValueType() == MVT::f32) // Comparison is always 64-bits
      Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1);
    return DAG.getNode(PPCISD::FSEL, dl, ResVT,
                       DAG.getNode(ISD::FNEG, dl, MVT::f64, Cmp), Sel1, FV);
  case ISD::SETULT:
  case ISD::SETLT:
    Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags);
    if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
      Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
    return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
  case ISD::SETOGE:
  case ISD::SETGE:
    Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags);
    if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
      Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
    return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
  case ISD::SETUGT:
  case ISD::SETGT:
    Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS, Flags);
    if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
      Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
    return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
  case ISD::SETOLE:
  case ISD::SETLE:
    Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS, Flags);
    if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
      Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
    return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
  }
  // Unhandled condition codes fall back to default expansion.
  return Op;
}
8262
8263static unsigned getPPCStrictOpcode(unsigned Opc) {
8264 switch (Opc) {
8265 default:
8266 llvm_unreachable("No strict version of this opcode!");
8267 case PPCISD::FCTIDZ:
8268 return PPCISD::STRICT_FCTIDZ;
8269 case PPCISD::FCTIWZ:
8270 return PPCISD::STRICT_FCTIWZ;
8271 case PPCISD::FCTIDUZ:
8272 return PPCISD::STRICT_FCTIDUZ;
8273 case PPCISD::FCTIWUZ:
8274 return PPCISD::STRICT_FCTIWUZ;
8275 case PPCISD::FCFID:
8276 return PPCISD::STRICT_FCFID;
8277 case PPCISD::FCFIDU:
8278 return PPCISD::STRICT_FCFIDU;
8279 case PPCISD::FCFIDS:
8280 return PPCISD::STRICT_FCFIDS;
8281 case PPCISD::FCFIDUS:
8282 return PPCISD::STRICT_FCFIDUS;
8283 }
8284}
8285
                              const PPCSubtarget &Subtarget) {
  // Emits the PPC fcti*z-family node that converts an FP value to an
  // integer (kept in an FP register); callers move/store the result.
  SDLoc dl(Op);
  bool IsStrict = Op->isStrictFPOpcode();
  bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT ||
                  Op.getOpcode() == ISD::STRICT_FP_TO_SINT;

  // TODO: Any other flags to propagate?
  SDNodeFlags Flags;
  Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());

  // For strict nodes, source is the second operand.
  SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
  SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
  MVT DestTy = Op.getSimpleValueType();
  assert(Src.getValueType().isFloatingPoint() &&
         (DestTy == MVT::i8 || DestTy == MVT::i16 || DestTy == MVT::i32 ||
          DestTy == MVT::i64) &&
         "Invalid FP_TO_INT types");
  // f32 sources are widened to f64 first; the conversion nodes below are
  // emitted with an f64 (or f128) operand.
  if (Src.getValueType() == MVT::f32) {
    if (IsStrict) {
      Src =
              DAG.getVTList(MVT::f64, MVT::Other), {Chain, Src}, Flags);
      Chain = Src.getValue(1);
    } else
      Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);
  }
  // Narrow (i8/i16) results are computed in the subtarget's native scalar
  // integer width when P9 vector support is available.
  if ((DestTy == MVT::i8 || DestTy == MVT::i16) && Subtarget.hasP9Vector())
    DestTy = Subtarget.getScalarIntVT();
  unsigned Opc = ISD::DELETED_NODE;
  switch (DestTy.SimpleTy) {
  default: llvm_unreachable("Unhandled FP_TO_INT type in custom expander!");
  case MVT::i32:
    // Without FPCVT there is no fctiwuz; an i32 unsigned result is obtained
    // through the signed 64-bit truncating conversion instead.
    Opc = IsSigned ? PPCISD::FCTIWZ
                   : (Subtarget.hasFPCVT() ? PPCISD::FCTIWUZ : PPCISD::FCTIDZ);
    break;
  case MVT::i64:
    assert((IsSigned || Subtarget.hasFPCVT()) &&
           "i64 FP_TO_UINT is supported only with FPCVT");
    Opc = IsSigned ? PPCISD::FCTIDZ : PPCISD::FCTIDUZ;
  }
  // f128 conversions stay in f128; everything else produces the integer in
  // an f64-typed value.
  EVT ConvTy = Src.getValueType() == MVT::f128 ? MVT::f128 : MVT::f64;
  SDValue Conv;
  if (IsStrict) {
    Conv = DAG.getNode(Opc, dl, DAG.getVTList(ConvTy, MVT::Other), {Chain, Src},
                       Flags);
  } else {
    Conv = DAG.getNode(Opc, dl, ConvTy, Src);
  }
  return Conv;
}
8339
/// Lower an FP-to-integer conversion by converting in an FP register and
/// storing the result to a stack slot; fills \p RLI with the slot's address,
/// chain and memory info so the caller (or a later reuse) can load from it.
void PPCTargetLowering::LowerFP_TO_INTForReuse(SDValue Op, ReuseLoadInfo &RLI,
                                               SelectionDAG &DAG,
                                               const SDLoc &dl) const {
  SDValue Tmp = convertFPToInt(Op, DAG, Subtarget);
  bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT ||
                  Op.getOpcode() == ISD::STRICT_FP_TO_SINT;
  bool IsStrict = Op->isStrictFPOpcode();

  // Convert the FP value to an int value through memory.
  // With STFIWX we can store just the 4 low-order bytes of the FP register,
  // so only a 4-byte slot is needed; otherwise the full f64 is spilled.
  bool i32Stack = Op.getValueType() == MVT::i32 && Subtarget.hasSTFIWX() &&
                  (IsSigned || Subtarget.hasFPCVT());
  SDValue FIPtr = DAG.CreateStackTemporary(i32Stack ? MVT::i32 : MVT::f64);
  int FI = cast<FrameIndexSDNode>(FIPtr)->getIndex();
  MachinePointerInfo MPI =

  // Emit a store to the stack slot.
  SDValue Chain = IsStrict ? Tmp.getValue(1) : DAG.getEntryNode();
  Align Alignment(DAG.getEVTAlign(Tmp.getValueType()));
  if (i32Stack) {
    MachineFunction &MF = DAG.getMachineFunction();
    Alignment = Align(4);
    MachineMemOperand *MMO =
        MF.getMachineMemOperand(MPI, MachineMemOperand::MOStore, 4, Alignment);
    SDValue Ops[] = { Chain, Tmp, FIPtr };
    Chain = DAG.getMemIntrinsicNode(PPCISD::STFIWX, dl,
              DAG.getVTList(MVT::Other), Ops, MVT::i32, MMO);
  } else
    Chain = DAG.getStore(Chain, dl, Tmp, FIPtr, MPI, Alignment);

  // Result is a load from the stack slot. If loading 4 bytes, make sure to
  // add in a bias on big endian: the integer lives in the low half of the
  // stored f64, which is the upper 4 bytes on a big-endian target.
  if (Op.getValueType() == MVT::i32 && !i32Stack &&
      !Subtarget.isLittleEndian()) {
    FIPtr = DAG.getNode(ISD::ADD, dl, FIPtr.getValueType(), FIPtr,
                        DAG.getConstant(4, dl, FIPtr.getValueType()));
    MPI = MPI.getWithOffset(4);
  }

  RLI.Chain = Chain;
  RLI.Ptr = FIPtr;
  RLI.MPI = MPI;
  RLI.Alignment = Alignment;
}
8384
8385/// Custom lowers floating point to integer conversions to use
8386/// the direct move instructions available in ISA 2.07 to avoid the
8387/// need for load/store combinations.
8388SDValue PPCTargetLowering::LowerFP_TO_INTDirectMove(SDValue Op,
8389 SelectionDAG &DAG,
8390 const SDLoc &dl) const {
8391 SDValue Conv = convertFPToInt(Op, DAG, Subtarget);
8392 SDValue Mov = DAG.getNode(PPCISD::MFVSR, dl, Op.getValueType(), Conv);
8393 if (Op->isStrictFPOpcode())
8394 return DAG.getMergeValues({Mov, Conv.getValue(1)}, dl);
8395 else
8396 return Mov;
8397}
8398
/// Lower [STRICT_]FP_TO_[SU]INT for scalar results: f128 sources are legal
/// with P9 vector support, ppcf128->i32 is expanded by hand, and everything
/// else goes through a direct move (P8+/PPC64) or a stack store/load.
SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
                                          const SDLoc &dl) const {
  bool IsStrict = Op->isStrictFPOpcode();
  bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT ||
                  Op.getOpcode() == ISD::STRICT_FP_TO_SINT;
  SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
  EVT SrcVT = Src.getValueType();
  EVT DstVT = Op.getValueType();

  // FP to INT conversions are legal for f128.
  if (SrcVT == MVT::f128)
    return Subtarget.hasP9Vector() ? Op : SDValue();

  // Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on
  // PPC (the libcall is not available).
  if (SrcVT == MVT::ppcf128) {
    if (DstVT == MVT::i32) {
      // TODO: Conservatively pass only nofpexcept flag here. Need to check and
      // set other fast-math flags to FP operations in both strict and
      // non-strict cases. (FP_TO_SINT, FSUB)
      SDNodeFlags Flags;
      Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());

      if (IsSigned) {
        SDValue Lo, Hi;
        std::tie(Lo, Hi) = DAG.SplitScalar(Src, dl, MVT::f64, MVT::f64);

        // Add the two halves of the long double in round-to-zero mode, and use
        // a smaller FP_TO_SINT.
        if (IsStrict) {
          SDValue Res = DAG.getNode(PPCISD::STRICT_FADDRTZ, dl,
                                    DAG.getVTList(MVT::f64, MVT::Other),
                                    {Op.getOperand(0), Lo, Hi}, Flags);
          return DAG.getNode(ISD::STRICT_FP_TO_SINT, dl,
                             DAG.getVTList(MVT::i32, MVT::Other),
                             {Res.getValue(1), Res}, Flags);
        } else {
          SDValue Res = DAG.getNode(PPCISD::FADDRTZ, dl, MVT::f64, Lo, Hi);
          return DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Res);
        }
      } else {
        // Unsigned case: bias values >= 2^31 into signed range, convert,
        // then flip the sign bit back in on the integer side.
        const uint64_t TwoE31[] = {0x41e0000000000000LL, 0};
        APFloat APF = APFloat(APFloat::PPCDoubleDouble(), APInt(128, TwoE31));
        SDValue Cst = DAG.getConstantFP(APF, dl, SrcVT);
        SDValue SignMask = DAG.getConstant(0x80000000, dl, DstVT);
        if (IsStrict) {
          // Sel = Src < 0x80000000
          // FltOfs = select Sel, 0.0, 0x80000000
          // IntOfs = select Sel, 0, 0x80000000
          // Result = fp_to_sint(Src - FltOfs) ^ IntOfs
          SDValue Chain = Op.getOperand(0);
          EVT SetCCVT =
              getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
          EVT DstSetCCVT =
              getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), DstVT);
          SDValue Sel = DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT,
                                     Chain, true);
          Chain = Sel.getValue(1);

          SDValue FltOfs = DAG.getSelect(
              dl, SrcVT, Sel, DAG.getConstantFP(0.0, dl, SrcVT), Cst);
          Sel = DAG.getBoolExtOrTrunc(Sel, dl, DstSetCCVT, DstVT);

          SDValue Val = DAG.getNode(ISD::STRICT_FSUB, dl,
                                    DAG.getVTList(SrcVT, MVT::Other),
                                    {Chain, Src, FltOfs}, Flags);
          Chain = Val.getValue(1);
          SDValue SInt = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl,
                                     DAG.getVTList(DstVT, MVT::Other),
                                     {Chain, Val}, Flags);
          Chain = SInt.getValue(1);
          SDValue IntOfs = DAG.getSelect(
              dl, DstVT, Sel, DAG.getConstant(0, dl, DstVT), SignMask);
          SDValue Result = DAG.getNode(ISD::XOR, dl, DstVT, SInt, IntOfs);
          return DAG.getMergeValues({Result, Chain}, dl);
        } else {
          // X>=2^31 ? (int)(X-2^31)+0x80000000 : (int)X
          // FIXME: generated code sucks.
          SDValue True = DAG.getNode(ISD::FSUB, dl, MVT::ppcf128, Src, Cst);
          True = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, True);
          True = DAG.getNode(ISD::ADD, dl, MVT::i32, True, SignMask);
          SDValue False = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Src);
          return DAG.getSelectCC(dl, Src, Cst, True, False, ISD::SETGE);
        }
      }
    }

    // Other ppcf128 destinations fall back to the default expansion.
    return SDValue();
  }

  if (Subtarget.hasDirectMove() && Subtarget.isPPC64())
    return LowerFP_TO_INTDirectMove(Op, DAG, dl);

  // No direct move available: convert through a stack slot and load back.
  ReuseLoadInfo RLI;
  LowerFP_TO_INTForReuse(Op, RLI, DAG, dl);

  return DAG.getLoad(Op.getValueType(), dl, RLI.Chain, RLI.Ptr, RLI.MPI,
                     RLI.Alignment, RLI.MMOFlags(), RLI.AAInfo, RLI.Ranges);
}
8498
8499// We're trying to insert a regular store, S, and then a load, L. If the
8500// incoming value, O, is a load, we might just be able to have our load use the
8501// address used by O. However, we don't know if anything else will store to
8502// that address before we can load from it. To prevent this situation, we need
8503// to insert our load, L, into the chain as a peer of O. To do this, we give L
8504// the same chain operand as O, we create a token factor from the chain results
8505// of O and L, and we replace all uses of O's chain result with that token
8506// factor (this last part is handled by makeEquivalentMemoryOrdering).
8507bool PPCTargetLowering::canReuseLoadAddress(SDValue Op, EVT MemVT,
8508 ReuseLoadInfo &RLI,
8509 SelectionDAG &DAG,
8510 ISD::LoadExtType ET) const {
8511 // Conservatively skip reusing for constrained FP nodes.
8512 if (Op->isStrictFPOpcode())
8513 return false;
8514
8515 SDLoc dl(Op);
8516 bool ValidFPToUint = Op.getOpcode() == ISD::FP_TO_UINT &&
8517 (Subtarget.hasFPCVT() || Op.getValueType() == MVT::i32);
8518 if (ET == ISD::NON_EXTLOAD &&
8519 (ValidFPToUint || Op.getOpcode() == ISD::FP_TO_SINT) &&
8520 isOperationLegalOrCustom(Op.getOpcode(),
8521 Op.getOperand(0).getValueType())) {
8522
8523 LowerFP_TO_INTForReuse(Op, RLI, DAG, dl);
8524 return true;
8525 }
8526
8527 LoadSDNode *LD = dyn_cast<LoadSDNode>(Op);
8528 if (!LD || LD->getExtensionType() != ET || LD->isVolatile() ||
8529 LD->isNonTemporal())
8530 return false;
8531 if (LD->getMemoryVT() != MemVT)
8532 return false;
8533
8534 // If the result of the load is an illegal type, then we can't build a
8535 // valid chain for reuse since the legalised loads and token factor node that
8536 // ties the legalised loads together uses a different output chain then the
8537 // illegal load.
8538 if (!isTypeLegal(LD->getValueType(0)))
8539 return false;
8540
8541 RLI.Ptr = LD->getBasePtr();
8542 if (LD->isIndexed() && !LD->getOffset().isUndef()) {
8543 assert(LD->getAddressingMode() == ISD::PRE_INC &&
8544 "Non-pre-inc AM on PPC?");
8545 RLI.Ptr = DAG.getNode(ISD::ADD, dl, RLI.Ptr.getValueType(), RLI.Ptr,
8546 LD->getOffset());
8547 }
8548
8549 RLI.Chain = LD->getChain();
8550 RLI.MPI = LD->getPointerInfo();
8551 RLI.IsDereferenceable = LD->isDereferenceable();
8552 RLI.IsInvariant = LD->isInvariant();
8553 RLI.Alignment = LD->getAlign();
8554 RLI.AAInfo = LD->getAAInfo();
8555 RLI.Ranges = LD->getRanges();
8556
8557 RLI.ResChain = SDValue(LD, LD->isIndexed() ? 2 : 1);
8558 return true;
8559}
8560
8561/// Analyze profitability of direct move
8562/// prefer float load to int load plus direct move
8563/// when there is no integer use of int load
8564bool PPCTargetLowering::directMoveIsProfitable(const SDValue &Op) const {
8565 SDNode *Origin = Op.getOperand(Op->isStrictFPOpcode() ? 1 : 0).getNode();
8566 if (Origin->getOpcode() != ISD::LOAD)
8567 return true;
8568
8569 // If there is no LXSIBZX/LXSIHZX, like Power8,
8570 // prefer direct move if the memory size is 1 or 2 bytes.
8571 MachineMemOperand *MMO = cast<LoadSDNode>(Origin)->getMemOperand();
8572 if (!Subtarget.hasP9Vector() &&
8573 (!MMO->getSize().hasValue() || MMO->getSize().getValue() <= 2))
8574 return true;
8575
8576 for (SDUse &Use : Origin->uses()) {
8577
8578 // Only look at the users of the loaded value.
8579 if (Use.getResNo() != 0)
8580 continue;
8581
8582 SDNode *User = Use.getUser();
8583 if (User->getOpcode() != ISD::SINT_TO_FP &&
8584 User->getOpcode() != ISD::UINT_TO_FP &&
8585 User->getOpcode() != ISD::STRICT_SINT_TO_FP &&
8586 User->getOpcode() != ISD::STRICT_UINT_TO_FP)
8587 return true;
8588 }
8589
8590 return false;
8591}
8592
                              const PPCSubtarget &Subtarget,
                              SDValue Chain = SDValue()) {
  // Emits the fcfid-family node converting an integer held in an FP
  // register (Src) to floating point. The optional Chain overrides the
  // strict node's incoming chain — presumably so callers can thread a chain
  // extended by an intervening load/store; TODO confirm against callers.
  bool IsSigned = Op.getOpcode() == ISD::SINT_TO_FP ||
                  Op.getOpcode() == ISD::STRICT_SINT_TO_FP;
  SDLoc dl(Op);

  // TODO: Any other flags to propagate?
  SDNodeFlags Flags;
  Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());

  // If we have FCFIDS, then use it when converting to single-precision.
  // Otherwise, convert to double-precision and then round.
  bool IsSingle = Op.getValueType() == MVT::f32 && Subtarget.hasFPCVT();
  unsigned ConvOpc = IsSingle ? (IsSigned ? PPCISD::FCFIDS : PPCISD::FCFIDUS)
                              : (IsSigned ? PPCISD::FCFID : PPCISD::FCFIDU);
  EVT ConvTy = IsSingle ? MVT::f32 : MVT::f64;
  if (Op->isStrictFPOpcode()) {
    if (!Chain)
      Chain = Op.getOperand(0);
    // Strict variants carry a chain alongside the value.
    return DAG.getNode(getPPCStrictOpcode(ConvOpc), dl,
                       DAG.getVTList(ConvTy, MVT::Other), {Chain, Src}, Flags);
  } else
    return DAG.getNode(ConvOpc, dl, ConvTy, Src);
}
8618
8619/// Custom lowers integer to floating point conversions to use
8620/// the direct move instructions available in ISA 2.07 to avoid the
8621/// need for load/store combinations.
8622SDValue PPCTargetLowering::LowerINT_TO_FPDirectMove(SDValue Op,
8623 SelectionDAG &DAG,
8624 const SDLoc &dl) const {
8625 assert((Op.getValueType() == MVT::f32 ||
8626 Op.getValueType() == MVT::f64) &&
8627 "Invalid floating point type as target of conversion");
8628 assert(Subtarget.hasFPCVT() &&
8629 "Int to FP conversions with direct moves require FPCVT");
8630 SDValue Src = Op.getOperand(Op->isStrictFPOpcode() ? 1 : 0);
8631 bool WordInt = Src.getSimpleValueType().SimpleTy == MVT::i32;
8632 bool Signed = Op.getOpcode() == ISD::SINT_TO_FP ||
8633 Op.getOpcode() == ISD::STRICT_SINT_TO_FP;
8634 unsigned MovOpc = (WordInt && !Signed) ? PPCISD::MTVSRZ : PPCISD::MTVSRA;
8635 SDValue Mov = DAG.getNode(MovOpc, dl, MVT::f64, Src);
8636 return convertIntToFP(Op, Mov, DAG, Subtarget);
8637}
8638
8639static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl) {
8640
8641 EVT VecVT = Vec.getValueType();
8642 assert(VecVT.isVector() && "Expected a vector type.");
8643 assert(VecVT.getSizeInBits() < 128 && "Vector is already full width.");
8644
8645 EVT EltVT = VecVT.getVectorElementType();
8646 unsigned WideNumElts = 128 / EltVT.getSizeInBits();
8647 EVT WideVT = EVT::getVectorVT(*DAG.getContext(), EltVT, WideNumElts);
8648
8649 unsigned NumConcat = WideNumElts / VecVT.getVectorNumElements();
8650 SmallVector<SDValue, 16> Ops(NumConcat);
8651 Ops[0] = Vec;
8652 SDValue UndefVec = DAG.getUNDEF(VecVT);
8653 for (unsigned i = 1; i < NumConcat; ++i)
8654 Ops[i] = UndefVec;
8655
8656 return DAG.getNode(ISD::CONCAT_VECTORS, dl, WideVT, Ops);
8657}
8658
/// Lower a vector [STRICT_][SU]INT_TO_FP whose source is narrower than the
/// result: widen the source to 128 bits, shuffle its elements into the lane
/// positions of the intermediate integer type, extend, and convert.
SDValue PPCTargetLowering::LowerINT_TO_FPVector(SDValue Op, SelectionDAG &DAG,
                                                const SDLoc &dl) const {
  bool IsStrict = Op->isStrictFPOpcode();
  unsigned Opc = Op.getOpcode();
  SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
         "Unexpected conversion type");
  assert((Op.getValueType() == MVT::v2f64 || Op.getValueType() == MVT::v4f32) &&
         "Supports conversions to v2f64/v4f32 only.");

  // TODO: Any other flags to propagate?
  SDNodeFlags Flags;
  Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());

  bool SignedConv = Opc == ISD::SINT_TO_FP || Opc == ISD::STRICT_SINT_TO_FP;
  bool FourEltRes = Op.getValueType() == MVT::v4f32;

  SDValue Wide = widenVec(DAG, Src, dl);
  EVT WideVT = Wide.getValueType();
  unsigned WideNumElts = WideVT.getVectorNumElements();
  MVT IntermediateVT = FourEltRes ? MVT::v4i32 : MVT::v2i64;

  // Default the whole mask to the second shuffle operand (undef for signed,
  // zero for unsigned), then place the source elements where each result
  // lane's narrow payload belongs for this endianness.
  SmallVector<int, 16> ShuffV;
  for (unsigned i = 0; i < WideNumElts; ++i)
    ShuffV.push_back(i + WideNumElts);

  int Stride = FourEltRes ? WideNumElts / 4 : WideNumElts / 2;
  int SaveElts = FourEltRes ? 4 : 2;
  if (Subtarget.isLittleEndian())
    for (int i = 0; i < SaveElts; i++)
      ShuffV[i * Stride] = i;
  else
    for (int i = 1; i <= SaveElts; i++)
      ShuffV[i * Stride - 1] = i - 1;

  // Unsigned sources are zero-filled by shuffling in a zero vector; signed
  // sources use SIGN_EXTEND_INREG below instead.
  SDValue ShuffleSrc2 =
      SignedConv ? DAG.getUNDEF(WideVT) : DAG.getConstant(0, dl, WideVT);
  SDValue Arrange = DAG.getVectorShuffle(WideVT, dl, Wide, ShuffleSrc2, ShuffV);

  SDValue Extend;
  if (SignedConv) {
    Arrange = DAG.getBitcast(IntermediateVT, Arrange);
    EVT ExtVT = Src.getValueType();
    if (Subtarget.hasP9Altivec())
      ExtVT = EVT::getVectorVT(*DAG.getContext(), WideVT.getVectorElementType(),
                               IntermediateVT.getVectorNumElements());

    Extend = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, IntermediateVT, Arrange,
                         DAG.getValueType(ExtVT));
  } else
    Extend = DAG.getNode(ISD::BITCAST, dl, IntermediateVT, Arrange);

  // Re-emit the original conversion opcode on the extended intermediate; it
  // is legal for the intermediate-to-result combination.
  if (IsStrict)
    return DAG.getNode(Opc, dl, DAG.getVTList(Op.getValueType(), MVT::Other),
                       {Op.getOperand(0), Extend}, Flags);

  return DAG.getNode(Opc, dl, Op.getValueType(), Extend);
}
8718
/// Lower [STRICT_][SU]INT_TO_FP: dispatches to the vector/direct-move
/// helpers when applicable, otherwise gets the integer into an FP register
/// (reusing an existing load's address when possible, or spilling through a
/// stack slot) and emits the fcfid-family conversion.
SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
                                          SelectionDAG &DAG) const {
  SDLoc dl(Op);
  bool IsSigned = Op.getOpcode() == ISD::SINT_TO_FP ||
                  Op.getOpcode() == ISD::STRICT_SINT_TO_FP;
  bool IsStrict = Op->isStrictFPOpcode();
  SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
  SDValue Chain = IsStrict ? Op.getOperand(0) : DAG.getEntryNode();

  // TODO: Any other flags to propagate?
  SDNodeFlags Flags;
  Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());

  EVT InVT = Src.getValueType();
  EVT OutVT = Op.getValueType();
  if (OutVT.isVector() && OutVT.isFloatingPoint() &&
      isOperationCustom(Op.getOpcode(), InVT))
    return LowerINT_TO_FPVector(Op, DAG, dl);

  // Conversions to f128 are legal.
  if (Op.getValueType() == MVT::f128)
    return Subtarget.hasP9Vector() ? Op : SDValue();

  // Don't handle ppc_fp128 here; let it be lowered to a libcall.
  if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
    return SDValue();

  // An i1 source is just a select between the FP constants 1.0 and 0.0.
  if (Src.getValueType() == MVT::i1) {
    SDValue Sel = DAG.getNode(ISD::SELECT, dl, Op.getValueType(), Src,
                              DAG.getConstantFP(1.0, dl, Op.getValueType()),
                              DAG.getConstantFP(0.0, dl, Op.getValueType()));
    if (IsStrict)
      return DAG.getMergeValues({Sel, Chain}, dl);
    else
      return Sel;
  }

  // If we have direct moves, we can do all the conversion, skip the store/load
  // however, without FPCVT we can't do most conversions.
  if (Subtarget.hasDirectMove() && directMoveIsProfitable(Op) &&
      Subtarget.isPPC64() && Subtarget.hasFPCVT())
    return LowerINT_TO_FPDirectMove(Op, DAG, dl);

  assert((IsSigned || Subtarget.hasFPCVT()) &&
         "UINT_TO_FP is supported only with FPCVT");

  if (Src.getValueType() == MVT::i64) {
    SDValue SINT = Src;
    // When converting to single-precision, we actually need to convert
    // to double-precision first and then round to single-precision.
    // To avoid double-rounding effects during that operation, we have
    // to prepare the input operand. Bits that might be truncated when
    // converting to double-precision are replaced by a bit that won't
    // be lost at this stage, but is below the single-precision rounding
    // position.
    //
    // However, if afn is in effect, accept double
    // rounding to avoid the extra overhead.
    // FIXME: Currently INT_TO_FP can't support fast math flags because
    // of nneg flag, thus Op->getFlags().hasApproximateFuncs() is always
    // false.
    if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT() &&
        !Op->getFlags().hasApproximateFuncs()) {

      // Twiddle input to make sure the low 11 bits are zero. (If this
      // is the case, we are guaranteed the value will fit into the 53 bit
      // mantissa of an IEEE double-precision value without rounding.)
      // If any of those low 11 bits were not zero originally, make sure
      // bit 12 (value 2048) is set instead, so that the final rounding
      // to single-precision gets the correct result.
      SDValue Round = DAG.getNode(ISD::AND, dl, MVT::i64,
                                  SINT, DAG.getConstant(2047, dl, MVT::i64));
      Round = DAG.getNode(ISD::ADD, dl, MVT::i64,
                          Round, DAG.getConstant(2047, dl, MVT::i64));
      Round = DAG.getNode(ISD::OR, dl, MVT::i64, Round, SINT);
      Round = DAG.getNode(ISD::AND, dl, MVT::i64, Round,
                          DAG.getSignedConstant(-2048, dl, MVT::i64));

      // However, we cannot use that value unconditionally: if the magnitude
      // of the input value is small, the bit-twiddling we did above might
      // end up visibly changing the output. Fortunately, in that case, we
      // don't need to twiddle bits since the original input will convert
      // exactly to double-precision floating-point already. Therefore,
      // construct a conditional to use the original value if the top 11
      // bits are all sign-bit copies, and use the rounded value computed
      // above otherwise.
      SDValue Cond = DAG.getNode(ISD::SRA, dl, MVT::i64,
                                 SINT, DAG.getConstant(53, dl, MVT::i32));
      Cond = DAG.getNode(ISD::ADD, dl, MVT::i64,
                         Cond, DAG.getConstant(1, dl, MVT::i64));
      Cond = DAG.getSetCC(
          dl,
          getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), MVT::i64),
          Cond, DAG.getConstant(1, dl, MVT::i64), ISD::SETUGT);

      SINT = DAG.getNode(ISD::SELECT, dl, MVT::i64, Cond, Round, SINT);
    }

    // Get the 64-bit value into an FP register: reuse an existing load's
    // address (lfd / lfiwax / lfiwzx) when possible, spill an extended i32
    // source through a 4-byte stack slot, or fall back to a plain bitcast.
    ReuseLoadInfo RLI;
    SDValue Bits;

    MachineFunction &MF = DAG.getMachineFunction();
    if (canReuseLoadAddress(SINT, MVT::i64, RLI, DAG)) {
      Bits = DAG.getLoad(MVT::f64, dl, RLI.Chain, RLI.Ptr, RLI.MPI,
                         RLI.Alignment, RLI.MMOFlags(), RLI.AAInfo, RLI.Ranges);
      if (RLI.ResChain)
        DAG.makeEquivalentMemoryOrdering(RLI.ResChain, Bits.getValue(1));
    } else if (Subtarget.hasLFIWAX() &&
               canReuseLoadAddress(SINT, MVT::i32, RLI, DAG, ISD::SEXTLOAD)) {
      MachineMemOperand *MMO =
                              RLI.Alignment, RLI.AAInfo, RLI.Ranges);
      SDValue Ops[] = { RLI.Chain, RLI.Ptr };
      Bits = DAG.getMemIntrinsicNode(PPCISD::LFIWAX, dl,
                                     DAG.getVTList(MVT::f64, MVT::Other),
                                     Ops, MVT::i32, MMO);
      if (RLI.ResChain)
        DAG.makeEquivalentMemoryOrdering(RLI.ResChain, Bits.getValue(1));
    } else if (Subtarget.hasFPCVT() &&
               canReuseLoadAddress(SINT, MVT::i32, RLI, DAG, ISD::ZEXTLOAD)) {
      MachineMemOperand *MMO =
                              RLI.Alignment, RLI.AAInfo, RLI.Ranges);
      SDValue Ops[] = { RLI.Chain, RLI.Ptr };
      Bits = DAG.getMemIntrinsicNode(PPCISD::LFIWZX, dl,
                                     DAG.getVTList(MVT::f64, MVT::Other),
                                     Ops, MVT::i32, MMO);
      if (RLI.ResChain)
        DAG.makeEquivalentMemoryOrdering(RLI.ResChain, Bits.getValue(1));
    } else if (((Subtarget.hasLFIWAX() &&
                 SINT.getOpcode() == ISD::SIGN_EXTEND) ||
                (Subtarget.hasFPCVT() &&
                 SINT.getOpcode() == ISD::ZERO_EXTEND)) &&
               SINT.getOperand(0).getValueType() == MVT::i32) {
      // The source is an extended i32: store just the narrow value and
      // re-extend it while loading with lfiwax/lfiwzx.
      MachineFrameInfo &MFI = MF.getFrameInfo();
      EVT PtrVT = getPointerTy(DAG.getDataLayout());

      int FrameIdx = MFI.CreateStackObject(4, Align(4), false);
      SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);

      SDValue Store = DAG.getStore(Chain, dl, SINT.getOperand(0), FIdx,
                                       DAG.getMachineFunction(), FrameIdx));
      Chain = Store;

      assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 &&
             "Expected an i32 store");

      RLI.Ptr = FIdx;
      RLI.Chain = Chain;
      RLI.MPI =
      RLI.Alignment = Align(4);

      MachineMemOperand *MMO =
                              RLI.Alignment, RLI.AAInfo, RLI.Ranges);
      SDValue Ops[] = { RLI.Chain, RLI.Ptr };
                                     PPCISD::LFIWZX : PPCISD::LFIWAX,
                                     dl, DAG.getVTList(MVT::f64, MVT::Other),
                                     Ops, MVT::i32, MMO);
      Chain = Bits.getValue(1);
    } else
      Bits = DAG.getNode(ISD::BITCAST, dl, MVT::f64, SINT);

    SDValue FP = convertIntToFP(Op, Bits, DAG, Subtarget, Chain);
    if (IsStrict)
      Chain = FP.getValue(1);

    // Without FCFIDS, round the f64 conversion result down to f32.
    if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) {
      if (IsStrict)
        FP = DAG.getNode(
            ISD::STRICT_FP_ROUND, dl, DAG.getVTList(MVT::f32, MVT::Other),
            {Chain, FP, DAG.getIntPtrConstant(0, dl, /*isTarget=*/true)},
            Flags);
      else
        FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP,
                         DAG.getIntPtrConstant(0, dl, /*isTarget=*/true));
    }
    return FP;
  }

  assert(Src.getValueType() == MVT::i32 &&
         "Unhandled INT_TO_FP type in custom expander!");
  // Since we only generate this in 64-bit mode, we can take advantage of
  // 64-bit registers. In particular, sign extend the input value into the
  // 64-bit register with extsw, store the WHOLE 64-bit value into the stack
  // then lfd it and fcfid it.
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  EVT PtrVT = getPointerTy(MF.getDataLayout());

  SDValue Ld;
  if (Subtarget.hasLFIWAX() || Subtarget.hasFPCVT()) {
    // lfiwax/lfiwzx can load-and-extend the i32 directly into an FP
    // register; reuse an existing load's slot or spill to a fresh one.
    ReuseLoadInfo RLI;
    bool ReusingLoad;
    if (!(ReusingLoad = canReuseLoadAddress(Src, MVT::i32, RLI, DAG))) {
      int FrameIdx = MFI.CreateStackObject(4, Align(4), false);
      SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);

      SDValue Store = DAG.getStore(Chain, dl, Src, FIdx,
                                       DAG.getMachineFunction(), FrameIdx));
      Chain = Store;

      assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 &&
             "Expected an i32 store");

      RLI.Ptr = FIdx;
      RLI.Chain = Chain;
      RLI.MPI =
      RLI.Alignment = Align(4);
    }

    MachineMemOperand *MMO =
                            RLI.Alignment, RLI.AAInfo, RLI.Ranges);
    SDValue Ops[] = { RLI.Chain, RLI.Ptr };
    Ld = DAG.getMemIntrinsicNode(IsSigned ? PPCISD::LFIWAX : PPCISD::LFIWZX, dl,
                                 DAG.getVTList(MVT::f64, MVT::Other), Ops,
                                 MVT::i32, MMO);
    Chain = Ld.getValue(1);
    if (ReusingLoad && RLI.ResChain) {
      DAG.makeEquivalentMemoryOrdering(RLI.ResChain, Ld.getValue(1));
    }
  } else {
    assert(Subtarget.isPPC64() &&
           "i32->FP without LFIWAX supported only on PPC64");

    int FrameIdx = MFI.CreateStackObject(8, Align(8), false);
    SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);

    SDValue Ext64 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i64, Src);

    // STD the extended value into the stack slot.
    SDValue Store = DAG.getStore(
        Chain, dl, Ext64, FIdx,
    Chain = Store;

    // Load the value as a double.
    Ld = DAG.getLoad(
        MVT::f64, dl, Chain, FIdx,
    Chain = Ld.getValue(1);
  }

  // FCFID it and return it.
  SDValue FP = convertIntToFP(Op, Ld, DAG, Subtarget, Chain);
  if (IsStrict)
    Chain = FP.getValue(1);
  if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) {
    if (IsStrict)
      FP = DAG.getNode(
          ISD::STRICT_FP_ROUND, dl, DAG.getVTList(MVT::f32, MVT::Other),
          {Chain, FP, DAG.getIntPtrConstant(0, dl, /*isTarget=*/true)}, Flags);
    else
      FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP,
                       DAG.getIntPtrConstant(0, dl, /*isTarget=*/true));
  }
  return FP;
}
8983
/// Lower SET_ROUNDING by rewriting the rounding-mode (RN) control bits of
/// the FPSCR. Constant modes use mffscrni (ISA 3.0) or a pair of mtfsb0/1;
/// variable modes rewrite the RN field of a read-back FPSCR image and
/// install it with mffscrn (ISA 3.0) or mtfsf.
SDValue PPCTargetLowering::LowerSET_ROUNDING(SDValue Op,
                                             SelectionDAG &DAG) const {
  SDLoc Dl(Op);
  MachineFunction &MF = DAG.getMachineFunction();
  EVT PtrVT = getPointerTy(MF.getDataLayout());
  SDValue Chain = Op.getOperand(0);

  // If requested mode is constant, just use simpler mtfsb/mffscrni
  if (auto *CVal = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
    uint64_t Mode = CVal->getZExtValue();
    assert(Mode < 4 && "Unsupported rounding mode!");
    // Transform the LLVM rounding-mode encoding to the Power RN encoding
    // with x ^ (~(x >> 1) & 1) (same swizzle as the variable case below).
    unsigned InternalRnd = Mode ^ (~(Mode >> 1) & 1);
    if (Subtarget.isISA3_0())
      return SDValue(
          DAG.getMachineNode(
              PPC::MFFSCRNI, Dl, {MVT::f64, MVT::Other},
              {DAG.getConstant(InternalRnd, Dl, MVT::i32, true), Chain}),
          1);
    // Pre-ISA 3.0: set FPSCR bits 30 and 31 individually with mtfsb0/mtfsb1.
    SDNode *SetHi = DAG.getMachineNode(
        (InternalRnd & 2) ? PPC::MTFSB1 : PPC::MTFSB0, Dl, MVT::Other,
        {DAG.getConstant(30, Dl, MVT::i32, true), Chain});
    SDNode *SetLo = DAG.getMachineNode(
        (InternalRnd & 1) ? PPC::MTFSB1 : PPC::MTFSB0, Dl, MVT::Other,
        {DAG.getConstant(31, Dl, MVT::i32, true), SDValue(SetHi, 0)});
    return SDValue(SetLo, 0);
  }

  // Use x ^ (~(x >> 1) & 1) to transform LLVM rounding mode to Power format.
  SDValue One = DAG.getConstant(1, Dl, MVT::i32);
  SDValue SrcFlag = DAG.getNode(ISD::AND, Dl, MVT::i32, Op.getOperand(1),
                                DAG.getConstant(3, Dl, MVT::i32));
  SDValue DstFlag = DAG.getNode(
      ISD::XOR, Dl, MVT::i32, SrcFlag,
      DAG.getNode(ISD::AND, Dl, MVT::i32,
                  DAG.getNOT(Dl,
                             DAG.getNode(ISD::SRL, Dl, MVT::i32, SrcFlag, One),
                             MVT::i32),
                  One));
  // For Power9, there's faster mffscrn, and we don't need to read FPSCR
  SDValue MFFS;
  if (!Subtarget.isISA3_0()) {
    MFFS = DAG.getNode(PPCISD::MFFS, Dl, {MVT::f64, MVT::Other}, Chain);
    Chain = MFFS.getValue(1);
  }
  SDValue NewFPSCR;
  if (Subtarget.isPPC64()) {
    if (Subtarget.isISA3_0()) {
      NewFPSCR = DAG.getAnyExtOrTrunc(DstFlag, Dl, MVT::i64);
    } else {
      // Set the last two bits (rounding mode) of bitcasted FPSCR.
      SDNode *InsertRN = DAG.getMachineNode(
          PPC::RLDIMI, Dl, MVT::i64,
          {DAG.getNode(ISD::BITCAST, Dl, MVT::i64, MFFS),
           DAG.getNode(ISD::ZERO_EXTEND, Dl, MVT::i64, DstFlag),
           DAG.getTargetConstant(0, Dl, MVT::i32),
           DAG.getTargetConstant(62, Dl, MVT::i32)});
      NewFPSCR = SDValue(InsertRN, 0);
    }
    NewFPSCR = DAG.getNode(ISD::BITCAST, Dl, MVT::f64, NewFPSCR);
  } else {
    // In 32-bit mode, store f64, load and update the lower half.
    // (The "lower half" is at offset 4 on big-endian, offset 0 on LE.)
    int SSFI = MF.getFrameInfo().CreateStackObject(8, Align(8), false);
    SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
    SDValue Addr = Subtarget.isLittleEndian()
                       ? StackSlot
                       : DAG.getNode(ISD::ADD, Dl, PtrVT, StackSlot,
                                     DAG.getConstant(4, Dl, PtrVT));
    if (Subtarget.isISA3_0()) {
      Chain = DAG.getStore(Chain, Dl, DstFlag, Addr, MachinePointerInfo());
    } else {
      Chain = DAG.getStore(Chain, Dl, MFFS, StackSlot, MachinePointerInfo());
      SDValue Tmp =
          DAG.getLoad(MVT::i32, Dl, Chain, Addr, MachinePointerInfo());
      Chain = Tmp.getValue(1);
      // Insert the two RN bits into bit positions 30-31 of the low word.
      Tmp = SDValue(DAG.getMachineNode(
                        PPC::RLWIMI, Dl, MVT::i32,
                        {Tmp, DstFlag, DAG.getTargetConstant(0, Dl, MVT::i32),
                         DAG.getTargetConstant(30, Dl, MVT::i32),
                         DAG.getTargetConstant(31, Dl, MVT::i32)}),
                    0);
      Chain = DAG.getStore(Chain, Dl, Tmp, Addr, MachinePointerInfo());
    }
    NewFPSCR =
        DAG.getLoad(MVT::f64, Dl, Chain, StackSlot, MachinePointerInfo());
    Chain = NewFPSCR.getValue(1);
  }
  if (Subtarget.isISA3_0())
    return SDValue(DAG.getMachineNode(PPC::MFFSCRN, Dl, {MVT::f64, MVT::Other},
                                      {NewFPSCR, Chain}),
                   1);
  // Pre-ISA 3.0: write the whole updated image back with mtfsf (FM=255).
  SDValue Zero = DAG.getConstant(0, Dl, MVT::i32, true);
  SDNode *MTFSF = DAG.getMachineNode(
      PPC::MTFSF, Dl, MVT::Other,
      {DAG.getConstant(255, Dl, MVT::i32, true), NewFPSCR, Zero, Zero, Chain});
  return SDValue(MTFSF, 0);
}
9080
SDValue PPCTargetLowering::LowerGET_ROUNDING(SDValue Op,
                                             SelectionDAG &DAG) const {
  SDLoc dl(Op);
  /*
   The rounding mode is in bits 30:31 of FPSR, and has the following
   settings:
     00 Round to nearest
     01 Round to 0
     10 Round to +inf
     11 Round to -inf

  GET_ROUNDING, on the other hand, expects the following:
    -1 Undefined
     0 Round to 0
     1 Round to nearest
     2 Round to +inf
     3 Round to -inf

  To perform the conversion, we do:
    ((FPSCR & 0x3) ^ ((~FPSCR & 0x3) >> 1))
  */

  MachineFunction &MF = DAG.getMachineFunction();
  EVT VT = Op.getValueType();
  EVT PtrVT = getPointerTy(MF.getDataLayout());

  // Save FP Control Word to register. PPCISD::MFFS yields the FPSCR image as
  // an f64 plus an output chain.
  SDValue Chain = Op.getOperand(0);
  SDValue MFFS = DAG.getNode(PPCISD::MFFS, dl, {MVT::f64, MVT::Other}, Chain);
  Chain = MFFS.getValue(1);

  SDValue CWD;
  if (isTypeLegal(MVT::i64)) {
    // With legal i64, read the control word directly: bitcast the f64 FPSCR
    // image to i64 and keep the low 32 bits.
    CWD = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32,
                      DAG.getNode(ISD::BITCAST, dl, MVT::i64, MFFS));
  } else {
    // Save FP register to stack slot
    int SSFI = MF.getFrameInfo().CreateStackObject(8, Align(8), false);
    SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
    Chain = DAG.getStore(Chain, dl, MFFS, StackSlot, MachinePointerInfo());

    // Load FP Control Word from low 32 bits of stack slot.
           "Stack slot adjustment is valid only on big endian subtargets!");
    SDValue Four = DAG.getConstant(4, dl, PtrVT);
    SDValue Addr = DAG.getNode(ISD::ADD, dl, PtrVT, StackSlot, Four);
    CWD = DAG.getLoad(MVT::i32, dl, Chain, Addr, MachinePointerInfo());
    Chain = CWD.getValue(1);
  }

  // Transform as necessary: CWD1 = FPSCR & 3 (the rounding-mode field), and
  // CWD2 = ((FPSCR ^ 3) & 3) >> 1 which equals (~FPSCR & 3) >> 1, so the
  // final XOR implements the formula in the header comment.
  SDValue CWD1 =
    DAG.getNode(ISD::AND, dl, MVT::i32,
                CWD, DAG.getConstant(3, dl, MVT::i32));
  SDValue CWD2 =
    DAG.getNode(ISD::SRL, dl, MVT::i32,
                DAG.getNode(ISD::AND, dl, MVT::i32,
                            DAG.getNode(ISD::XOR, dl, MVT::i32,
                                        CWD, DAG.getConstant(3, dl, MVT::i32)),
                            DAG.getConstant(3, dl, MVT::i32)),
                DAG.getConstant(1, dl, MVT::i32));

  SDValue RetVal =
    DAG.getNode(ISD::XOR, dl, MVT::i32, CWD1, CWD2);

  // Adjust the i32 result to the requested return type VT.
  RetVal =
              dl, VT, RetVal);

  return DAG.getMergeValues({RetVal, Chain}, dl);
}
9152
9153SDValue PPCTargetLowering::LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) const {
9154 EVT VT = Op.getValueType();
9155 uint64_t BitWidth = VT.getSizeInBits();
9156 SDLoc dl(Op);
9157 assert(Op.getNumOperands() == 3 &&
9158 VT == Op.getOperand(1).getValueType() &&
9159 "Unexpected SHL!");
9160
9161 // Expand into a bunch of logical ops. Note that these ops
9162 // depend on the PPC behavior for oversized shift amounts.
9163 SDValue Lo = Op.getOperand(0);
9164 SDValue Hi = Op.getOperand(1);
9165 SDValue Amt = Op.getOperand(2);
9166 EVT AmtVT = Amt.getValueType();
9167
9168 SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
9169 DAG.getConstant(BitWidth, dl, AmtVT), Amt);
9170 SDValue Tmp2 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Amt);
9171 SDValue Tmp3 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Tmp1);
9172 SDValue Tmp4 = DAG.getNode(ISD::OR , dl, VT, Tmp2, Tmp3);
9173 SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
9174 DAG.getSignedConstant(-BitWidth, dl, AmtVT));
9175 SDValue Tmp6 = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Tmp5);
9176 SDValue OutHi = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6);
9177 SDValue OutLo = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Amt);
9178 SDValue OutOps[] = { OutLo, OutHi };
9179 return DAG.getMergeValues(OutOps, dl);
9180}
9181
9182SDValue PPCTargetLowering::LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) const {
9183 EVT VT = Op.getValueType();
9184 SDLoc dl(Op);
9185 uint64_t BitWidth = VT.getSizeInBits();
9186 assert(Op.getNumOperands() == 3 &&
9187 VT == Op.getOperand(1).getValueType() &&
9188 "Unexpected SRL!");
9189
9190 // Expand into a bunch of logical ops. Note that these ops
9191 // depend on the PPC behavior for oversized shift amounts.
9192 SDValue Lo = Op.getOperand(0);
9193 SDValue Hi = Op.getOperand(1);
9194 SDValue Amt = Op.getOperand(2);
9195 EVT AmtVT = Amt.getValueType();
9196
9197 SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
9198 DAG.getConstant(BitWidth, dl, AmtVT), Amt);
9199 SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt);
9200 SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1);
9201 SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
9202 SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
9203 DAG.getSignedConstant(-BitWidth, dl, AmtVT));
9204 SDValue Tmp6 = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Tmp5);
9205 SDValue OutLo = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6);
9206 SDValue OutHi = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Amt);
9207 SDValue OutOps[] = { OutLo, OutHi };
9208 return DAG.getMergeValues(OutOps, dl);
9209}
9210
9211SDValue PPCTargetLowering::LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) const {
9212 SDLoc dl(Op);
9213 EVT VT = Op.getValueType();
9214 uint64_t BitWidth = VT.getSizeInBits();
9215 assert(Op.getNumOperands() == 3 &&
9216 VT == Op.getOperand(1).getValueType() &&
9217 "Unexpected SRA!");
9218
9219 // Expand into a bunch of logical ops, followed by a select_cc.
9220 SDValue Lo = Op.getOperand(0);
9221 SDValue Hi = Op.getOperand(1);
9222 SDValue Amt = Op.getOperand(2);
9223 EVT AmtVT = Amt.getValueType();
9224
9225 SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
9226 DAG.getConstant(BitWidth, dl, AmtVT), Amt);
9227 SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt);
9228 SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1);
9229 SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
9230 SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
9231 DAG.getSignedConstant(-BitWidth, dl, AmtVT));
9232 SDValue Tmp6 = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Tmp5);
9233 SDValue OutHi = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Amt);
9234 SDValue OutLo = DAG.getSelectCC(dl, Tmp5, DAG.getConstant(0, dl, AmtVT),
9235 Tmp4, Tmp6, ISD::SETLE);
9236 SDValue OutOps[] = { OutLo, OutHi };
9237 return DAG.getMergeValues(OutOps, dl);
9238}
9239
9240SDValue PPCTargetLowering::LowerFunnelShift(SDValue Op,
9241 SelectionDAG &DAG) const {
9242 SDLoc dl(Op);
9243 EVT VT = Op.getValueType();
9244 unsigned BitWidth = VT.getSizeInBits();
9245
9246 bool IsFSHL = Op.getOpcode() == ISD::FSHL;
9247 SDValue X = Op.getOperand(0);
9248 SDValue Y = Op.getOperand(1);
9249 SDValue Z = Op.getOperand(2);
9250 EVT AmtVT = Z.getValueType();
9251
9252 // fshl: (X << (Z % BW)) | (Y >> (BW - (Z % BW)))
9253 // fshr: (X << (BW - (Z % BW))) | (Y >> (Z % BW))
9254 // This is simpler than TargetLowering::expandFunnelShift because we can rely
9255 // on PowerPC shift by BW being well defined.
9256 Z = DAG.getNode(ISD::AND, dl, AmtVT, Z,
9257 DAG.getConstant(BitWidth - 1, dl, AmtVT));
9258 SDValue SubZ =
9259 DAG.getNode(ISD::SUB, dl, AmtVT, DAG.getConstant(BitWidth, dl, AmtVT), Z);
9260 X = DAG.getNode(PPCISD::SHL, dl, VT, X, IsFSHL ? Z : SubZ);
9261 Y = DAG.getNode(PPCISD::SRL, dl, VT, Y, IsFSHL ? SubZ : Z);
9262 return DAG.getNode(ISD::OR, dl, VT, X, Y);
9263}
9264
9265//===----------------------------------------------------------------------===//
9266// Vector related lowering.
9267//
9268
9269/// getCanonicalConstSplat - Build a canonical splat immediate of Val with an
9270/// element size of SplatSize. Cast the result to VT.
9271static SDValue getCanonicalConstSplat(uint64_t Val, unsigned SplatSize, EVT VT,
9272 SelectionDAG &DAG, const SDLoc &dl) {
9273 static const MVT VTys[] = { // canonical VT to use for each size.
9274 MVT::v16i8, MVT::v8i16, MVT::Other, MVT::v4i32
9275 };
9276
9277 EVT ReqVT = VT != MVT::Other ? VT : VTys[SplatSize-1];
9278
9279 // For a splat with all ones, turn it to vspltisb 0xFF to canonicalize.
9280 if (Val == ((1LLU << (SplatSize * 8)) - 1)) {
9281 SplatSize = 1;
9282 Val = 0xFF;
9283 }
9284
9285 EVT CanonicalVT = VTys[SplatSize-1];
9286
9287 // Build a canonical splat for this value.
9288 // Explicitly truncate APInt here, as this API is used with a mix of
9289 // signed and unsigned values.
9290 return DAG.getBitcast(
9291 ReqVT,
9292 DAG.getConstant(APInt(64, Val).trunc(SplatSize * 8), dl, CanonicalVT));
9293}
9294
/// BuildIntrinsicOp - Return a unary operator intrinsic node with the
/// specified intrinsic ID.
                                const SDLoc &dl, EVT DestVT = MVT::Other) {
  // MVT::Other is a sentinel meaning "result type = operand type".
  if (DestVT == MVT::Other) DestVT = Op.getValueType();
  return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
                     DAG.getConstant(IID, dl, MVT::i32), Op);
}
9303
/// BuildIntrinsicOp - Return a binary operator intrinsic node with the
/// specified intrinsic ID.
                                SelectionDAG &DAG, const SDLoc &dl,
                                EVT DestVT = MVT::Other) {
  // MVT::Other is a sentinel meaning "result type = LHS operand type".
  if (DestVT == MVT::Other) DestVT = LHS.getValueType();
  return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
                     DAG.getConstant(IID, dl, MVT::i32), LHS, RHS);
}
9313
9314/// BuildIntrinsicOp - Return a ternary operator intrinsic node with the
9315/// specified intrinsic ID.
9316static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op0, SDValue Op1,
9317 SDValue Op2, SelectionDAG &DAG, const SDLoc &dl,
9318 EVT DestVT = MVT::Other) {
9319 if (DestVT == MVT::Other) DestVT = Op0.getValueType();
9320 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
9321 DAG.getConstant(IID, dl, MVT::i32), Op0, Op1, Op2);
9322}
9323
9324/// BuildVSLDOI - Return a VECTOR_SHUFFLE that is a vsldoi of the specified
9325/// amount. The result has the specified value type.
9326static SDValue BuildVSLDOI(SDValue LHS, SDValue RHS, unsigned Amt, EVT VT,
9327 SelectionDAG &DAG, const SDLoc &dl) {
9328 // Force LHS/RHS to be the right type.
9329 LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, LHS);
9330 RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, RHS);
9331
9332 int Ops[16];
9333 for (unsigned i = 0; i != 16; ++i)
9334 Ops[i] = i + Amt;
9335 SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, LHS, RHS, Ops);
9336 return DAG.getNode(ISD::BITCAST, dl, VT, T);
9337}
9338
/// Do we have an efficient pattern in a .td file for this node?
///
/// \param V - pointer to the BuildVectorSDNode being matched
/// \param HasDirectMove - does this subtarget have VSR <-> GPR direct moves?
/// \param HasP8Vector - does this subtarget have the P8 vector facility
/// (required for the v4f32 case below)?
///
/// There are some patterns where it is beneficial to keep a BUILD_VECTOR
/// node as a BUILD_VECTOR node rather than expanding it. The patterns where
/// the opposite is true (expansion is beneficial) are:
/// - The node builds a vector out of integers that are not 32 or 64-bits
/// - The node builds a vector out of constants
/// - The node is a "load-and-splat"
/// In all other cases, we will choose to keep the BUILD_VECTOR.
                                            bool HasDirectMove,
                                            bool HasP8Vector) {
  // Types with efficient patterns: v2f64 always; v4f32 with P8 vector;
  // v2i64/v4i32 only when direct GPR<->VSR moves are available.
  EVT VecVT = V->getValueType(0);
  bool RightType = VecVT == MVT::v2f64 ||
    (HasP8Vector && VecVT == MVT::v4f32) ||
    (HasDirectMove && (VecVT == MVT::v2i64 || VecVT == MVT::v4i32));
  if (!RightType)
    return false;

  bool IsSplat = true;
  bool IsLoad = false;
  SDValue Op0 = V->getOperand(0);

  // This function is called in a block that confirms the node is not a constant
  // splat. So a constant BUILD_VECTOR here means the vector is built out of
  // different constants.
  if (V->isConstant())
    return false;
  for (int i = 0, e = V->getNumOperands(); i < e; ++i) {
    if (V->getOperand(i).isUndef())
      return false;
    // We want to expand nodes that represent load-and-splat even if the
    // loaded value is a floating point truncation or conversion to int.
    if (V->getOperand(i).getOpcode() == ISD::LOAD ||
        (V->getOperand(i).getOpcode() == ISD::FP_ROUND &&
         V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD) ||
        (V->getOperand(i).getOpcode() == ISD::FP_TO_SINT &&
         V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD) ||
        (V->getOperand(i).getOpcode() == ISD::FP_TO_UINT &&
         V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD))
      IsLoad = true;
    // If the operands are different or the input is not a load and has more
    // uses than just this BV node, then it isn't a splat.
    if (V->getOperand(i) != Op0 ||
        (!IsLoad && !V->isOnlyUserOf(V->getOperand(i).getNode())))
      IsSplat = false;
  }
  // Expansion is beneficial (return false) only for a load-and-splat.
  return !(IsSplat && IsLoad);
}
9391
9392// Lower BITCAST(f128, (build_pair i64, i64)) to BUILD_FP128.
9393SDValue PPCTargetLowering::LowerBITCAST(SDValue Op, SelectionDAG &DAG) const {
9394
9395 SDLoc dl(Op);
9396 SDValue Op0 = Op->getOperand(0);
9397
9398 if (!Subtarget.isPPC64() || (Op0.getOpcode() != ISD::BUILD_PAIR) ||
9399 (Op.getValueType() != MVT::f128))
9400 return SDValue();
9401
9402 SDValue Lo = Op0.getOperand(0);
9403 SDValue Hi = Op0.getOperand(1);
9404 if ((Lo.getValueType() != MVT::i64) || (Hi.getValueType() != MVT::i64))
9405 return SDValue();
9406
9407 if (!Subtarget.isLittleEndian())
9408 std::swap(Lo, Hi);
9409
9410 return DAG.getNode(PPCISD::BUILD_FP128, dl, MVT::f128, Lo, Hi);
9411}
9412
9413static const SDValue *getNormalLoadInput(const SDValue &Op, bool &IsPermuted) {
9414 const SDValue *InputLoad = &Op;
9415 while (InputLoad->getOpcode() == ISD::BITCAST)
9416 InputLoad = &InputLoad->getOperand(0);
9417 if (InputLoad->getOpcode() == ISD::SCALAR_TO_VECTOR ||
9418 InputLoad->getOpcode() == PPCISD::SCALAR_TO_VECTOR_PERMUTED) {
9419 IsPermuted = InputLoad->getOpcode() == PPCISD::SCALAR_TO_VECTOR_PERMUTED;
9420 InputLoad = &InputLoad->getOperand(0);
9421 }
9422 if (InputLoad->getOpcode() != ISD::LOAD)
9423 return nullptr;
9424 LoadSDNode *LD = cast<LoadSDNode>(*InputLoad);
9425 return ISD::isNormalLoad(LD) ? InputLoad : nullptr;
9426}
9427
// Convert the argument APFloat to a single precision APFloat if there is no
// loss in information during the conversion to single precision APFloat and the
// resulting number is not a denormal number. Return true if successful.
  // Convert a scratch copy so the argument is only updated on success.
  APFloat APFloatToConvert = ArgAPFloat;
  bool LosesInfo = true;
                         &LosesInfo);
  bool Success = (!LosesInfo && !APFloatToConvert.isDenormal());
  if (Success)
    ArgAPFloat = APFloatToConvert;
  return Success;
}
9441
// Bitcast the argument APInt to a double and convert it to a single precision
// APFloat, bitcast the APFloat to an APInt and assign it to the original
// argument if there is no loss in information during the conversion from
// double to single precision APFloat and the resulting number is not a denormal
// number. Return true if successful.
  // Reinterpret the 64-bit pattern as a double and defer to the APFloat
  // overload; write the single-precision bit pattern back only on success.
  double DpValue = ArgAPInt.bitsToDouble();
  APFloat APFloatDp(DpValue);
  bool Success = convertToNonDenormSingle(APFloatDp);
  if (Success)
    ArgAPInt = APFloatDp.bitcastToAPInt();
  return Success;
}
9455
// Nondestructive check for convertToNonDenormSingle: returns true when the
// value would convert to single precision without losing information and
// without producing a denormal. The argument itself is never modified.
  // Only convert if it loses info, since XXSPLTIDP should
  // handle the other case.
  APFloat APFloatToConvert = ArgAPFloat;
  bool LosesInfo = true;
                         &LosesInfo);

  return (!LosesInfo && !APFloatToConvert.isDenormal());
}
9467
9468static bool isValidSplatLoad(const PPCSubtarget &Subtarget, const SDValue &Op,
9469 unsigned &Opcode) {
9470 LoadSDNode *InputNode = dyn_cast<LoadSDNode>(Op.getOperand(0));
9471 if (!InputNode || !Subtarget.hasVSX() || !ISD::isUNINDEXEDLoad(InputNode))
9472 return false;
9473
9474 EVT Ty = Op->getValueType(0);
9475 // For v2f64, v4f32 and v4i32 types, we require the load to be non-extending
9476 // as we cannot handle extending loads for these types.
9477 if ((Ty == MVT::v2f64 || Ty == MVT::v4f32 || Ty == MVT::v4i32) &&
9478 ISD::isNON_EXTLoad(InputNode))
9479 return true;
9480
9481 EVT MemVT = InputNode->getMemoryVT();
9482 // For v8i16 and v16i8 types, extending loads can be handled as long as the
9483 // memory VT is the same vector element VT type.
9484 // The loads feeding into the v8i16 and v16i8 types will be extending because
9485 // scalar i8/i16 are not legal types.
9486 if ((Ty == MVT::v8i16 || Ty == MVT::v16i8) && ISD::isEXTLoad(InputNode) &&
9487 (MemVT == Ty.getVectorElementType()))
9488 return true;
9489
9490 if (Ty == MVT::v2i64) {
9491 // Check the extend type, when the input type is i32, and the output vector
9492 // type is v2i64.
9493 if (MemVT == MVT::i32) {
9494 if (ISD::isZEXTLoad(InputNode))
9495 Opcode = PPCISD::ZEXT_LD_SPLAT;
9496 if (ISD::isSEXTLoad(InputNode))
9497 Opcode = PPCISD::SEXT_LD_SPLAT;
9498 }
9499 return true;
9500 }
9501 return false;
9502}
9503
                             bool IsLittleEndian) {
  assert(BVN.getNumOperands() > 0 && "Unexpected 0-size build vector");

  BitMask.clearAllBits();
  EVT VT = BVN.getValueType(0);
  unsigned VTSize = VT.getSizeInBits();
  // Accumulate the whole build_vector into one VTSize-bit constant.
  APInt ConstValue(VTSize, 0);

  unsigned EltWidth = VT.getScalarSizeInBits();

  unsigned BitPos = 0;
  for (auto OpVal : BVN.op_values()) {
    auto *CN = dyn_cast<ConstantSDNode>(OpVal);

    // Every element must be a compile-time constant.
    if (!CN)
      return false;
    // The elements in a vector register are ordered in reverse byte order
    // between little-endian and big-endian modes.
    ConstValue.insertBits(CN->getAPIntValue().zextOrTrunc(EltWidth),
                          IsLittleEndian ? BitPos : VTSize - EltWidth - BitPos);
    BitPos += EltWidth;
  }

  // Each of the 16 bytes must be all-zeros or all-ones; set a bit in the
  // mask for every 0xFF byte (this becomes the MTVSRBMI immediate at the
  // call site).
  for (unsigned J = 0; J < 16; ++J) {
    APInt ExtractValue = ConstValue.extractBits(8, J * 8);
    if (ExtractValue != 0x00 && ExtractValue != 0xFF)
      return false;
    if (ExtractValue == 0xFF)
      BitMask.setBit(J);
  }
  return true;
}
9537
9538// If this is a case we can't handle, return null and let the default
9539// expansion code take care of it. If we CAN select this case, and if it
9540// selects to a single instruction, return Op. Otherwise, if we can codegen
9541// this case more efficiently than a constant pool load, lower it to the
9542// sequence of ops that should be used.
9543SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
9544 SelectionDAG &DAG) const {
9545 SDLoc dl(Op);
9546 BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
9547 assert(BVN && "Expected a BuildVectorSDNode in LowerBUILD_VECTOR");
9548
9549 if (Subtarget.hasP10Vector()) {
9550 APInt BitMask(32, 0);
9551 // If the value of the vector is all zeros or all ones,
9552 // we do not convert it to MTVSRBMI.
9553 // The xxleqv instruction sets a vector with all ones.
9554 // The xxlxor instruction sets a vector with all zeros.
9555 if (isValidMtVsrBmi(BitMask, *BVN, Subtarget.isLittleEndian()) &&
9556 BitMask != 0 && BitMask != 0xffff) {
9557 SDValue SDConstant = DAG.getTargetConstant(BitMask, dl, MVT::i32);
9558 MachineSDNode *MSDNode =
9559 DAG.getMachineNode(PPC::MTVSRBMI, dl, MVT::v16i8, SDConstant);
9560 SDValue SDV = SDValue(MSDNode, 0);
9561 EVT DVT = BVN->getValueType(0);
9562 EVT SVT = SDV.getValueType();
9563 if (SVT != DVT) {
9564 SDV = DAG.getNode(ISD::BITCAST, dl, DVT, SDV);
9565 }
9566 return SDV;
9567 }
9568 // Recognize build vector patterns to emit VSX vector instructions
9569 // instead of loading value from memory.
9570 if (SDValue VecPat = combineBVLoadsSpecialValue(Op, DAG))
9571 return VecPat;
9572 }
9573 // Check if this is a splat of a constant value.
9574 APInt APSplatBits, APSplatUndef;
9575 unsigned SplatBitSize;
9576 bool HasAnyUndefs;
9577 bool BVNIsConstantSplat =
9578 BVN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
9579 HasAnyUndefs, 0, !Subtarget.isLittleEndian());
9580
9581 // If it is a splat of a double, check if we can shrink it to a 32 bit
9582 // non-denormal float which when converted back to double gives us the same
9583 // double. This is to exploit the XXSPLTIDP instruction.
9584 // If we lose precision, we use XXSPLTI32DX.
9585 if (BVNIsConstantSplat && (SplatBitSize == 64) &&
9586 Subtarget.hasPrefixInstrs() && Subtarget.hasP10Vector()) {
9587 // Check the type first to short-circuit so we don't modify APSplatBits if
9588 // this block isn't executed.
9589 if ((Op->getValueType(0) == MVT::v2f64) &&
9590 convertToNonDenormSingle(APSplatBits)) {
9591 SDValue SplatNode = DAG.getNode(
9592 PPCISD::XXSPLTI_SP_TO_DP, dl, MVT::v2f64,
9593 DAG.getTargetConstant(APSplatBits.getZExtValue(), dl, MVT::i32));
9594 return DAG.getBitcast(Op.getValueType(), SplatNode);
9595 } else {
9596 // We may lose precision, so we have to use XXSPLTI32DX.
9597
9598 uint32_t Hi = Hi_32(APSplatBits.getZExtValue());
9599 uint32_t Lo = Lo_32(APSplatBits.getZExtValue());
9600 SDValue SplatNode = DAG.getUNDEF(MVT::v2i64);
9601
9602 if (!Hi || !Lo)
9603 // If either load is 0, then we should generate XXLXOR to set to 0.
9604 SplatNode = DAG.getTargetConstant(0, dl, MVT::v2i64);
9605
9606 if (Hi)
9607 SplatNode = DAG.getNode(
9608 PPCISD::XXSPLTI32DX, dl, MVT::v2i64, SplatNode,
9609 DAG.getTargetConstant(0, dl, MVT::i32),
9610 DAG.getTargetConstant(Hi, dl, MVT::i32));
9611
9612 if (Lo)
9613 SplatNode =
9614 DAG.getNode(PPCISD::XXSPLTI32DX, dl, MVT::v2i64, SplatNode,
9615 DAG.getTargetConstant(1, dl, MVT::i32),
9616 DAG.getTargetConstant(Lo, dl, MVT::i32));
9617
9618 return DAG.getBitcast(Op.getValueType(), SplatNode);
9619 }
9620 }
9621
9622 bool IsSplat64 = false;
9623 uint64_t SplatBits = 0;
9624 int32_t SextVal = 0;
9625 if (BVNIsConstantSplat && SplatBitSize <= 64) {
9626 SplatBits = APSplatBits.getZExtValue();
9627 if (SplatBitSize <= 32) {
9628 SextVal = SignExtend32(SplatBits, SplatBitSize);
9629 } else if (SplatBitSize == 64 && Subtarget.hasP8Altivec()) {
9630 int64_t Splat64Val = static_cast<int64_t>(SplatBits);
9631 bool P9Vector = Subtarget.hasP9Vector();
9632 int32_t Hi = P9Vector ? 127 : 15;
9633 int32_t Lo = P9Vector ? -128 : -16;
9634 IsSplat64 = Splat64Val >= Lo && Splat64Val <= Hi;
9635 SextVal = static_cast<int32_t>(SplatBits);
9636 }
9637 }
9638
9639 if (!BVNIsConstantSplat || (SplatBitSize > 32 && !IsSplat64)) {
9640 unsigned NewOpcode = PPCISD::LD_SPLAT;
9641
9642 // Handle load-and-splat patterns as we have instructions that will do this
9643 // in one go.
9644 if (DAG.isSplatValue(Op, true) &&
9645 isValidSplatLoad(Subtarget, Op, NewOpcode)) {
9646 const SDValue *InputLoad = &Op.getOperand(0);
9647 LoadSDNode *LD = cast<LoadSDNode>(*InputLoad);
9648
9649 // If the input load is an extending load, it will be an i32 -> i64
9650 // extending load and isValidSplatLoad() will update NewOpcode.
9651 unsigned MemorySize = LD->getMemoryVT().getScalarSizeInBits();
9652 unsigned ElementSize =
9653 MemorySize * ((NewOpcode == PPCISD::LD_SPLAT) ? 1 : 2);
9654
9655 assert(((ElementSize == 2 * MemorySize)
9656 ? (NewOpcode == PPCISD::ZEXT_LD_SPLAT ||
9657 NewOpcode == PPCISD::SEXT_LD_SPLAT)
9658 : (NewOpcode == PPCISD::LD_SPLAT)) &&
9659 "Unmatched element size and opcode!\n");
9660
9661 // Checking for a single use of this load, we have to check for vector
9662 // width (128 bits) / ElementSize uses (since each operand of the
9663 // BUILD_VECTOR is a separate use of the value.
9664 unsigned NumUsesOfInputLD = 128 / ElementSize;
9665 for (SDValue BVInOp : Op->ops())
9666 if (BVInOp.isUndef())
9667 NumUsesOfInputLD--;
9668
9669 // Exclude somes case where LD_SPLAT is worse than scalar_to_vector:
9670 // Below cases should also happen for "lfiwzx/lfiwax + LE target + index
9671 // 1" and "lxvrhx + BE target + index 7" and "lxvrbx + BE target + index
9672 // 15", but function IsValidSplatLoad() now will only return true when
9673 // the data at index 0 is not nullptr. So we will not get into trouble for
9674 // these cases.
9675 //
9676 // case 1 - lfiwzx/lfiwax
9677 // 1.1: load result is i32 and is sign/zero extend to i64;
9678 // 1.2: build a v2i64 vector type with above loaded value;
9679 // 1.3: the vector has only one value at index 0, others are all undef;
9680 // 1.4: on BE target, so that lfiwzx/lfiwax does not need any permute.
9681 if (NumUsesOfInputLD == 1 &&
9682 (Op->getValueType(0) == MVT::v2i64 && NewOpcode != PPCISD::LD_SPLAT &&
9683 !Subtarget.isLittleEndian() && Subtarget.hasVSX() &&
9684 Subtarget.hasLFIWAX()))
9685 return SDValue();
9686
9687 // case 2 - lxvr[hb]x
9688 // 2.1: load result is at most i16;
9689 // 2.2: build a vector with above loaded value;
9690 // 2.3: the vector has only one value at index 0, others are all undef;
9691 // 2.4: on LE target, so that lxvr[hb]x does not need any permute.
9692 if (NumUsesOfInputLD == 1 && Subtarget.isLittleEndian() &&
9693 Subtarget.isISA3_1() && ElementSize <= 16)
9694 return SDValue();
9695
9696 assert(NumUsesOfInputLD > 0 && "No uses of input LD of a build_vector?");
9697 if (InputLoad->getNode()->hasNUsesOfValue(NumUsesOfInputLD, 0) &&
9698 Subtarget.hasVSX()) {
9699 SDValue Ops[] = {
9700 LD->getChain(), // Chain
9701 LD->getBasePtr(), // Ptr
9702 DAG.getValueType(Op.getValueType()) // VT
9703 };
9704 SDValue LdSplt = DAG.getMemIntrinsicNode(
9705 NewOpcode, dl, DAG.getVTList(Op.getValueType(), MVT::Other), Ops,
9706 LD->getMemoryVT(), LD->getMemOperand());
9707 // Replace all uses of the output chain of the original load with the
9708 // output chain of the new load.
9709 DAG.ReplaceAllUsesOfValueWith(InputLoad->getValue(1),
9710 LdSplt.getValue(1));
9711 return LdSplt;
9712 }
9713 }
9714
9715 // In 64BIT mode BUILD_VECTOR nodes that are not constant splats of up to
9716 // 32-bits can be lowered to VSX instructions under certain conditions.
9717 // Without VSX, there is no pattern more efficient than expanding the node.
9718 if (Subtarget.hasVSX() && Subtarget.isPPC64() &&
9719 haveEfficientBuildVectorPattern(BVN, Subtarget.hasDirectMove(),
9720 Subtarget.hasP8Vector()))
9721 return Op;
9722 return SDValue();
9723 }
9724
9725 uint64_t SplatUndef = APSplatUndef.getZExtValue();
9726 unsigned SplatSize = SplatBitSize / 8;
9727
9728 // First, handle single instruction cases.
9729
9730 // All zeros?
9731 if (SplatBits == 0) {
9732 // Canonicalize all zero vectors to be v4i32.
9733 if (Op.getValueType() != MVT::v4i32 || HasAnyUndefs) {
9734 SDValue Z = DAG.getConstant(0, dl, MVT::v4i32);
9735 Op = DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Z);
9736 }
9737 return Op;
9738 }
9739
9740 // We have XXSPLTIW for constant splats four bytes wide.
9741 // Given vector length is a multiple of 4, 2-byte splats can be replaced
9742 // with 4-byte splats. We replicate the SplatBits in case of 2-byte splat to
9743 // make a 4-byte splat element. For example: 2-byte splat of 0xABAB can be
9744 // turned into a 4-byte splat of 0xABABABAB.
9745 if (Subtarget.hasPrefixInstrs() && Subtarget.hasP10Vector() && SplatSize == 2)
9746 return getCanonicalConstSplat(SplatBits | (SplatBits << 16), SplatSize * 2,
9747 Op.getValueType(), DAG, dl);
9748
9749 if (Subtarget.hasPrefixInstrs() && Subtarget.hasP10Vector() && SplatSize == 4)
9750 return getCanonicalConstSplat(SplatBits, SplatSize, Op.getValueType(), DAG,
9751 dl);
9752
9753 // We have XXSPLTIB for constant splats one byte wide.
9754 if (Subtarget.hasP9Vector() && SplatSize == 1)
9755 return getCanonicalConstSplat(SplatBits, SplatSize, Op.getValueType(), DAG,
9756 dl);
9757
9758 // If the sign extended value is in the range [-16,15], use VSPLTI[bhw].
9759 // Use VSPLTIW/VUPKLSW for v2i64 in range [-16,15].
9760 if (SextVal >= -16 && SextVal <= 15) {
9761 // SplatSize may be 1, 2, 4, or 8. Use size 4 instead of 8 for the splat to
9762 // generate a splat word with extend for size 8.
9763 unsigned UseSize = SplatSize == 8 ? 4 : SplatSize;
9764 SDValue Res =
9765 getCanonicalConstSplat(SextVal, UseSize, Op.getValueType(), DAG, dl);
9766 if (SplatSize != 8)
9767 return Res;
9768 return BuildIntrinsicOp(Intrinsic::ppc_altivec_vupklsw, Res, DAG, dl);
9769 }
9770
9771 // Two instruction sequences.
9772
9773 if (Subtarget.hasP9Vector() && SextVal >= -128 && SextVal <= 127) {
9774 SDValue C = DAG.getConstant((unsigned char)SextVal, dl, MVT::i32);
9776 SDValue BV = DAG.getBuildVector(MVT::v16i8, dl, Ops);
9777 unsigned IID;
9778 EVT VT;
9779 switch (SplatSize) {
9780 default:
9781 llvm_unreachable("Unexpected type for vector constant.");
9782 case 2:
9783 IID = Intrinsic::ppc_altivec_vupklsb;
9784 VT = MVT::v8i16;
9785 break;
9786 case 4:
9787 IID = Intrinsic::ppc_altivec_vextsb2w;
9788 VT = MVT::v4i32;
9789 break;
9790 case 8:
9791 IID = Intrinsic::ppc_altivec_vextsb2d;
9792 VT = MVT::v2i64;
9793 break;
9794 }
9795 SDValue Extend = BuildIntrinsicOp(IID, BV, DAG, dl, VT);
9796 return DAG.getBitcast(Op->getValueType(0), Extend);
9797 }
9798 assert(!IsSplat64 && "Unhandled 64-bit splat pattern");
9799
9800 // If this value is in the range [-32,30] and is even, use:
9801 // VSPLTI[bhw](val/2) + VSPLTI[bhw](val/2)
9802 // If this value is in the range [17,31] and is odd, use:
9803 // VSPLTI[bhw](val-16) - VSPLTI[bhw](-16)
9804 // If this value is in the range [-31,-17] and is odd, use:
9805 // VSPLTI[bhw](val+16) + VSPLTI[bhw](-16)
9806 // Note the last two are three-instruction sequences.
9807 if (SextVal >= -32 && SextVal <= 31) {
9808 // To avoid having these optimizations undone by constant folding,
9809 // we convert to a pseudo that will be expanded later into one of
9810 // the above forms.
9811 SDValue Elt = DAG.getSignedConstant(SextVal, dl, MVT::i32);
9812 EVT VT = (SplatSize == 1 ? MVT::v16i8 :
9813 (SplatSize == 2 ? MVT::v8i16 : MVT::v4i32));
9814 SDValue EltSize = DAG.getConstant(SplatSize, dl, MVT::i32);
9815 SDValue RetVal = DAG.getNode(PPCISD::VADD_SPLAT, dl, VT, Elt, EltSize);
9816 if (VT == Op.getValueType())
9817 return RetVal;
9818 else
9819 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), RetVal);
9820 }
9821
9822 // If this is 0x8000_0000 x 4, turn into vspltisw + vslw. If it is
9823 // 0x7FFF_FFFF x 4, turn it into not(0x8000_0000). This is important
9824 // for fneg/fabs.
9825 if (SplatSize == 4 && SplatBits == (0x7FFFFFFF&~SplatUndef)) {
9826 // Make -1 and vspltisw -1:
9827 SDValue OnesV = getCanonicalConstSplat(-1, 4, MVT::v4i32, DAG, dl);
9828
9829 // Make the VSLW intrinsic, computing 0x8000_0000.
9830 SDValue Res = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, OnesV,
9831 OnesV, DAG, dl);
9832
9833 // xor by OnesV to invert it.
9834 Res = DAG.getNode(ISD::XOR, dl, MVT::v4i32, Res, OnesV);
9835 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
9836 }
9837
9838 // Check to see if this is a wide variety of vsplti*, binop self cases.
9839 static const signed char SplatCsts[] = {
9840 -1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7,
9841 -8, 8, -9, 9, -10, 10, -11, 11, -12, 12, -13, 13, 14, -14, 15, -15, -16
9842 };
9843
9844 for (unsigned idx = 0; idx < std::size(SplatCsts); ++idx) {
9845 // Indirect through the SplatCsts array so that we favor 'vsplti -1' for
9846 // cases which are ambiguous (e.g. formation of 0x8000_0000). 'vsplti -1'
9847 int i = SplatCsts[idx];
9848
9849 // Figure out what shift amount will be used by altivec if shifted by i in
9850 // this splat size.
9851 unsigned TypeShiftAmt = i & (SplatBitSize-1);
9852
9853 // vsplti + shl self.
9854 if (SextVal == (int)((unsigned)i << TypeShiftAmt)) {
9855 SDValue Res = getCanonicalConstSplat(i, SplatSize, MVT::Other, DAG, dl);
9856 static const unsigned IIDs[] = { // Intrinsic to use for each size.
9857 Intrinsic::ppc_altivec_vslb, Intrinsic::ppc_altivec_vslh, 0,
9858 Intrinsic::ppc_altivec_vslw
9859 };
9860 Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
9861 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
9862 }
9863
9864 // vsplti + srl self.
9865 if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) {
9866 SDValue Res = getCanonicalConstSplat(i, SplatSize, MVT::Other, DAG, dl);
9867 static const unsigned IIDs[] = { // Intrinsic to use for each size.
9868 Intrinsic::ppc_altivec_vsrb, Intrinsic::ppc_altivec_vsrh, 0,
9869 Intrinsic::ppc_altivec_vsrw
9870 };
9871 Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
9872 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
9873 }
9874
9875 // vsplti + rol self.
9876 if (SextVal == (int)(((unsigned)i << TypeShiftAmt) |
9877 ((unsigned)i >> (SplatBitSize-TypeShiftAmt)))) {
9878 SDValue Res = getCanonicalConstSplat(i, SplatSize, MVT::Other, DAG, dl);
9879 static const unsigned IIDs[] = { // Intrinsic to use for each size.
9880 Intrinsic::ppc_altivec_vrlb, Intrinsic::ppc_altivec_vrlh, 0,
9881 Intrinsic::ppc_altivec_vrlw
9882 };
9883 Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
9884 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
9885 }
9886
9887 // t = vsplti c, result = vsldoi t, t, 1
9888 if (SextVal == (int)(((unsigned)i << 8) | (i < 0 ? 0xFF : 0))) {
9889 SDValue T = getCanonicalConstSplat(i, SplatSize, MVT::v16i8, DAG, dl);
9890 unsigned Amt = Subtarget.isLittleEndian() ? 15 : 1;
9891 return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
9892 }
9893 // t = vsplti c, result = vsldoi t, t, 2
9894 if (SextVal == (int)(((unsigned)i << 16) | (i < 0 ? 0xFFFF : 0))) {
9895 SDValue T = getCanonicalConstSplat(i, SplatSize, MVT::v16i8, DAG, dl);
9896 unsigned Amt = Subtarget.isLittleEndian() ? 14 : 2;
9897 return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
9898 }
9899 // t = vsplti c, result = vsldoi t, t, 3
9900 if (SextVal == (int)(((unsigned)i << 24) | (i < 0 ? 0xFFFFFF : 0))) {
9901 SDValue T = getCanonicalConstSplat(i, SplatSize, MVT::v16i8, DAG, dl);
9902 unsigned Amt = Subtarget.isLittleEndian() ? 13 : 3;
9903 return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
9904 }
9905 }
9906
9907 return SDValue();
9908}
9909
/// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
/// the specified operations to build the shuffle.
                                      SDValue RHS, SelectionDAG &DAG,
                                      const SDLoc &dl) {
  // Decode the table entry: bits [29:26] hold the operation to perform,
  // bits [25:13] and [12:0] hold the (base-9 encoded) IDs of the two input
  // shuffles that are built recursively below.
  unsigned OpNum = (PFEntry >> 26) & 0x0F;
  unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1);
  unsigned RHSID = (PFEntry >> 0) & ((1 << 13)-1);

  enum {
    OP_COPY = 0, // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>
    OP_VMRGHW,
    OP_VMRGLW,
    OP_VSPLTISW0,
    OP_VSPLTISW1,
    OP_VSPLTISW2,
    OP_VSPLTISW3,
    OP_VSLDOI4,
    OP_VSLDOI8,
    OP_VSLDOI12
  };

  if (OpNum == OP_COPY) {
    // The identity permutations <0,1,2,3> (take LHS) and <4,5,6,7> (take
    // RHS) are encoded in base 9 as 0123 and 4567 respectively.
    if (LHSID == (1*9+2)*9+3) return LHS;
    assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!");
    return RHS;
  }

  // Recursively materialize the two inputs before combining them.
  SDValue OpLHS, OpRHS;
  OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl);
  OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl);

  // Byte-level shuffle mask (on v16i8) implementing the selected word-level
  // operation; indices >= 16 select from OpRHS.
  int ShufIdxs[16];
  switch (OpNum) {
  default: llvm_unreachable("Unknown i32 permute!");
  case OP_VMRGHW:
    ShufIdxs[ 0] = 0; ShufIdxs[ 1] = 1; ShufIdxs[ 2] = 2; ShufIdxs[ 3] = 3;
    ShufIdxs[ 4] = 16; ShufIdxs[ 5] = 17; ShufIdxs[ 6] = 18; ShufIdxs[ 7] = 19;
    ShufIdxs[ 8] = 4; ShufIdxs[ 9] = 5; ShufIdxs[10] = 6; ShufIdxs[11] = 7;
    ShufIdxs[12] = 20; ShufIdxs[13] = 21; ShufIdxs[14] = 22; ShufIdxs[15] = 23;
    break;
  case OP_VMRGLW:
    ShufIdxs[ 0] = 8; ShufIdxs[ 1] = 9; ShufIdxs[ 2] = 10; ShufIdxs[ 3] = 11;
    ShufIdxs[ 4] = 24; ShufIdxs[ 5] = 25; ShufIdxs[ 6] = 26; ShufIdxs[ 7] = 27;
    ShufIdxs[ 8] = 12; ShufIdxs[ 9] = 13; ShufIdxs[10] = 14; ShufIdxs[11] = 15;
    ShufIdxs[12] = 28; ShufIdxs[13] = 29; ShufIdxs[14] = 30; ShufIdxs[15] = 31;
    break;
  case OP_VSPLTISW0:
    for (unsigned i = 0; i != 16; ++i)
      ShufIdxs[i] = (i&3)+0;
    break;
  case OP_VSPLTISW1:
    for (unsigned i = 0; i != 16; ++i)
      ShufIdxs[i] = (i&3)+4;
    break;
  case OP_VSPLTISW2:
    for (unsigned i = 0; i != 16; ++i)
      ShufIdxs[i] = (i&3)+8;
    break;
  case OP_VSPLTISW3:
    for (unsigned i = 0; i != 16; ++i)
      ShufIdxs[i] = (i&3)+12;
    break;
  // The VSLDOI cases are emitted directly as PPC nodes rather than via a
  // byte shuffle mask.
  case OP_VSLDOI4:
    return BuildVSLDOI(OpLHS, OpRHS, 4, OpLHS.getValueType(), DAG, dl);
  case OP_VSLDOI8:
    return BuildVSLDOI(OpLHS, OpRHS, 8, OpLHS.getValueType(), DAG, dl);
  case OP_VSLDOI12:
    return BuildVSLDOI(OpLHS, OpRHS, 12, OpLHS.getValueType(), DAG, dl);
  }
  // Perform the shuffle on v16i8 and cast back to the incoming type.
  EVT VT = OpLHS.getValueType();
  OpLHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpLHS);
  OpRHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpRHS);
  SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, OpLHS, OpRHS, ShufIdxs);
  return DAG.getNode(ISD::BITCAST, dl, VT, T);
}
9986
9987/// lowerToVINSERTB - Return the SDValue if this VECTOR_SHUFFLE can be handled
9988/// by the VINSERTB instruction introduced in ISA 3.0, else just return default
9989/// SDValue.
SDValue PPCTargetLowering::lowerToVINSERTB(ShuffleVectorSDNode *N,
                                           SelectionDAG &DAG) const {
  const unsigned BytesInVector = 16;
  bool IsLE = Subtarget.isLittleEndian();
  SDLoc dl(N);
  SDValue V1 = N->getOperand(0);
  SDValue V2 = N->getOperand(1);
  unsigned ShiftElts = 0, InsertAtByte = 0;
  bool Swap = false;

  // Shifts required to get the byte we want at element 7.
  unsigned LittleEndianShifts[] = {8, 7, 6, 5, 4, 3, 2, 1,
                                   0, 15, 14, 13, 12, 11, 10, 9};
  unsigned BigEndianShifts[] = {9, 10, 11, 12, 13, 14, 15, 0,
                                1, 2, 3, 4, 5, 6, 7, 8};

  ArrayRef<int> Mask = N->getMask();
  int OriginalOrder[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};

  // For each mask element, find out if we're just inserting something
  // from V2 into V1 or vice versa.
  // Possible permutations inserting an element from V2 into V1:
  //   X, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
  //   0, X, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
  //   ...
  //   0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, X
  // Inserting from V1 into V2 will be similar, except mask range will be
  // [16,31].

  bool FoundCandidate = false;
  // If both vector operands for the shuffle are the same vector, the mask
  // will contain only elements from the first one and the second one will be
  // undef.
  // The element VINSERTB reads its source byte from: 8 for LE, 7 for BE.
  unsigned VINSERTBSrcElem = IsLE ? 8 : 7;
  // Go through the mask of bytes to find an element that's being moved
  // from one vector to the other.
  for (unsigned i = 0; i < BytesInVector; ++i) {
    unsigned CurrentElement = Mask[i];
    // If 2nd operand is undefined, we should only look for the VINSERTB
    // source element in the Mask.
    if (V2.isUndef() && CurrentElement != VINSERTBSrcElem)
      continue;

    bool OtherElementsInOrder = true;
    // Examine the other elements in the Mask to see if they're in original
    // order.
    for (unsigned j = 0; j < BytesInVector; ++j) {
      if (j == i)
        continue;
      // If CurrentElement is from V1 [0,15], then we expect the rest of the
      // Mask to be from V2 [16,31] and vice versa. Unless the 2nd operand is
      // undefined, in which case we assume we're always picking from the 1st
      // operand.
      int MaskOffset =
          (!V2.isUndef() && CurrentElement < BytesInVector) ? BytesInVector : 0;
      if (Mask[j] != OriginalOrder[j] + MaskOffset) {
        OtherElementsInOrder = false;
        break;
      }
    }
    // If other elements are in original order, we record the number of shifts
    // we need to get the element we want into element 7. Also record which byte
    // in the vector we should insert into.
    if (OtherElementsInOrder) {
      // If 2nd operand is undefined, we assume no shifts and no swapping.
      if (V2.isUndef()) {
        ShiftElts = 0;
        Swap = false;
      } else {
        // Only need the last 4-bits for shifts because operands will be
        // swapped if CurrentElement is >= 2^4.
        ShiftElts = IsLE ? LittleEndianShifts[CurrentElement & 0xF]
                         : BigEndianShifts[CurrentElement & 0xF];
        Swap = CurrentElement < BytesInVector;
      }
      InsertAtByte = IsLE ? BytesInVector - (i + 1) : i;
      FoundCandidate = true;
      break;
    }
  }

  if (!FoundCandidate)
    return SDValue();

  // Candidate found, construct the proper SDAG sequence with VINSERTB,
  // optionally with VECSHL if shift is required.
  if (Swap)
    std::swap(V1, V2);
  if (V2.isUndef())
    V2 = V1;
  if (ShiftElts) {
    // Rotate the source vector so the byte to insert lands in the element
    // VINSERTB reads from, then insert.
    SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v16i8, V2, V2,
                              DAG.getConstant(ShiftElts, dl, MVT::i32));
    return DAG.getNode(PPCISD::VECINSERT, dl, MVT::v16i8, V1, Shl,
                       DAG.getConstant(InsertAtByte, dl, MVT::i32));
  }
  return DAG.getNode(PPCISD::VECINSERT, dl, MVT::v16i8, V1, V2,
                     DAG.getConstant(InsertAtByte, dl, MVT::i32));
}
10087
10088/// lowerToVINSERTH - Return the SDValue if this VECTOR_SHUFFLE can be handled
10089/// by the VINSERTH instruction introduced in ISA 3.0, else just return default
10090/// SDValue.
SDValue PPCTargetLowering::lowerToVINSERTH(ShuffleVectorSDNode *N,
                                           SelectionDAG &DAG) const {
  const unsigned NumHalfWords = 8;
  const unsigned BytesInVector = NumHalfWords * 2;
  // Check that the shuffle is on half-words.
  if (!isNByteElemShuffleMask(N, 2, 1))
    return SDValue();

  bool IsLE = Subtarget.isLittleEndian();
  SDLoc dl(N);
  SDValue V1 = N->getOperand(0);
  SDValue V2 = N->getOperand(1);
  unsigned ShiftElts = 0, InsertAtByte = 0;
  bool Swap = false;

  // Shifts required to get the half-word we want at element 3.
  unsigned LittleEndianShifts[] = {4, 3, 2, 1, 0, 7, 6, 5};
  unsigned BigEndianShifts[] = {5, 6, 7, 0, 1, 2, 3, 4};

  uint32_t Mask = 0;
  // Nibble-packed reference orders used below: 0x01234567 is the untouched
  // order of V1's half-words, 0x89ABCDEF that of V2's.
  uint32_t OriginalOrderLow = 0x1234567;
  uint32_t OriginalOrderHigh = 0x89ABCDEF;
  // Now we look at mask elements 0,2,4,6,8,10,12,14. Pack the mask into a
  // 32-bit space, only need 4-bit nibbles per element. Element 0 ends up in
  // the most significant nibble.
  for (unsigned i = 0; i < NumHalfWords; ++i) {
    unsigned MaskShift = (NumHalfWords - 1 - i) * 4;
    Mask |= ((uint32_t)(N->getMaskElt(i * 2) / 2) << MaskShift);
  }

  // For each mask element, find out if we're just inserting something
  // from V2 into V1 or vice versa. Possible permutations inserting an element
  // from V2 into V1:
  //   X, 1, 2, 3, 4, 5, 6, 7
  //   0, X, 2, 3, 4, 5, 6, 7
  //   0, 1, X, 3, 4, 5, 6, 7
  //   0, 1, 2, X, 4, 5, 6, 7
  //   0, 1, 2, 3, X, 5, 6, 7
  //   0, 1, 2, 3, 4, X, 6, 7
  //   0, 1, 2, 3, 4, 5, X, 7
  //   0, 1, 2, 3, 4, 5, 6, X
  // Inserting from V1 into V2 will be similar, except mask range will be
  // [8,15].

  bool FoundCandidate = false;
  // Go through the mask of half-words to find an element that's being moved
  // from one vector to the other.
  for (unsigned i = 0; i < NumHalfWords; ++i) {
    unsigned MaskShift = (NumHalfWords - 1 - i) * 4;
    uint32_t MaskOneElt = (Mask >> MaskShift) & 0xF;
    // Mask covering every nibble except the one under inspection.
    uint32_t MaskOtherElts = ~(0xF << MaskShift);
    uint32_t TargetOrder = 0x0;

    // If both vector operands for the shuffle are the same vector, the mask
    // will contain only elements from the first one and the second one will be
    // undef.
    if (V2.isUndef()) {
      ShiftElts = 0;
      // The element VINSERTH reads its source half-word from: 4 for LE,
      // 3 for BE.
      unsigned VINSERTHSrcElem = IsLE ? 4 : 3;
      TargetOrder = OriginalOrderLow;
      Swap = false;
      // Skip if not the correct element or mask of other elements don't equal
      // to our expected order.
      if (MaskOneElt == VINSERTHSrcElem &&
          (Mask & MaskOtherElts) == (TargetOrder & MaskOtherElts)) {
        InsertAtByte = IsLE ? BytesInVector - (i + 1) * 2 : i * 2;
        FoundCandidate = true;
        break;
      }
    } else { // If both operands are defined.
      // Target order is [8,15] if the current mask is between [0,7].
      TargetOrder =
          (MaskOneElt < NumHalfWords) ? OriginalOrderHigh : OriginalOrderLow;
      // Skip if mask of other elements don't equal our expected order.
      if ((Mask & MaskOtherElts) == (TargetOrder & MaskOtherElts)) {
        // We only need the last 3 bits for the number of shifts.
        ShiftElts = IsLE ? LittleEndianShifts[MaskOneElt & 0x7]
                         : BigEndianShifts[MaskOneElt & 0x7];
        InsertAtByte = IsLE ? BytesInVector - (i + 1) * 2 : i * 2;
        Swap = MaskOneElt < NumHalfWords;
        FoundCandidate = true;
        break;
      }
    }
  }

  if (!FoundCandidate)
    return SDValue();

  // Candidate found, construct the proper SDAG sequence with VINSERTH,
  // optionally with VECSHL if shift is required.
  if (Swap)
    std::swap(V1, V2);
  if (V2.isUndef())
    V2 = V1;
  SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V1);
  if (ShiftElts) {
    // Double ShiftElts because we're left shifting on v16i8 type.
    SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v16i8, V2, V2,
                              DAG.getConstant(2 * ShiftElts, dl, MVT::i32));
    SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, Shl);
    SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v8i16, Conv1, Conv2,
                              DAG.getConstant(InsertAtByte, dl, MVT::i32));
    return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
  }
  SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V2);
  SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v8i16, Conv1, Conv2,
                            DAG.getConstant(InsertAtByte, dl, MVT::i32));
  return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
}
10199
10200/// lowerToXXSPLTI32DX - Return the SDValue if this VECTOR_SHUFFLE can be
10201/// handled by the XXSPLTI32DX instruction introduced in ISA 3.1, otherwise
10202/// return the default SDValue.
SDValue PPCTargetLowering::lowerToXXSPLTI32DX(ShuffleVectorSDNode *SVN,
                                              SelectionDAG &DAG) const {
  // The LHS and RHS may be bitcasts to v16i8 as we canonicalize shuffles
  // to v16i8. Peek through the bitcasts to get the actual operands.

  auto ShuffleMask = SVN->getMask();
  SDValue VecShuffle(SVN, 0);
  SDLoc DL(SVN);

  // Check that we have a four byte shuffle.
  if (!isNByteElemShuffleMask(SVN, 4, 1))
    return SDValue();

  // Canonicalize the RHS being a BUILD_VECTOR when lowering to xxsplti32dx.
  if (RHS->getOpcode() != ISD::BUILD_VECTOR) {
    std::swap(LHS, RHS);
    ShuffleVectorSDNode *CommutedSV = dyn_cast<ShuffleVectorSDNode>(VecShuffle);
    if (!CommutedSV)
      return SDValue();
    ShuffleMask = CommutedSV->getMask();
  }

  // Ensure that the RHS is a vector of constants.
  BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(RHS.getNode());
  if (!BVN)
    return SDValue();

  // Check if RHS is a splat of 4-bytes (or smaller).
  APInt APSplatValue, APSplatUndef;
  unsigned SplatBitSize;
  bool HasAnyUndefs;
  if (!BVN->isConstantSplat(APSplatValue, APSplatUndef, SplatBitSize,
                            HasAnyUndefs, 0, !Subtarget.isLittleEndian()) ||
      SplatBitSize > 32)
    return SDValue();

  // Check that the shuffle mask matches the semantics of XXSPLTI32DX.
  // The instruction splats a constant C into two words of the source vector
  // producing { C, Unchanged, C, Unchanged } or { Unchanged, C, Unchanged, C }.
  // Thus we check that the shuffle mask is the equivalent of
  // <0, [4-7], 2, [4-7]> or <[4-7], 1, [4-7], 3> respectively.
  // Note: the check above of isNByteElemShuffleMask() ensures that the bytes
  // within each word are consecutive, so we only need to check the first byte.
  SDValue Index;
  bool IsLE = Subtarget.isLittleEndian();
  if ((ShuffleMask[0] == 0 && ShuffleMask[8] == 8) &&
      (ShuffleMask[4] % 4 == 0 && ShuffleMask[12] % 4 == 0 &&
       ShuffleMask[4] > 15 && ShuffleMask[12] > 15))
    // Words 0 and 2 come unchanged from LHS; words 1 and 3 are the constant.
    Index = DAG.getTargetConstant(IsLE ? 0 : 1, DL, MVT::i32);
  else if ((ShuffleMask[4] == 4 && ShuffleMask[12] == 12) &&
           (ShuffleMask[0] % 4 == 0 && ShuffleMask[8] % 4 == 0 &&
            ShuffleMask[0] > 15 && ShuffleMask[8] > 15))
    // Words 1 and 3 come unchanged from LHS; words 0 and 2 are the constant.
    Index = DAG.getTargetConstant(IsLE ? 1 : 0, DL, MVT::i32);
  else
    return SDValue();

  // If the splat is narrower than 32-bits, we need to get the 32-bit value
  // for XXSPLTI32DX by replicating the narrow pattern until it fills a word.
  unsigned SplatVal = APSplatValue.getZExtValue();
  for (; SplatBitSize < 32; SplatBitSize <<= 1)
    SplatVal |= (SplatVal << SplatBitSize);

  SDValue SplatNode = DAG.getNode(
      PPCISD::XXSPLTI32DX, DL, MVT::v2i64, DAG.getBitcast(MVT::v2i64, LHS),
      Index, DAG.getTargetConstant(SplatVal, DL, MVT::i32));
  return DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, SplatNode);
}
10273
10274/// LowerROTL - Custom lowering for ROTL(v1i128) to vector_shuffle(v16i8).
10275/// We lower ROTL(v1i128) to vector_shuffle(v16i8) only if shift amount is
10276/// a multiple of 8. Otherwise convert it to a scalar rotation(i128)
10277/// i.e (or (shl x, C1), (srl x, 128-C1)).
10278SDValue PPCTargetLowering::LowerROTL(SDValue Op, SelectionDAG &DAG) const {
10279 assert(Op.getOpcode() == ISD::ROTL && "Should only be called for ISD::ROTL");
10280 assert(Op.getValueType() == MVT::v1i128 &&
10281 "Only set v1i128 as custom, other type shouldn't reach here!");
10282 SDLoc dl(Op);
10283 SDValue N0 = peekThroughBitcasts(Op.getOperand(0));
10284 SDValue N1 = peekThroughBitcasts(Op.getOperand(1));
10285 unsigned SHLAmt = N1.getConstantOperandVal(0);
10286 if (SHLAmt % 8 == 0) {
10287 std::array<int, 16> Mask;
10288 std::iota(Mask.begin(), Mask.end(), 0);
10289 std::rotate(Mask.begin(), Mask.begin() + SHLAmt / 8, Mask.end());
10290 if (SDValue Shuffle =
10291 DAG.getVectorShuffle(MVT::v16i8, dl, DAG.getBitcast(MVT::v16i8, N0),
10292 DAG.getUNDEF(MVT::v16i8), Mask))
10293 return DAG.getNode(ISD::BITCAST, dl, MVT::v1i128, Shuffle);
10294 }
10295 SDValue ArgVal = DAG.getBitcast(MVT::i128, N0);
10296 SDValue SHLOp = DAG.getNode(ISD::SHL, dl, MVT::i128, ArgVal,
10297 DAG.getConstant(SHLAmt, dl, MVT::i32));
10298 SDValue SRLOp = DAG.getNode(ISD::SRL, dl, MVT::i128, ArgVal,
10299 DAG.getConstant(128 - SHLAmt, dl, MVT::i32));
10300 SDValue OROp = DAG.getNode(ISD::OR, dl, MVT::i128, SHLOp, SRLOp);
10301 return DAG.getNode(ISD::BITCAST, dl, MVT::v1i128, OROp);
10302}
10303
10304/// LowerVECTOR_SHUFFLE - Return the code we lower for VECTOR_SHUFFLE. If this
10305/// is a shuffle we can handle in a single instruction, return it. Otherwise,
10306/// return the code it can be lowered into. Worst case, it can always be
10307/// lowered into a vperm.
SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
                                               SelectionDAG &DAG) const {
  SDLoc dl(Op);
  SDValue V1 = Op.getOperand(0);
  SDValue V2 = Op.getOperand(1);
  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);

  // Any nodes that were combined in the target-independent combiner prior
  // to vector legalization will not be sent to the target combine. Try to
  // combine it here.
  if (SDValue NewShuffle = combineVectorShuffle(SVOp, DAG)) {
    if (!isa<ShuffleVectorSDNode>(NewShuffle))
      return NewShuffle;
    Op = NewShuffle;
    V1 = Op.getOperand(0);
    V2 = Op.getOperand(1);
  }
  EVT VT = Op.getValueType();
  bool isLittleEndian = Subtarget.isLittleEndian();

  // Filled in by the PPC::is*Mask helpers that match specific instruction
  // patterns below.
  unsigned ShiftElts, InsertAtByte;
  bool Swap = false;

  // If this is a load-and-splat, we can do that with a single instruction
  // in some cases. However if the load has multiple uses, we don't want to
  // combine it because that will just produce multiple loads.
  bool IsPermutedLoad = false;
  const SDValue *InputLoad = getNormalLoadInput(V1, IsPermutedLoad);
  if (InputLoad && Subtarget.hasVSX() && V2.isUndef() &&
      (PPC::isSplatShuffleMask(SVOp, 4) || PPC::isSplatShuffleMask(SVOp, 8)) &&
      InputLoad->hasOneUse()) {
    bool IsFourByte = PPC::isSplatShuffleMask(SVOp, 4);
    int SplatIdx =
        PPC::getSplatIdxForPPCMnemonics(SVOp, IsFourByte ? 4 : 8, DAG);

    // The splat index for permuted loads will be in the left half of the vector
    // which is strictly wider than the loaded value by 8 bytes. So we need to
    // adjust the splat index to point to the correct address in memory.
    if (IsPermutedLoad) {
      assert((isLittleEndian || IsFourByte) &&
             "Unexpected size for permuted load on big endian target");
      SplatIdx += IsFourByte ? 2 : 1;
      assert((SplatIdx < (IsFourByte ? 4 : 2)) &&
             "Splat of a value outside of the loaded memory");
    }

    LoadSDNode *LD = cast<LoadSDNode>(*InputLoad);
    // For 4-byte load-and-splat, we need Power9.
    if ((IsFourByte && Subtarget.hasP9Vector()) || !IsFourByte) {
      uint64_t Offset = 0;
      if (IsFourByte)
        Offset = isLittleEndian ? (3 - SplatIdx) * 4 : SplatIdx * 4;
      else
        Offset = isLittleEndian ? (1 - SplatIdx) * 8 : SplatIdx * 8;

      // If the width of the load is the same as the width of the splat,
      // loading with an offset would load the wrong memory.
      if (LD->getValueType(0).getSizeInBits() == (IsFourByte ? 32 : 64))
        Offset = 0;

      SDValue BasePtr = LD->getBasePtr();
      if (Offset != 0)
                                  BasePtr, DAG.getIntPtrConstant(Offset, dl));
      SDValue Ops[] = {
        LD->getChain(),    // Chain
        BasePtr,           // BasePtr
        DAG.getValueType(Op.getValueType()) // VT
      };
      SDVTList VTL =
          DAG.getVTList(IsFourByte ? MVT::v4i32 : MVT::v2i64, MVT::Other);
      SDValue LdSplt =
          DAG.getMemIntrinsicNode(PPCISD::LD_SPLAT, dl, VTL,
                                  Ops, LD->getMemoryVT(), LD->getMemOperand());
      // Re-route users of the original load's chain to the new node's chain.
      DAG.ReplaceAllUsesOfValueWith(InputLoad->getValue(1), LdSplt.getValue(1));
      if (LdSplt.getValueType() != SVOp->getValueType(0))
        LdSplt = DAG.getBitcast(SVOp->getValueType(0), LdSplt);
      return LdSplt;
    }
  }

  // All v2i64 and v2f64 shuffles are legal
  if (VT == MVT::v2i64 || VT == MVT::v2f64)
    return Op;

  // Try the ISA 3.0 word-insert (xxinsertw) pattern.
  if (Subtarget.hasP9Vector() &&
      PPC::isXXINSERTWMask(SVOp, ShiftElts, InsertAtByte, Swap,
                           isLittleEndian)) {
    if (V2.isUndef())
      V2 = V1;
    else if (Swap)
      std::swap(V1, V2);
    SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
    SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V2);
    if (ShiftElts) {
      SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v4i32, Conv2, Conv2,
                                DAG.getConstant(ShiftElts, dl, MVT::i32));
      SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v4i32, Conv1, Shl,
                                DAG.getConstant(InsertAtByte, dl, MVT::i32));
      return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
    }
    SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v4i32, Conv1, Conv2,
                              DAG.getConstant(InsertAtByte, dl, MVT::i32));
    return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
  }

  if (Subtarget.hasPrefixInstrs() && Subtarget.hasP10Vector()) {
    SDValue SplatInsertNode;
    if ((SplatInsertNode = lowerToXXSPLTI32DX(SVOp, DAG)))
      return SplatInsertNode;
  }

  if (Subtarget.hasP9Altivec()) {
    SDValue NewISDNode;
    if ((NewISDNode = lowerToVINSERTH(SVOp, DAG)))
      return NewISDNode;

    if ((NewISDNode = lowerToVINSERTB(SVOp, DAG)))
      return NewISDNode;
  }

  // Word-granularity double-vector shift (xxsldwi).
  if (Subtarget.hasVSX() &&
      PPC::isXXSLDWIShuffleMask(SVOp, ShiftElts, Swap, isLittleEndian)) {
    if (Swap)
      std::swap(V1, V2);
    SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
    SDValue Conv2 =
        DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V2.isUndef() ? V1 : V2);

    SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v4i32, Conv1, Conv2,
                              DAG.getConstant(ShiftElts, dl, MVT::i32));
    return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Shl);
  }

  // Doubleword permute (xxpermdi).
  if (Subtarget.hasVSX() &&
      PPC::isXXPERMDIShuffleMask(SVOp, ShiftElts, Swap, isLittleEndian)) {
    if (Swap)
      std::swap(V1, V2);
    SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V1);
    SDValue Conv2 =
        DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V2.isUndef() ? V1 : V2);

    SDValue PermDI = DAG.getNode(PPCISD::XXPERMDI, dl, MVT::v2i64, Conv1, Conv2,
                                 DAG.getConstant(ShiftElts, dl, MVT::i32));
    return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, PermDI);
  }

  // Byte-reverse patterns (xxbrh/xxbrw/xxbrd/xxbrq) map onto BSWAP of the
  // appropriately-typed view of the vector.
  if (Subtarget.hasP9Vector()) {
    if (PPC::isXXBRHShuffleMask(SVOp)) {
      SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V1);
      SDValue ReveHWord = DAG.getNode(ISD::BSWAP, dl, MVT::v8i16, Conv);
      return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveHWord);
    } else if (PPC::isXXBRWShuffleMask(SVOp)) {
      SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
      SDValue ReveWord = DAG.getNode(ISD::BSWAP, dl, MVT::v4i32, Conv);
      return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveWord);
    } else if (PPC::isXXBRDShuffleMask(SVOp)) {
      SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V1);
      SDValue ReveDWord = DAG.getNode(ISD::BSWAP, dl, MVT::v2i64, Conv);
      return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveDWord);
    } else if (PPC::isXXBRQShuffleMask(SVOp)) {
      SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v1i128, V1);
      SDValue ReveQWord = DAG.getNode(ISD::BSWAP, dl, MVT::v1i128, Conv);
      return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveQWord);
    }
  }

  if (Subtarget.hasVSX()) {
    // Word splat (xxspltw).
    if (V2.isUndef() && PPC::isSplatShuffleMask(SVOp, 4)) {
      int SplatIdx = PPC::getSplatIdxForPPCMnemonics(SVOp, 4, DAG);

      SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
      SDValue Splat = DAG.getNode(PPCISD::XXSPLT, dl, MVT::v4i32, Conv,
                                  DAG.getConstant(SplatIdx, dl, MVT::i32));
      return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Splat);
    }

    // Left shifts of 8 bytes are actually swaps. Convert accordingly.
    if (V2.isUndef() && PPC::isVSLDOIShuffleMask(SVOp, 1, DAG) == 8) {
      SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, V1);
      SDValue Swap = DAG.getNode(PPCISD::SWAP_NO_CHAIN, dl, MVT::v2f64, Conv);
      return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Swap);
    }
  }

  // Cases that are handled by instructions that take permute immediates
  // (such as vsplt*) should be left as VECTOR_SHUFFLE nodes so they can be
  // selected by the instruction selector.
  if (V2.isUndef()) {
    if (PPC::isSplatShuffleMask(SVOp, 1) ||
        PPC::isSplatShuffleMask(SVOp, 2) ||
        PPC::isSplatShuffleMask(SVOp, 4) ||
        PPC::isVPKUWUMShuffleMask(SVOp, 1, DAG) ||
        PPC::isVPKUHUMShuffleMask(SVOp, 1, DAG) ||
        PPC::isVSLDOIShuffleMask(SVOp, 1, DAG) != -1 ||
        PPC::isVMRGLShuffleMask(SVOp, 1, 1, DAG) ||
        PPC::isVMRGLShuffleMask(SVOp, 2, 1, DAG) ||
        PPC::isVMRGLShuffleMask(SVOp, 4, 1, DAG) ||
        PPC::isVMRGHShuffleMask(SVOp, 1, 1, DAG) ||
        PPC::isVMRGHShuffleMask(SVOp, 2, 1, DAG) ||
        PPC::isVMRGHShuffleMask(SVOp, 4, 1, DAG) ||
        (Subtarget.hasP8Altivec() && (
          PPC::isVPKUDUMShuffleMask(SVOp, 1, DAG) ||
          PPC::isVMRGEOShuffleMask(SVOp, true, 1, DAG) ||
          PPC::isVMRGEOShuffleMask(SVOp, false, 1, DAG)))) {
      return Op;
    }
  }

  // Altivec has a variety of "shuffle immediates" that take two vector inputs
  // and produce a fixed permutation. If any of these match, do not lower to
  // VPERM.
  unsigned int ShuffleKind = isLittleEndian ? 2 : 0;
  if (PPC::isVPKUWUMShuffleMask(SVOp, ShuffleKind, DAG) ||
      PPC::isVPKUHUMShuffleMask(SVOp, ShuffleKind, DAG) ||
      PPC::isVSLDOIShuffleMask(SVOp, ShuffleKind, DAG) != -1 ||
      PPC::isVMRGLShuffleMask(SVOp, 1, ShuffleKind, DAG) ||
      PPC::isVMRGLShuffleMask(SVOp, 2, ShuffleKind, DAG) ||
      PPC::isVMRGLShuffleMask(SVOp, 4, ShuffleKind, DAG) ||
      PPC::isVMRGHShuffleMask(SVOp, 1, ShuffleKind, DAG) ||
      PPC::isVMRGHShuffleMask(SVOp, 2, ShuffleKind, DAG) ||
      PPC::isVMRGHShuffleMask(SVOp, 4, ShuffleKind, DAG) ||
      (Subtarget.hasP8Altivec() && (
        PPC::isVPKUDUMShuffleMask(SVOp, ShuffleKind, DAG) ||
        PPC::isVMRGEOShuffleMask(SVOp, true, ShuffleKind, DAG) ||
        PPC::isVMRGEOShuffleMask(SVOp, false, ShuffleKind, DAG))))
    return Op;

  // Check to see if this is a shuffle of 4-byte values. If so, we can use our
  // perfect shuffle table to emit an optimal matching sequence.
  ArrayRef<int> PermMask = SVOp->getMask();

  if (!DisablePerfectShuffle && !isLittleEndian) {
    unsigned PFIndexes[4];
    bool isFourElementShuffle = true;
    for (unsigned i = 0; i != 4 && isFourElementShuffle;
         ++i) { // Element number
      unsigned EltNo = 8; // Start out undef.
      for (unsigned j = 0; j != 4; ++j) { // Intra-element byte.
        if (PermMask[i * 4 + j] < 0)
          continue; // Undef, ignore it.

        unsigned ByteSource = PermMask[i * 4 + j];
        // All four bytes of a word element must be consecutive bytes of a
        // single source word, otherwise this is not a 4-byte shuffle.
        if ((ByteSource & 3) != j) {
          isFourElementShuffle = false;
          break;
        }

        if (EltNo == 8) {
          EltNo = ByteSource / 4;
        } else if (EltNo != ByteSource / 4) {
          isFourElementShuffle = false;
          break;
        }
      }
      PFIndexes[i] = EltNo;
    }

    // If this shuffle can be expressed as a shuffle of 4-byte elements, use the
    // perfect shuffle vector to determine if it is cost effective to do this as
    // discrete instructions, or whether we should use a vperm.
    // For now, we skip this for little endian until such time as we have a
    // little-endian perfect shuffle table.
    if (isFourElementShuffle) {
      // Compute the index in the perfect shuffle table (base-9 encoding of
      // the four word indices).
      unsigned PFTableIndex = PFIndexes[0] * 9 * 9 * 9 + PFIndexes[1] * 9 * 9 +
                              PFIndexes[2] * 9 + PFIndexes[3];

      unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
      unsigned Cost = (PFEntry >> 30);

      // Determining when to avoid vperm is tricky. Many things affect the cost
      // of vperm, particularly how many times the perm mask needs to be
      // computed. For example, if the perm mask can be hoisted out of a loop or
      // is already used (perhaps because there are multiple permutes with the
      // same shuffle mask?) the vperm has a cost of 1. OTOH, hoisting the
      // permute mask out of the loop requires an extra register.
      //
      // As a compromise, we only emit discrete instructions if the shuffle can
      // be generated in 3 or fewer operations. When we have loop information
      // available, if this block is within a loop, we should avoid using vperm
      // for 3-operation perms and use a constant pool load instead.
      if (Cost < 3)
        return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl);
    }
  }

  // Lower this to a VPERM(V1, V2, V3) expression, where V3 is a constant
  // vector that will get spilled to the constant pool.
  if (V2.isUndef()) V2 = V1;

  return LowerVPERM(Op, DAG, PermMask, VT, V1, V2);
}
10602
/// Emit a PPCISD::VPERM node (or PPCISD::XXPERM when VSX + P9Vector are
/// available and at least one input is single-use) implementing the shuffle
/// described by \p PermMask. The element-unit shuffle mask is expanded to the
/// 16 x i8 byte-index permute control vector that the (big-endian-defined)
/// vperm instruction expects, complementing indices w.r.t. 31 on little
/// endian and compensating for any XXSWAPD feeding either input.
SDValue PPCTargetLowering::LowerVPERM(SDValue Op, SelectionDAG &DAG,
                                      ArrayRef<int> PermMask, EVT VT,
                                      SDValue V1, SDValue V2) const {
  unsigned Opcode = PPCISD::VPERM;
  // Remember the original type so the result can be bitcast back at the end.
  EVT ValType = V1.getValueType();
  SDLoc dl(Op);
  bool NeedSwap = false;
  bool isLittleEndian = Subtarget.isLittleEndian();
  bool isPPC64 = Subtarget.isPPC64();

  if (Subtarget.hasVSX() && Subtarget.hasP9Vector() &&
      (V1->hasOneUse() || V2->hasOneUse())) {
    LLVM_DEBUG(dbgs() << "At least one of two input vectors are dead - using "
                         "XXPERM instead\n");
    Opcode = PPCISD::XXPERM;

    // The second input to XXPERM is also an output so if the second input has
    // multiple uses then copying is necessary, as a result we want the
    // single-use operand to be used as the second input to prevent copying.
    if ((!isLittleEndian && !V2->hasOneUse() && V1->hasOneUse()) ||
        (isLittleEndian && !V1->hasOneUse() && V2->hasOneUse())) {
      std::swap(V1, V2);
      NeedSwap = !NeedSwap;
    }
  }

  // The SHUFFLE_VECTOR mask is almost exactly what we want for vperm, except
  // that it is in input element units, not in bytes.  Convert now.

  // For little endian, the order of the input vectors is reversed, and
  // the permutation mask is complemented with respect to 31.  This is
  // necessary to produce proper semantics with the big-endian-based vperm
  // instruction.
  EVT EltVT = V1.getValueType().getVectorElementType();
  unsigned BytesPerElement = EltVT.getSizeInBits() / 8;

  // NOTE(review): these accesses assume both V1 and V2 have at least one
  // operand (getOperand(0)); presumably the callers only reach here with
  // non-leaf nodes -- confirm against LowerVECTOR_SHUFFLE.
  bool V1HasXXSWAPD = V1->getOperand(0)->getOpcode() == PPCISD::XXSWAPD;
  bool V2HasXXSWAPD = V2->getOperand(0)->getOpcode() == PPCISD::XXSWAPD;

  /*
  Vectors will be appended like so: [ V1 | v2 ]
  XXSWAPD on V1:
  [   A   |   B   |   C   |   D   ] -> [   C   |   D   |   A   |   B   ]
     0-3     4-7     8-11   12-15         0-3     4-7     8-11   12-15
  i.e.  index of A, B += 8, and index of C, D -= 8.
  XXSWAPD on V2:
  [   E   |   F   |   G   |   H   ] -> [   G   |   H   |   E   |   F   ]
    16-19   20-23   24-27   28-31        16-19   20-23   24-27   28-31
  i.e.  index of E, F += 8, index of G, H -= 8
  Swap V1 and V2:
  [   V1   |   V2   ] -> [   V2   |   V1   ]
     0-15     16-31          0-15     16-31
  i.e.  index of V1 += 16, index of V2 -= 16
  */

  SmallVector<SDValue, 16> ResultMask;
  for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i) {
    // Undef mask entries (< 0) are mapped to element 0; any source byte works.
    unsigned SrcElt = PermMask[i] < 0 ? 0 : PermMask[i];

    // Rewrite byte indices per the diagram above to look through XXSWAPD.
    if (V1HasXXSWAPD) {
      if (SrcElt < 8)
        SrcElt += 8;
      else if (SrcElt < 16)
        SrcElt -= 8;
    }
    if (V2HasXXSWAPD) {
      if (SrcElt > 23)
        SrcElt -= 8;
      else if (SrcElt > 15)
        SrcElt += 8;
    }
    if (NeedSwap) {
      if (SrcElt < 16)
        SrcElt += 16;
      else
        SrcElt -= 16;
    }
    // Expand the element index into byte indices; complement w.r.t. 31 on LE.
    for (unsigned j = 0; j != BytesPerElement; ++j)
      if (isLittleEndian)
        ResultMask.push_back(
            DAG.getConstant(31 - (SrcElt * BytesPerElement + j), dl, MVT::i32));
      else
        ResultMask.push_back(
            DAG.getConstant(SrcElt * BytesPerElement + j, dl, MVT::i32));
  }

  // Having folded the swaps into the mask, feed the pre-swap values directly.
  if (V1HasXXSWAPD) {
    dl = SDLoc(V1->getOperand(0));
    V1 = V1->getOperand(0)->getOperand(1);
  }
  if (V2HasXXSWAPD) {
    dl = SDLoc(V2->getOperand(0));
    V2 = V2->getOperand(0)->getOperand(1);
  }

  if (isPPC64 && (V1HasXXSWAPD || V2HasXXSWAPD)) {
    if (ValType != MVT::v2f64)
      V1 = DAG.getBitcast(MVT::v2f64, V1);
    if (V2.getValueType() != MVT::v2f64)
      V2 = DAG.getBitcast(MVT::v2f64, V2);
  }

  ShufflesHandledWithVPERM++;
  SDValue VPermMask = DAG.getBuildVector(MVT::v16i8, dl, ResultMask);
  LLVM_DEBUG({
    ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
    if (Opcode == PPCISD::XXPERM) {
      dbgs() << "Emitting a XXPERM for the following shuffle:\n";
    } else {
      dbgs() << "Emitting a VPERM for the following shuffle:\n";
    }
    SVOp->dump();
    dbgs() << "With the following permute control vector:\n";
    VPermMask.dump();
  });

  if (Opcode == PPCISD::XXPERM)
    VPermMask = DAG.getBitcast(MVT::v4i32, VPermMask);

  // Only need to place items backwards in LE,
  // the mask was properly calculated.
  if (isLittleEndian)
    std::swap(V1, V2);

  SDValue VPERMNode =
      DAG.getNode(Opcode, dl, V1.getValueType(), V1, V2, VPermMask);

  // Restore the caller's original value type.
  VPERMNode = DAG.getBitcast(ValType, VPERMNode);
  return VPERMNode;
}
10733
10734/// getVectorCompareInfo - Given an intrinsic, return false if it is not a
10735/// vector comparison. If it is, return true and fill in Opc/isDot with
10736/// information about the intrinsic.
/// Map an AltiVec/VSX compare intrinsic to the numeric opcode field used to
/// build PPCISD::VCMP / VCMP_rec nodes.
///
/// \param Intrin      the INTRINSIC_WO_CHAIN node; operand 0 holds the ID.
/// \param CompareOpc  [out] numeric compare opcode forwarded as a constant
///                    operand of the VCMP/VCMP_rec node; -1 if unrecognized.
/// \param isDot       [out] true for the *_p predicate intrinsics, which are
///                    lowered to the recording (dot) form via VCMP_rec.
/// \returns false if the intrinsic is not a vector compare, or if the
///          required subtarget feature (P8/P9 AltiVec, VSX, ISA 3.1) is
///          missing.
static bool getVectorCompareInfo(SDValue Intrin, int &CompareOpc,
                                 bool &isDot, const PPCSubtarget &Subtarget) {
  unsigned IntrinsicID = Intrin.getConstantOperandVal(0);
  // Defaults: not a compare until proven otherwise.
  CompareOpc = -1;
  isDot = false;
  switch (IntrinsicID) {
  default:
    return false;
  // Comparison predicates.
  case Intrinsic::ppc_altivec_vcmpbfp_p:
    CompareOpc = 966;
    isDot = true;
    break;
  case Intrinsic::ppc_altivec_vcmpeqfp_p:
    CompareOpc = 198;
    isDot = true;
    break;
  case Intrinsic::ppc_altivec_vcmpequb_p:
    CompareOpc = 6;
    isDot = true;
    break;
  case Intrinsic::ppc_altivec_vcmpequh_p:
    CompareOpc = 70;
    isDot = true;
    break;
  case Intrinsic::ppc_altivec_vcmpequw_p:
    CompareOpc = 134;
    isDot = true;
    break;
  // Doubleword compares require P8 AltiVec or VSX.
  case Intrinsic::ppc_altivec_vcmpequd_p:
    if (Subtarget.hasVSX() || Subtarget.hasP8Altivec()) {
      CompareOpc = 199;
      isDot = true;
    } else
      return false;
    break;
  // Not-equal / not-equal-or-zero compares require P9 AltiVec.
  case Intrinsic::ppc_altivec_vcmpneb_p:
  case Intrinsic::ppc_altivec_vcmpneh_p:
  case Intrinsic::ppc_altivec_vcmpnew_p:
  case Intrinsic::ppc_altivec_vcmpnezb_p:
  case Intrinsic::ppc_altivec_vcmpnezh_p:
  case Intrinsic::ppc_altivec_vcmpnezw_p:
    if (Subtarget.hasP9Altivec()) {
      switch (IntrinsicID) {
      default:
        llvm_unreachable("Unknown comparison intrinsic.");
      case Intrinsic::ppc_altivec_vcmpneb_p:
        CompareOpc = 7;
        break;
      case Intrinsic::ppc_altivec_vcmpneh_p:
        CompareOpc = 71;
        break;
      case Intrinsic::ppc_altivec_vcmpnew_p:
        CompareOpc = 135;
        break;
      case Intrinsic::ppc_altivec_vcmpnezb_p:
        CompareOpc = 263;
        break;
      case Intrinsic::ppc_altivec_vcmpnezh_p:
        CompareOpc = 327;
        break;
      case Intrinsic::ppc_altivec_vcmpnezw_p:
        CompareOpc = 391;
        break;
      }
      isDot = true;
    } else
      return false;
    break;
  case Intrinsic::ppc_altivec_vcmpgefp_p:
    CompareOpc = 454;
    isDot = true;
    break;
  case Intrinsic::ppc_altivec_vcmpgtfp_p:
    CompareOpc = 710;
    isDot = true;
    break;
  case Intrinsic::ppc_altivec_vcmpgtsb_p:
    CompareOpc = 774;
    isDot = true;
    break;
  case Intrinsic::ppc_altivec_vcmpgtsh_p:
    CompareOpc = 838;
    isDot = true;
    break;
  case Intrinsic::ppc_altivec_vcmpgtsw_p:
    CompareOpc = 902;
    isDot = true;
    break;
  case Intrinsic::ppc_altivec_vcmpgtsd_p:
    if (Subtarget.hasVSX() || Subtarget.hasP8Altivec()) {
      CompareOpc = 967;
      isDot = true;
    } else
      return false;
    break;
  case Intrinsic::ppc_altivec_vcmpgtub_p:
    CompareOpc = 518;
    isDot = true;
    break;
  case Intrinsic::ppc_altivec_vcmpgtuh_p:
    CompareOpc = 582;
    isDot = true;
    break;
  case Intrinsic::ppc_altivec_vcmpgtuw_p:
    CompareOpc = 646;
    isDot = true;
    break;
  case Intrinsic::ppc_altivec_vcmpgtud_p:
    if (Subtarget.hasVSX() || Subtarget.hasP8Altivec()) {
      CompareOpc = 711;
      isDot = true;
    } else
      return false;
    break;

  // Quadword compares require ISA 3.1.
  case Intrinsic::ppc_altivec_vcmpequq:
  case Intrinsic::ppc_altivec_vcmpgtsq:
  case Intrinsic::ppc_altivec_vcmpgtuq:
    if (!Subtarget.isISA3_1())
      return false;
    switch (IntrinsicID) {
    default:
      llvm_unreachable("Unknown comparison intrinsic.");
    case Intrinsic::ppc_altivec_vcmpequq:
      CompareOpc = 455;
      break;
    case Intrinsic::ppc_altivec_vcmpgtsq:
      CompareOpc = 903;
      break;
    case Intrinsic::ppc_altivec_vcmpgtuq:
      CompareOpc = 647;
      break;
    }
    break;

  // VSX predicate comparisons use the same infrastructure
  case Intrinsic::ppc_vsx_xvcmpeqdp_p:
  case Intrinsic::ppc_vsx_xvcmpgedp_p:
  case Intrinsic::ppc_vsx_xvcmpgtdp_p:
  case Intrinsic::ppc_vsx_xvcmpeqsp_p:
  case Intrinsic::ppc_vsx_xvcmpgesp_p:
  case Intrinsic::ppc_vsx_xvcmpgtsp_p:
    if (Subtarget.hasVSX()) {
      switch (IntrinsicID) {
      case Intrinsic::ppc_vsx_xvcmpeqdp_p:
        CompareOpc = 99;
        break;
      case Intrinsic::ppc_vsx_xvcmpgedp_p:
        CompareOpc = 115;
        break;
      case Intrinsic::ppc_vsx_xvcmpgtdp_p:
        CompareOpc = 107;
        break;
      case Intrinsic::ppc_vsx_xvcmpeqsp_p:
        CompareOpc = 67;
        break;
      case Intrinsic::ppc_vsx_xvcmpgesp_p:
        CompareOpc = 83;
        break;
      case Intrinsic::ppc_vsx_xvcmpgtsp_p:
        CompareOpc = 75;
        break;
      }
      isDot = true;
    } else
      return false;
    break;

  // Normal Comparisons.
  case Intrinsic::ppc_altivec_vcmpbfp:
    CompareOpc = 966;
    break;
  case Intrinsic::ppc_altivec_vcmpeqfp:
    CompareOpc = 198;
    break;
  case Intrinsic::ppc_altivec_vcmpequb:
    CompareOpc = 6;
    break;
  case Intrinsic::ppc_altivec_vcmpequh:
    CompareOpc = 70;
    break;
  case Intrinsic::ppc_altivec_vcmpequw:
    CompareOpc = 134;
    break;
  case Intrinsic::ppc_altivec_vcmpequd:
    if (Subtarget.hasP8Altivec())
      CompareOpc = 199;
    else
      return false;
    break;
  case Intrinsic::ppc_altivec_vcmpneb:
  case Intrinsic::ppc_altivec_vcmpneh:
  case Intrinsic::ppc_altivec_vcmpnew:
  case Intrinsic::ppc_altivec_vcmpnezb:
  case Intrinsic::ppc_altivec_vcmpnezh:
  case Intrinsic::ppc_altivec_vcmpnezw:
    if (Subtarget.hasP9Altivec())
      switch (IntrinsicID) {
      default:
        llvm_unreachable("Unknown comparison intrinsic.");
      case Intrinsic::ppc_altivec_vcmpneb:
        CompareOpc = 7;
        break;
      case Intrinsic::ppc_altivec_vcmpneh:
        CompareOpc = 71;
        break;
      case Intrinsic::ppc_altivec_vcmpnew:
        CompareOpc = 135;
        break;
      case Intrinsic::ppc_altivec_vcmpnezb:
        CompareOpc = 263;
        break;
      case Intrinsic::ppc_altivec_vcmpnezh:
        CompareOpc = 327;
        break;
      case Intrinsic::ppc_altivec_vcmpnezw:
        CompareOpc = 391;
        break;
      }
    else
      return false;
    break;
  case Intrinsic::ppc_altivec_vcmpgefp:
    CompareOpc = 454;
    break;
  case Intrinsic::ppc_altivec_vcmpgtfp:
    CompareOpc = 710;
    break;
  case Intrinsic::ppc_altivec_vcmpgtsb:
    CompareOpc = 774;
    break;
  case Intrinsic::ppc_altivec_vcmpgtsh:
    CompareOpc = 838;
    break;
  case Intrinsic::ppc_altivec_vcmpgtsw:
    CompareOpc = 902;
    break;
  case Intrinsic::ppc_altivec_vcmpgtsd:
    if (Subtarget.hasP8Altivec())
      CompareOpc = 967;
    else
      return false;
    break;
  case Intrinsic::ppc_altivec_vcmpgtub:
    CompareOpc = 518;
    break;
  case Intrinsic::ppc_altivec_vcmpgtuh:
    CompareOpc = 582;
    break;
  case Intrinsic::ppc_altivec_vcmpgtuw:
    CompareOpc = 646;
    break;
  case Intrinsic::ppc_altivec_vcmpgtud:
    if (Subtarget.hasP8Altivec())
      CompareOpc = 711;
    else
      return false;
    break;
  case Intrinsic::ppc_altivec_vcmpequq_p:
  case Intrinsic::ppc_altivec_vcmpgtsq_p:
  case Intrinsic::ppc_altivec_vcmpgtuq_p:
    if (!Subtarget.isISA3_1())
      return false;
    switch (IntrinsicID) {
    default:
      llvm_unreachable("Unknown comparison intrinsic.");
    case Intrinsic::ppc_altivec_vcmpequq_p:
      CompareOpc = 455;
      break;
    case Intrinsic::ppc_altivec_vcmpgtsq_p:
      CompareOpc = 903;
      break;
    case Intrinsic::ppc_altivec_vcmpgtuq_p:
      CompareOpc = 647;
      break;
    }
    isDot = true;
    break;
  }
  return true;
}
11019
11020/// LowerINTRINSIC_WO_CHAIN - If this is an intrinsic that we want to custom
11021/// lower, do it, otherwise return null.
/// Custom-lower selected chainless PPC intrinsics: rotate-and-insert
/// (rldimi/rlwimi/rlwnm) to machine nodes, BCD ops via a splatted immediate,
/// MMA/DMR assembly-disassembly, exponent/data-class compares, min/max
/// chains, and finally AltiVec/VSX compares via getVectorCompareInfo.
/// Returns SDValue() for intrinsics that need no custom lowering.
SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
                                                   SelectionDAG &DAG) const {
  unsigned IntrinsicID = Op.getConstantOperandVal(0);

  SDLoc dl(Op);
  // Note: BCD instructions expect the immediate operand in vector form (v4i32),
  // but the builtin provides it as a scalar. To satisfy the instruction
  // encoding, we splat the scalar across all lanes using SPLAT_VECTOR.
  auto MapNodeWithSplatVector =
      [&](unsigned Opcode,
          std::initializer_list<SDValue> ExtraOps = {}) -> SDValue {
    SDValue SplatVal =
        DAG.getNode(ISD::SPLAT_VECTOR, dl, MVT::v4i32, Op.getOperand(2));

    SmallVector<SDValue, 4> Ops{SplatVal, Op.getOperand(1)};
    Ops.append(ExtraOps.begin(), ExtraOps.end());
    return DAG.getNode(Opcode, dl, MVT::v16i8, Ops);
  };

  switch (IntrinsicID) {
  case Intrinsic::thread_pointer:
    // Reads the thread pointer register, used for __builtin_thread_pointer.
    if (Subtarget.isPPC64())
      return DAG.getRegister(PPC::X13, MVT::i64);
    return DAG.getRegister(PPC::R2, MVT::i32);

  case Intrinsic::ppc_rldimi: {
    assert(Subtarget.isPPC64() && "rldimi is only available in 64-bit!");
    SDValue Src = Op.getOperand(1);
    APInt Mask = Op.getConstantOperandAPInt(4);
    // Degenerate masks: all-zero keeps the insert target; all-ones is a pure
    // rotate of the source.
    if (Mask.isZero())
      return Op.getOperand(2);
    if (Mask.isAllOnes())
      return DAG.getNode(ISD::ROTL, dl, MVT::i64, Src, Op.getOperand(3));
    uint64_t SH = Op.getConstantOperandVal(3);
    unsigned MB = 0, ME = 0;
    if (!isRunOfOnes64(Mask.getZExtValue(), MB, ME))
      report_fatal_error("invalid rldimi mask!");
    // rldimi requires ME=63-SH, otherwise rotation is needed before rldimi.
    if (ME < 63 - SH) {
      Src = DAG.getNode(ISD::ROTL, dl, MVT::i64, Src,
                        DAG.getConstant(ME + SH + 1, dl, MVT::i32));
    } else if (ME > 63 - SH) {
      Src = DAG.getNode(ISD::ROTL, dl, MVT::i64, Src,
                        DAG.getConstant(ME + SH - 63, dl, MVT::i32));
    }
    return SDValue(
        DAG.getMachineNode(PPC::RLDIMI, dl, MVT::i64,
                           {Op.getOperand(2), Src,
                            DAG.getTargetConstant(63 - ME, dl, MVT::i32),
                            DAG.getTargetConstant(MB, dl, MVT::i32)}),
        0);
  }

  case Intrinsic::ppc_rlwimi: {
    APInt Mask = Op.getConstantOperandAPInt(4);
    // Same degenerate-mask handling as rldimi, in 32 bits.
    if (Mask.isZero())
      return Op.getOperand(2);
    if (Mask.isAllOnes())
      return DAG.getNode(ISD::ROTL, dl, MVT::i32, Op.getOperand(1),
                         Op.getOperand(3));
    unsigned MB = 0, ME = 0;
    if (!isRunOfOnes(Mask.getZExtValue(), MB, ME))
      report_fatal_error("invalid rlwimi mask!");
    return SDValue(DAG.getMachineNode(
                       PPC::RLWIMI, dl, MVT::i32,
                       {Op.getOperand(2), Op.getOperand(1), Op.getOperand(3),
                        DAG.getTargetConstant(MB, dl, MVT::i32),
                        DAG.getTargetConstant(ME, dl, MVT::i32)}),
                   0);
  }

  case Intrinsic::ppc_bcdshift:
    return MapNodeWithSplatVector(PPCISD::BCDSHIFT, {Op.getOperand(3)});
  case Intrinsic::ppc_bcdshiftround:
    return MapNodeWithSplatVector(PPCISD::BCDSHIFTROUND, {Op.getOperand(3)});
  case Intrinsic::ppc_bcdtruncate:
    return MapNodeWithSplatVector(PPCISD::BCDTRUNC, {Op.getOperand(3)});
  case Intrinsic::ppc_bcdunsignedtruncate:
    return MapNodeWithSplatVector(PPCISD::BCDUTRUNC);
  case Intrinsic::ppc_bcdunsignedshift:
    return MapNodeWithSplatVector(PPCISD::BCDUSHIFT);

  case Intrinsic::ppc_rlwnm: {
    // Zero mask yields zero regardless of the rotate amount.
    if (Op.getConstantOperandVal(3) == 0)
      return DAG.getConstant(0, dl, MVT::i32);
    unsigned MB = 0, ME = 0;
    if (!isRunOfOnes(Op.getConstantOperandVal(3), MB, ME))
      report_fatal_error("invalid rlwnm mask!");
    return SDValue(
        DAG.getMachineNode(PPC::RLWNM, dl, MVT::i32,
                           {Op.getOperand(1), Op.getOperand(2),
                            DAG.getTargetConstant(MB, dl, MVT::i32),
                            DAG.getTargetConstant(ME, dl, MVT::i32)}),
        0);
  }

  case Intrinsic::ppc_mma_disassemble_acc: {
    // ISA Future: extract the four 16-byte registers from a 512-bit
    // accumulator via DMXXEXTFDMR512 (two 256-bit halves, then one register
    // from each half per EXTRACT_VSX_REG), honoring endian register order.
    if (Subtarget.isISAFuture()) {
      EVT ReturnTypes[] = {MVT::v256i1, MVT::v256i1};
      SDValue WideVec =
          SDValue(DAG.getMachineNode(PPC::DMXXEXTFDMR512, dl, ReturnTypes,
                                     Op.getOperand(1)),
                  0);
      SDValue Value = SDValue(WideVec.getNode(), 0);
      SDValue Value2 = SDValue(WideVec.getNode(), 1);

      SDValue Extract;
      Extract = DAG.getNode(
          PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8,
          Subtarget.isLittleEndian() ? Value2 : Value,
          DAG.getConstant(Subtarget.isLittleEndian() ? 1 : 0,
                          dl, getPointerTy(DAG.getDataLayout())));
      RetOps.push_back(Extract);
      Extract = DAG.getNode(
          PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8,
          Subtarget.isLittleEndian() ? Value2 : Value,
          DAG.getConstant(Subtarget.isLittleEndian() ? 0 : 1,
                          dl, getPointerTy(DAG.getDataLayout())));
      RetOps.push_back(Extract);
      Extract = DAG.getNode(
          PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8,
          Subtarget.isLittleEndian() ? Value : Value2,
          DAG.getConstant(Subtarget.isLittleEndian() ? 1 : 0,
                          dl, getPointerTy(DAG.getDataLayout())));
      RetOps.push_back(Extract);
      Extract = DAG.getNode(
          PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8,
          Subtarget.isLittleEndian() ? Value : Value2,
          DAG.getConstant(Subtarget.isLittleEndian() ? 0 : 1,
                          dl, getPointerTy(DAG.getDataLayout())));
      RetOps.push_back(Extract);
      return DAG.getMergeValues(RetOps, dl);
    }
    [[fallthrough]];
  }
  case Intrinsic::ppc_vsx_disassemble_pair: {
    // Shared path: 2 registers for a pair, 4 for an accumulator (which is
    // first moved out of ACC form via XXMFACC).
    int NumVecs = 2;
    SDValue WideVec = Op.getOperand(1);
    if (IntrinsicID == Intrinsic::ppc_mma_disassemble_acc) {
      NumVecs = 4;
      WideVec = DAG.getNode(PPCISD::XXMFACC, dl, MVT::v512i1, WideVec);
    }
    for (int VecNo = 0; VecNo < NumVecs; VecNo++) {
      SDValue Extract = DAG.getNode(
          PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8, WideVec,
          DAG.getConstant(Subtarget.isLittleEndian() ? NumVecs - 1 - VecNo
                                                     : VecNo,
                          dl, getPointerTy(DAG.getDataLayout())));
      RetOps.push_back(Extract);
    }
    return DAG.getMergeValues(RetOps, dl);
  }

  case Intrinsic::ppc_build_dmr: {
    // Build a 1024-bit DMR from 8 vector operands, pairing them with
    // PAIR_BUILD; load chains are collected into a TokenFactor so the loads
    // stay ordered with the merged result.
    for (int i = 1; i < 9; i += 2) {
      SDValue Hi = Op.getOperand(i);
      SDValue Lo = Op.getOperand(i + 1);
      if (Hi->getOpcode() == ISD::LOAD)
        Chains.push_back(Hi.getValue(1));
      if (Lo->getOpcode() == ISD::LOAD)
        Chains.push_back(Lo.getValue(1));
      Pairs.push_back(
          DAG.getNode(PPCISD::PAIR_BUILD, dl, MVT::v256i1, {Hi, Lo}));
    }
    SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Chains);
    SDValue Value = DMFInsert1024(Pairs, SDLoc(Op), DAG);
    return DAG.getMergeValues({Value, TF}, dl);
  }

  case Intrinsic::ppc_mma_dmxxextfdmr512: {
    assert(Subtarget.isISAFuture() && "dmxxextfdmr512 requires ISA Future");
    auto *Idx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
    assert(Idx && (Idx->getSExtValue() == 0 || Idx->getSExtValue() == 1) &&
           "Specify P of 0 or 1 for lower or upper 512 bytes");
    unsigned HiLo = Idx->getSExtValue();
    unsigned Opcode;
    unsigned Subx;
    if (HiLo == 0) {
      Opcode = PPC::DMXXEXTFDMR512;
      Subx = PPC::sub_wacc_lo;
    } else {
      Opcode = PPC::DMXXEXTFDMR512_HI;
      Subx = PPC::sub_wacc_hi;
    }
    SDValue Subreg(
        DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::v512i1,
                           Op.getOperand(1),
                           DAG.getTargetConstant(Subx, dl, MVT::i32)),
        0);
    EVT ReturnTypes[] = {MVT::v256i1, MVT::v256i1};
    return SDValue(DAG.getMachineNode(Opcode, dl, ReturnTypes, Subreg), 0);
  }

  case Intrinsic::ppc_mma_dmxxextfdmr256: {
    assert(Subtarget.isISAFuture() && "dmxxextfdmr256 requires ISA Future");
    auto *Idx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
    // NOTE(review): the range check below is a tautology -- '||' makes it
    // always true; presumably '&&' was intended to enforce 0 <= Idx <= 3.
    assert(Idx && (Idx->getSExtValue() >= 0 || Idx->getSExtValue() <= 3) &&
           "Specify a dmr row pair 0-3");
    unsigned IdxVal = Idx->getSExtValue();
    unsigned Subx;
    switch (IdxVal) {
    case 0:
      Subx = PPC::sub_dmrrowp0;
      break;
    case 1:
      Subx = PPC::sub_dmrrowp1;
      break;
    case 2:
      Subx = PPC::sub_wacc_hi_then_sub_dmrrowp0;
      break;
    case 3:
      Subx = PPC::sub_wacc_hi_then_sub_dmrrowp1;
      break;
    }
    SDValue Subreg(
        DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::v256i1,
                           Op.getOperand(1),
                           DAG.getTargetConstant(Subx, dl, MVT::i32)),
        0);
    SDValue P = DAG.getTargetConstant(IdxVal, dl, MVT::i32);
    return SDValue(
        DAG.getMachineNode(PPC::DMXXEXTFDMR256, dl, MVT::v256i1, {Subreg, P}),
        0);
  }

  case Intrinsic::ppc_mma_dmxxinstdmr512: {
    assert(Subtarget.isISAFuture() && "dmxxinstdmr512 requires ISA Future");
    auto *Idx = dyn_cast<ConstantSDNode>(Op.getOperand(4));
    assert(Idx && (Idx->getSExtValue() == 0 || Idx->getSExtValue() == 1) &&
           "Specify P of 0 or 1 for lower or upper 512 bytes");
    unsigned HiLo = Idx->getSExtValue();
    unsigned Opcode;
    unsigned Subx;
    if (HiLo == 0) {
      Opcode = PPCISD::INST512;
      Subx = PPC::sub_wacc_lo;
    } else {
      Opcode = PPCISD::INST512HI;
      Subx = PPC::sub_wacc_hi;
    }
    SDValue Wacc = DAG.getNode(Opcode, dl, MVT::v512i1, Op.getOperand(2),
                               Op.getOperand(3));
    SDValue SubReg = DAG.getTargetConstant(Subx, dl, MVT::i32);
    return SDValue(DAG.getMachineNode(PPC::INSERT_SUBREG, dl, MVT::v1024i1,
                                      Op.getOperand(1), Wacc, SubReg),
                   0);
  }

  case Intrinsic::ppc_mma_dmxxinstdmr256: {
    assert(Subtarget.isISAFuture() && "dmxxinstdmr256 requires ISA Future");
    auto *Idx = dyn_cast<ConstantSDNode>(Op.getOperand(3));
    // NOTE(review): tautological range check ('||' should likely be '&&'),
    // same as in the dmxxextfdmr256 case above.
    assert(Idx && (Idx->getSExtValue() >= 0 || Idx->getSExtValue() <= 3) &&
           "Specify a dmr row pair 0-3");
    unsigned IdxVal = Idx->getSExtValue();
    unsigned Subx;
    switch (IdxVal) {
    case 0:
      Subx = PPC::sub_dmrrowp0;
      break;
    case 1:
      Subx = PPC::sub_dmrrowp1;
      break;
    case 2:
      Subx = PPC::sub_wacc_hi_then_sub_dmrrowp0;
      break;
    case 3:
      Subx = PPC::sub_wacc_hi_then_sub_dmrrowp1;
      break;
    }
    SDValue SubReg = DAG.getTargetConstant(Subx, dl, MVT::i32);
    SDValue P = DAG.getTargetConstant(IdxVal, dl, MVT::i32);
    SDValue DMRRowp =
        DAG.getNode(PPCISD::INST256, dl, MVT::v256i1, Op.getOperand(2), P);
    return SDValue(DAG.getMachineNode(PPC::INSERT_SUBREG, dl, MVT::v1024i1,
                                      Op.getOperand(1), DMRRowp, SubReg),
                   0);
  }

  case Intrinsic::ppc_mma_xxmfacc:
  case Intrinsic::ppc_mma_xxmtacc: {
    // Allow pre-isa-future subtargets to lower as normal.
    if (!Subtarget.isISAFuture())
      return SDValue();
    // The intrinsics for xxmtacc and xxmfacc take one argument of
    // type v512i1, for future cpu the corresponding wacc instruction
    // dmxx[inst|extf]dmr512 is always generated for type v512i1, negating
    // the need to produce the xxm[t|f]acc.
    SDValue WideVec = Op.getOperand(1);
    DAG.ReplaceAllUsesWith(Op, WideVec);
    return SDValue();
  }

  case Intrinsic::ppc_unpack_longdouble: {
    auto *Idx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
    assert(Idx && (Idx->getSExtValue() == 0 || Idx->getSExtValue() == 1) &&
           "Argument of long double unpack must be 0 or 1!");
    return DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::f64, Op.getOperand(1),
                       DAG.getConstant(!!(Idx->getSExtValue()), dl,
                                       Idx->getValueType(0)));
  }

  case Intrinsic::ppc_compare_exp_lt:
  case Intrinsic::ppc_compare_exp_gt:
  case Intrinsic::ppc_compare_exp_eq:
  case Intrinsic::ppc_compare_exp_uo: {
    // Compare exponents with XSCMPEXPDP, then select 1/0 on the matching
    // CR predicate.
    unsigned Pred;
    switch (IntrinsicID) {
    case Intrinsic::ppc_compare_exp_lt:
      Pred = PPC::PRED_LT;
      break;
    case Intrinsic::ppc_compare_exp_gt:
      Pred = PPC::PRED_GT;
      break;
    case Intrinsic::ppc_compare_exp_eq:
      Pred = PPC::PRED_EQ;
      break;
    case Intrinsic::ppc_compare_exp_uo:
      Pred = PPC::PRED_UN;
      break;
    }
    return SDValue(
        DAG.getMachineNode(
            PPC::SELECT_CC_I4, dl, MVT::i32,
            {SDValue(DAG.getMachineNode(PPC::XSCMPEXPDP, dl, MVT::i32,
                                        Op.getOperand(1), Op.getOperand(2)),
                     0),
             DAG.getConstant(1, dl, MVT::i32), DAG.getConstant(0, dl, MVT::i32),
             DAG.getTargetConstant(Pred, dl, MVT::i32)}),
        0);
  }
  case Intrinsic::ppc_test_data_class: {
    EVT OpVT = Op.getOperand(1).getValueType();
    unsigned CmprOpc = OpVT == MVT::f128 ? PPC::XSTSTDCQP
                                         : (OpVT == MVT::f64 ? PPC::XSTSTDCDP
                                                             : PPC::XSTSTDCSP);
    // Lower __builtin_ppc_test_data_class(value, mask) to XSTSTDC* instruction.
    // The XSTSTDC* instructions test if a floating-point value matches any of
    // the data classes specified in the mask, setting CR field bits
    // accordingly. We need to extract the EQ bit (bit 2) from the CR field and
    // convert it to an integer result (1 if match, 0 if no match).
    //
    // Note: Operands are swapped because XSTSTDC* expects (mask, value) but the
    // intrinsic provides (value, mask) as Op.getOperand(1) and
    // Op.getOperand(2).
    SDValue TestDataClass =
        SDValue(DAG.getMachineNode(CmprOpc, dl, MVT::i32,
                                   {Op.getOperand(2), Op.getOperand(1)}),
                0);
    if (Subtarget.isISA3_1()) {
      // ISA 3.1+: Use SETBC instruction to directly convert CR bit to integer.
      // This is more efficient than the SELECT_CC approach used in earlier
      // ISAs.
      SDValue SubRegIdx = DAG.getTargetConstant(PPC::sub_eq, dl, MVT::i32);
      SDValue CRBit =
          SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::i1,
                                     TestDataClass, SubRegIdx),
                  0);

      return DAG.getNode(PPCISD::SETBC, dl, MVT::i32, CRBit);
    }

    // Pre-ISA 3.1: Use SELECT_CC to convert CR field to integer (1 or 0).
    return SDValue(
        DAG.getMachineNode(PPC::SELECT_CC_I4, dl, MVT::i32,
                           {TestDataClass, DAG.getConstant(1, dl, MVT::i32),
                            DAG.getConstant(0, dl, MVT::i32),
                            DAG.getTargetConstant(PPC::PRED_EQ, dl, MVT::i32)}),
        0);
  }
  case Intrinsic::ppc_fnmsub: {
    EVT VT = Op.getOperand(1).getValueType();
    // Without VSX (or f128 support for f128 operands), expand to
    // -(fma(a, b, -c)); otherwise use the native FNMSUB node.
    if (!Subtarget.hasVSX() || (!Subtarget.hasFloat128() && VT == MVT::f128))
      return DAG.getNode(
          ISD::FNEG, dl, VT,
          DAG.getNode(ISD::FMA, dl, VT, Op.getOperand(1), Op.getOperand(2),
                      DAG.getNode(ISD::FNEG, dl, VT, Op.getOperand(3))));
    return DAG.getNode(PPCISD::FNMSUB, dl, VT, Op.getOperand(1),
                       Op.getOperand(2), Op.getOperand(3));
  }
  case Intrinsic::ppc_convert_f128_to_ppcf128:
  case Intrinsic::ppc_convert_ppcf128_to_f128: {
    // These conversions are done through compiler-rt libcalls.
    RTLIB::Libcall LC = IntrinsicID == Intrinsic::ppc_convert_ppcf128_to_f128
                            ? RTLIB::CONVERT_PPCF128_F128
                            : RTLIB::CONVERT_F128_PPCF128;
    MakeLibCallOptions CallOptions;
    std::pair<SDValue, SDValue> Result =
        makeLibCall(DAG, LC, Op.getValueType(), Op.getOperand(1), CallOptions,
                    dl, SDValue());
    return Result.first;
  }
  case Intrinsic::ppc_maxfe:
  case Intrinsic::ppc_maxfl:
  case Intrinsic::ppc_maxfs:
  case Intrinsic::ppc_minfe:
  case Intrinsic::ppc_minfl:
  case Intrinsic::ppc_minfs: {
    EVT VT = Op.getValueType();
    assert(
        all_of(Op->ops().drop_front(4),
               [VT](const SDUse &Use) { return Use.getValueType() == VT; }) &&
        "ppc_[max|min]f[e|l|s] must have uniform type arguments");
    (void)VT;
    if (IntrinsicID == Intrinsic::ppc_minfe ||
        IntrinsicID == Intrinsic::ppc_minfl ||
        IntrinsicID == Intrinsic::ppc_minfs)
      CC = ISD::SETLT;
    // Fold all value operands into a chain of select_cc nodes, walking the
    // operand list from the second-to-last entry backwards and wrapping to the
    // last one, so every argument is compared exactly once.
    unsigned I = Op.getNumOperands() - 2, Cnt = I;
    SDValue Res = Op.getOperand(I);
    for (--I; Cnt != 0; --Cnt, I = (--I == 0 ? (Op.getNumOperands() - 1) : I)) {
      Res =
          DAG.getSelectCC(dl, Res, Op.getOperand(I), Res, Op.getOperand(I), CC);
    }
    return Res;
  }
  }

  // If this is a lowered altivec predicate compare, CompareOpc is set to the
  // opcode number of the comparison.
  int CompareOpc;
  bool isDot;
  if (!getVectorCompareInfo(Op, CompareOpc, isDot, Subtarget))
    return SDValue(); // Don't custom lower most intrinsics.

  // If this is a non-dot comparison, make the VCMP node and we are done.
  if (!isDot) {
    SDValue Tmp = DAG.getNode(PPCISD::VCMP, dl, Op.getOperand(2).getValueType(),
                              Op.getOperand(1), Op.getOperand(2),
                              DAG.getConstant(CompareOpc, dl, MVT::i32));
    return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Tmp);
  }

  // Create the PPCISD altivec 'dot' comparison node.
  SDValue Ops[] = {
    Op.getOperand(2),  // LHS
    Op.getOperand(3),  // RHS
    DAG.getConstant(CompareOpc, dl, MVT::i32)
  };
  EVT VTs[] = { Op.getOperand(2).getValueType(), MVT::Glue };
  SDValue CompNode = DAG.getNode(PPCISD::VCMP_rec, dl, VTs, Ops);

  // Unpack the result based on how the target uses it.
  unsigned BitNo;   // Bit # of CR6.
  bool InvertBit;   // Invert result?
  unsigned Bitx;
  unsigned SetOp;
  switch (Op.getConstantOperandVal(1)) {
  default:  // Can't happen, don't crash on invalid number though.
  case 0:   // Return the value of the EQ bit of CR6.
    BitNo = 0;
    InvertBit = false;
    Bitx = PPC::sub_eq;
    SetOp = PPCISD::SETBC;
    break;
  case 1: // Return the inverted value of the EQ bit of CR6.
    BitNo = 0;
    InvertBit = true;
    Bitx = PPC::sub_eq;
    SetOp = PPCISD::SETBCR;
    break;
  case 2: // Return the value of the LT bit of CR6.
    BitNo = 2;
    InvertBit = false;
    Bitx = PPC::sub_lt;
    SetOp = PPCISD::SETBC;
    break;
  case 3: // Return the inverted value of the LT bit of CR6.
    BitNo = 2;
    InvertBit = true;
    Bitx = PPC::sub_lt;
    SetOp = PPCISD::SETBCR;
    break;
  }

  SDValue GlueOp = CompNode.getValue(1);
  if (Subtarget.isISA3_1()) {
    // ISA 3.1: extract the CR6 subregister bit and materialize it with
    // SETBC/SETBCR (the R form handles the inverted cases).
    SDValue SubRegIdx = DAG.getTargetConstant(Bitx, dl, MVT::i32);
    SDValue CR6Reg = DAG.getRegister(PPC::CR6, MVT::i32);
    SDValue CRBit =
        SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::i1,
                                   CR6Reg, SubRegIdx, GlueOp),
                0);
    return DAG.getNode(SetOp, dl, MVT::i32, CRBit);
  }

  // Now that we have the comparison, emit a copy from the CR to a GPR.
  // This is flagged to the above dot comparison.
  SDValue Flags = DAG.getNode(PPCISD::MFOCRF, dl, MVT::i32,
                              DAG.getRegister(PPC::CR6, MVT::i32), GlueOp);

  // Shift the bit into the low position.
  Flags = DAG.getNode(ISD::SRL, dl, MVT::i32, Flags,
                      DAG.getConstant(8 - (3 - BitNo), dl, MVT::i32));
  // Isolate the bit.
  Flags = DAG.getNode(ISD::AND, dl, MVT::i32, Flags,
                      DAG.getConstant(1, dl, MVT::i32));

  // If we are supposed to, toggle the bit.
  if (InvertBit)
    Flags = DAG.getNode(ISD::XOR, dl, MVT::i32, Flags,
                        DAG.getConstant(1, dl, MVT::i32));
  return Flags;
}
11530
11531SDValue PPCTargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
11532 SelectionDAG &DAG) const {
11533 unsigned IntrinsicID = Op.getConstantOperandVal(1);
11534 SDLoc dl(Op);
11535 switch (IntrinsicID) {
11536 case Intrinsic::ppc_amo_lwat_csne:
11537 case Intrinsic::ppc_amo_ldat_csne:
11538 SDValue Chain = Op.getOperand(0);
11539 SDValue Ptr = Op.getOperand(2);
11540 SDValue CmpVal = Op.getOperand(3);
11541 SDValue NewVal = Op.getOperand(4);
11542
11543 EVT VT = IntrinsicID == Intrinsic::ppc_amo_ldat_csne ? MVT::i64 : MVT::i32;
11544 Type *Ty = VT.getTypeForEVT(*DAG.getContext());
11545 Type *IntPtrTy = DAG.getDataLayout().getIntPtrType(*DAG.getContext());
11546
11548 Args.emplace_back(DAG.getUNDEF(MVT::i64),
11550 Args.emplace_back(CmpVal, Ty);
11551 Args.emplace_back(NewVal, Ty);
11552 Args.emplace_back(Ptr, IntPtrTy);
11553
11554 // Lower to dummy call to use ABI for consecutive register allocation.
11555 // Places return value, compare value, and new value in X3/X4/X5 as required
11556 // by lwat/ldat FC=16, avoiding a new register class for 3 adjacent
11557 // registers.
11558 const char *SymName = IntrinsicID == Intrinsic::ppc_amo_ldat_csne
11559 ? "__ldat_csne_pseudo"
11560 : "__lwat_csne_pseudo";
11561 SDValue Callee =
11562 DAG.getExternalSymbol(SymName, getPointerTy(DAG.getDataLayout()));
11563
11564 TargetLowering::CallLoweringInfo CLI(DAG);
11565 CLI.setDebugLoc(dl).setChain(Chain).setLibCallee(CallingConv::C, Ty, Callee,
11566 std::move(Args));
11567
11568 auto Result = LowerCallTo(CLI);
11569 return DAG.getMergeValues({Result.first, Result.second}, dl);
11570 }
11571 return SDValue();
11572}
11573
11574SDValue PPCTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
11575 SelectionDAG &DAG) const {
11576 // SelectionDAGBuilder::visitTargetIntrinsic may insert one extra chain to
11577 // the beginning of the argument list.
11578 int ArgStart = isa<ConstantSDNode>(Op.getOperand(0)) ? 0 : 1;
11579 SDLoc DL(Op);
11580 switch (Op.getConstantOperandVal(ArgStart)) {
11581 case Intrinsic::ppc_cfence: {
11582 assert(ArgStart == 1 && "llvm.ppc.cfence must carry a chain argument.");
11583 SDValue Val = Op.getOperand(ArgStart + 1);
11584 EVT Ty = Val.getValueType();
11585 if (Ty == MVT::i128) {
11586 // FIXME: Testing one of two paired registers is sufficient to guarantee
11587 // ordering?
11588 Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::i64, Val);
11589 }
11590 unsigned Opcode = Subtarget.isPPC64() ? PPC::CFENCE8 : PPC::CFENCE;
11591 return SDValue(
11592 DAG.getMachineNode(
11593 Opcode, DL, MVT::Other,
11594 DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getScalarIntVT(), Val),
11595 Op.getOperand(0)),
11596 0);
11597 }
11598 case Intrinsic::ppc_disassemble_dmr: {
11599 return DAG.getStore(DAG.getEntryNode(), DL, Op.getOperand(ArgStart + 2),
11600 Op.getOperand(ArgStart + 1), MachinePointerInfo());
11601 }
11602 case Intrinsic::ppc_amo_stwat:
11603 case Intrinsic::ppc_amo_stdat: {
11604 SDLoc dl(Op);
11605 SDValue Chain = Op.getOperand(0);
11606 SDValue Ptr = Op.getOperand(ArgStart + 1);
11607 SDValue Val = Op.getOperand(ArgStart + 2);
11608 SDValue FC = Op.getOperand(ArgStart + 3);
11609
11610 return DAG.getNode(PPCISD::STAT, dl, MVT::Other, Chain, Val, Ptr, FC);
11611 }
11612 default:
11613 break;
11614 }
11615 return SDValue();
11616}
11617
11618// Lower scalar BSWAP64 to xxbrd.
11619SDValue PPCTargetLowering::LowerBSWAP(SDValue Op, SelectionDAG &DAG) const {
11620 SDLoc dl(Op);
11621 if (!Subtarget.isPPC64())
11622 return Op;
11623 // MTVSRDD
11624 Op = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, Op.getOperand(0),
11625 Op.getOperand(0));
11626 // XXBRD
11627 Op = DAG.getNode(ISD::BSWAP, dl, MVT::v2i64, Op);
11628 // MFVSRD
11629 int VectorIndex = 0;
11630 if (Subtarget.isLittleEndian())
11631 VectorIndex = 1;
11632 Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, Op,
11633 DAG.getTargetConstant(VectorIndex, dl, MVT::i32));
11634 return Op;
11635}
11636
11637// ATOMIC_CMP_SWAP for i8/i16 needs to zero-extend its input since it will be
11638// compared to a value that is atomically loaded (atomic loads zero-extend).
11639SDValue PPCTargetLowering::LowerATOMIC_CMP_SWAP(SDValue Op,
11640 SelectionDAG &DAG) const {
11641 assert(Op.getOpcode() == ISD::ATOMIC_CMP_SWAP &&
11642 "Expecting an atomic compare-and-swap here.");
11643 SDLoc dl(Op);
11644 auto *AtomicNode = cast<AtomicSDNode>(Op.getNode());
11645 EVT MemVT = AtomicNode->getMemoryVT();
11646 if (MemVT.getSizeInBits() >= 32)
11647 return Op;
11648
11649 SDValue CmpOp = Op.getOperand(2);
11650 // If this is already correctly zero-extended, leave it alone.
11651 auto HighBits = APInt::getHighBitsSet(32, 32 - MemVT.getSizeInBits());
11652 if (DAG.MaskedValueIsZero(CmpOp, HighBits))
11653 return Op;
11654
11655 // Clear the high bits of the compare operand.
11656 unsigned MaskVal = (1 << MemVT.getSizeInBits()) - 1;
11657 SDValue NewCmpOp =
11658 DAG.getNode(ISD::AND, dl, MVT::i32, CmpOp,
11659 DAG.getConstant(MaskVal, dl, MVT::i32));
11660
11661 // Replace the existing compare operand with the properly zero-extended one.
11663 for (int i = 0, e = AtomicNode->getNumOperands(); i < e; i++)
11664 Ops.push_back(AtomicNode->getOperand(i));
11665 Ops[2] = NewCmpOp;
11666 MachineMemOperand *MMO = AtomicNode->getMemOperand();
11667 SDVTList Tys = DAG.getVTList(MVT::i32, MVT::Other);
11668 auto NodeTy =
11669 (MemVT == MVT::i8) ? PPCISD::ATOMIC_CMP_SWAP_8 : PPCISD::ATOMIC_CMP_SWAP_16;
11670 return DAG.getMemIntrinsicNode(NodeTy, dl, Tys, Ops, MemVT, MMO);
11671}
11672
11673SDValue PPCTargetLowering::LowerATOMIC_LOAD_STORE(SDValue Op,
11674 SelectionDAG &DAG) const {
11675 AtomicSDNode *N = cast<AtomicSDNode>(Op.getNode());
11676 EVT MemVT = N->getMemoryVT();
11677 assert(MemVT.getSimpleVT() == MVT::i128 &&
11678 "Expect quadword atomic operations");
11679 SDLoc dl(N);
11680 unsigned Opc = N->getOpcode();
11681 switch (Opc) {
11682 case ISD::ATOMIC_LOAD: {
11683 // Lower quadword atomic load to int_ppc_atomic_load_i128 which will be
11684 // lowered to ppc instructions by pattern matching instruction selector.
11685 SDVTList Tys = DAG.getVTList(MVT::i64, MVT::i64, MVT::Other);
11687 N->getOperand(0),
11688 DAG.getConstant(Intrinsic::ppc_atomic_load_i128, dl, MVT::i32)};
11689 for (int I = 1, E = N->getNumOperands(); I < E; ++I)
11690 Ops.push_back(N->getOperand(I));
11691 SDValue LoadedVal = DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl, Tys,
11692 Ops, MemVT, N->getMemOperand());
11693 SDValue ValLo = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i128, LoadedVal);
11694 SDValue ValHi =
11695 DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i128, LoadedVal.getValue(1));
11696 ValHi = DAG.getNode(ISD::SHL, dl, MVT::i128, ValHi,
11697 DAG.getConstant(64, dl, MVT::i32));
11698 SDValue Val =
11699 DAG.getNode(ISD::OR, dl, {MVT::i128, MVT::Other}, {ValLo, ValHi});
11700 return DAG.getNode(ISD::MERGE_VALUES, dl, {MVT::i128, MVT::Other},
11701 {Val, LoadedVal.getValue(2)});
11702 }
11703 case ISD::ATOMIC_STORE: {
11704 // Lower quadword atomic store to int_ppc_atomic_store_i128 which will be
11705 // lowered to ppc instructions by pattern matching instruction selector.
11706 SDVTList Tys = DAG.getVTList(MVT::Other);
11708 N->getOperand(0),
11709 DAG.getConstant(Intrinsic::ppc_atomic_store_i128, dl, MVT::i32)};
11710 SDValue Val = N->getOperand(1);
11711 SDValue ValLo = DAG.getNode(ISD::TRUNCATE, dl, MVT::i64, Val);
11712 SDValue ValHi = DAG.getNode(ISD::SRL, dl, MVT::i128, Val,
11713 DAG.getConstant(64, dl, MVT::i32));
11714 ValHi = DAG.getNode(ISD::TRUNCATE, dl, MVT::i64, ValHi);
11715 Ops.push_back(ValLo);
11716 Ops.push_back(ValHi);
11717 Ops.push_back(N->getOperand(2));
11718 return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, dl, Tys, Ops, MemVT,
11719 N->getMemOperand());
11720 }
11721 default:
11722 llvm_unreachable("Unexpected atomic opcode");
11723 }
11724}
11725
11727 SelectionDAG &DAG,
11728 const PPCSubtarget &Subtarget) {
11729 assert(Mask <= fcAllFlags && "Invalid fp_class flags!");
11730
11731 enum DataClassMask {
11732 DC_NAN = 1 << 6,
11733 DC_NEG_INF = 1 << 4,
11734 DC_POS_INF = 1 << 5,
11735 DC_NEG_ZERO = 1 << 2,
11736 DC_POS_ZERO = 1 << 3,
11737 DC_NEG_SUBNORM = 1,
11738 DC_POS_SUBNORM = 1 << 1,
11739 };
11740
11741 EVT VT = Op.getValueType();
11742
11743 unsigned TestOp = VT == MVT::f128 ? PPC::XSTSTDCQP
11744 : VT == MVT::f64 ? PPC::XSTSTDCDP
11745 : PPC::XSTSTDCSP;
11746
11747 if (Mask == fcAllFlags)
11748 return DAG.getBoolConstant(true, Dl, MVT::i1, VT);
11749 if (Mask == 0)
11750 return DAG.getBoolConstant(false, Dl, MVT::i1, VT);
11751
11752 // When it's cheaper or necessary to test reverse flags.
11753 if ((Mask & fcNormal) == fcNormal || Mask == ~fcQNan || Mask == ~fcSNan) {
11754 SDValue Rev = getDataClassTest(Op, ~Mask, Dl, DAG, Subtarget);
11755 return DAG.getNOT(Dl, Rev, MVT::i1);
11756 }
11757
11758 // Power doesn't support testing whether a value is 'normal'. Test the rest
11759 // first, and test if it's 'not not-normal' with expected sign.
11760 if (Mask & fcNormal) {
11761 SDValue Rev(DAG.getMachineNode(
11762 TestOp, Dl, MVT::i32,
11763 DAG.getTargetConstant(DC_NAN | DC_NEG_INF | DC_POS_INF |
11764 DC_NEG_ZERO | DC_POS_ZERO |
11765 DC_NEG_SUBNORM | DC_POS_SUBNORM,
11766 Dl, MVT::i32),
11767 Op),
11768 0);
11769 // Sign are stored in CR bit 0, result are in CR bit 2.
11770 SDValue Sign(
11771 DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, Dl, MVT::i1, Rev,
11772 DAG.getTargetConstant(PPC::sub_lt, Dl, MVT::i32)),
11773 0);
11774 SDValue Normal(DAG.getNOT(
11775 Dl,
11777 TargetOpcode::EXTRACT_SUBREG, Dl, MVT::i1, Rev,
11778 DAG.getTargetConstant(PPC::sub_eq, Dl, MVT::i32)),
11779 0),
11780 MVT::i1));
11781 if (Mask & fcPosNormal)
11782 Sign = DAG.getNOT(Dl, Sign, MVT::i1);
11783 SDValue Result = DAG.getNode(ISD::AND, Dl, MVT::i1, Sign, Normal);
11784 if (Mask == fcPosNormal || Mask == fcNegNormal)
11785 return Result;
11786
11787 return DAG.getNode(
11788 ISD::OR, Dl, MVT::i1,
11789 getDataClassTest(Op, Mask & ~fcNormal, Dl, DAG, Subtarget), Result);
11790 }
11791
11792 // The instruction doesn't differentiate between signaling or quiet NaN. Test
11793 // the rest first, and test if it 'is NaN and is signaling/quiet'.
11794 if ((Mask & fcNan) == fcQNan || (Mask & fcNan) == fcSNan) {
11795 bool IsQuiet = Mask & fcQNan;
11796 SDValue NanCheck = getDataClassTest(Op, fcNan, Dl, DAG, Subtarget);
11797
11798 // Quietness is determined by the first bit in fraction field.
11799 uint64_t QuietMask = 0;
11800 SDValue HighWord;
11801 if (VT == MVT::f128) {
11802 HighWord = DAG.getNode(
11803 ISD::EXTRACT_VECTOR_ELT, Dl, MVT::i32, DAG.getBitcast(MVT::v4i32, Op),
11804 DAG.getVectorIdxConstant(Subtarget.isLittleEndian() ? 3 : 0, Dl));
11805 QuietMask = 0x8000;
11806 } else if (VT == MVT::f64) {
11807 if (Subtarget.isPPC64()) {
11808 HighWord = DAG.getNode(ISD::EXTRACT_ELEMENT, Dl, MVT::i32,
11809 DAG.getBitcast(MVT::i64, Op),
11810 DAG.getConstant(1, Dl, MVT::i32));
11811 } else {
11812 SDValue Vec = DAG.getBitcast(
11813 MVT::v4i32, DAG.getNode(ISD::SCALAR_TO_VECTOR, Dl, MVT::v2f64, Op));
11814 HighWord = DAG.getNode(
11815 ISD::EXTRACT_VECTOR_ELT, Dl, MVT::i32, Vec,
11816 DAG.getVectorIdxConstant(Subtarget.isLittleEndian() ? 1 : 0, Dl));
11817 }
11818 QuietMask = 0x80000;
11819 } else if (VT == MVT::f32) {
11820 HighWord = DAG.getBitcast(MVT::i32, Op);
11821 QuietMask = 0x400000;
11822 }
11823 SDValue NanRes = DAG.getSetCC(
11824 Dl, MVT::i1,
11825 DAG.getNode(ISD::AND, Dl, MVT::i32, HighWord,
11826 DAG.getConstant(QuietMask, Dl, MVT::i32)),
11827 DAG.getConstant(0, Dl, MVT::i32), IsQuiet ? ISD::SETNE : ISD::SETEQ);
11828 NanRes = DAG.getNode(ISD::AND, Dl, MVT::i1, NanCheck, NanRes);
11829 if (Mask == fcQNan || Mask == fcSNan)
11830 return NanRes;
11831
11832 return DAG.getNode(ISD::OR, Dl, MVT::i1,
11833 getDataClassTest(Op, Mask & ~fcNan, Dl, DAG, Subtarget),
11834 NanRes);
11835 }
11836
11837 unsigned NativeMask = 0;
11838 if ((Mask & fcNan) == fcNan)
11839 NativeMask |= DC_NAN;
11840 if (Mask & fcNegInf)
11841 NativeMask |= DC_NEG_INF;
11842 if (Mask & fcPosInf)
11843 NativeMask |= DC_POS_INF;
11844 if (Mask & fcNegZero)
11845 NativeMask |= DC_NEG_ZERO;
11846 if (Mask & fcPosZero)
11847 NativeMask |= DC_POS_ZERO;
11848 if (Mask & fcNegSubnormal)
11849 NativeMask |= DC_NEG_SUBNORM;
11850 if (Mask & fcPosSubnormal)
11851 NativeMask |= DC_POS_SUBNORM;
11852 return SDValue(
11853 DAG.getMachineNode(
11854 TargetOpcode::EXTRACT_SUBREG, Dl, MVT::i1,
11856 TestOp, Dl, MVT::i32,
11857 DAG.getTargetConstant(NativeMask, Dl, MVT::i32), Op),
11858 0),
11859 DAG.getTargetConstant(PPC::sub_eq, Dl, MVT::i32)),
11860 0);
11861}
11862
11863SDValue PPCTargetLowering::LowerIS_FPCLASS(SDValue Op,
11864 SelectionDAG &DAG) const {
11865 assert(Subtarget.hasP9Vector() && "Test data class requires Power9");
11866 SDValue LHS = Op.getOperand(0);
11867 uint64_t RHSC = Op.getConstantOperandVal(1);
11868 SDLoc Dl(Op);
11869 FPClassTest Category = static_cast<FPClassTest>(RHSC);
11870 if (LHS.getValueType() == MVT::ppcf128) {
11871 // The higher part determines the value class.
11872 LHS = DAG.getNode(ISD::EXTRACT_ELEMENT, Dl, MVT::f64, LHS,
11873 DAG.getConstant(1, Dl, MVT::i32));
11874 }
11875
11876 return getDataClassTest(LHS, Category, Dl, DAG, Subtarget);
11877}
11878
11879// Adjust the length value for a load/store with length to account for the
11880// instructions requiring a left justified length, and for non-byte element
11881// types requiring scaling by element size.
11882static SDValue AdjustLength(SDValue Val, unsigned Bits, bool Left,
11883 SelectionDAG &DAG) {
11884 SDLoc dl(Val);
11885 EVT VT = Val->getValueType(0);
11886 unsigned LeftAdj = Left ? VT.getSizeInBits() - 8 : 0;
11887 unsigned TypeAdj = llvm::countr_zero<uint32_t>(Bits / 8);
11888 SDValue SHLAmt = DAG.getConstant(LeftAdj + TypeAdj, dl, VT);
11889 return DAG.getNode(ISD::SHL, dl, VT, Val, SHLAmt);
11890}
11891
11892SDValue PPCTargetLowering::LowerVP_LOAD(SDValue Op, SelectionDAG &DAG) const {
11893 auto VPLD = cast<VPLoadSDNode>(Op);
11894 bool Future = Subtarget.isISAFuture();
11895 SDLoc dl(Op);
11896 assert(ISD::isConstantSplatVectorAllOnes(Op->getOperand(3).getNode(), true) &&
11897 "Mask predication not supported");
11898 EVT PtrVT = getPointerTy(DAG.getDataLayout());
11899 SDValue Len = DAG.getNode(ISD::ANY_EXTEND, dl, PtrVT, VPLD->getOperand(4));
11900 unsigned IID = Future ? Intrinsic::ppc_vsx_lxvrl : Intrinsic::ppc_vsx_lxvl;
11901 unsigned EltBits = Op->getValueType(0).getScalarType().getSizeInBits();
11902 Len = AdjustLength(Len, EltBits, !Future, DAG);
11903 SDValue Ops[] = {VPLD->getChain(), DAG.getConstant(IID, dl, MVT::i32),
11904 VPLD->getOperand(1), Len};
11905 SDVTList Tys = DAG.getVTList(Op->getValueType(0), MVT::Other);
11906 SDValue VPL =
11908 VPLD->getMemoryVT(), VPLD->getMemOperand());
11909 return VPL;
11910}
11911
11912SDValue PPCTargetLowering::LowerVP_STORE(SDValue Op, SelectionDAG &DAG) const {
11913 auto VPST = cast<VPStoreSDNode>(Op);
11914 assert(ISD::isConstantSplatVectorAllOnes(Op->getOperand(4).getNode(), true) &&
11915 "Mask predication not supported");
11916 EVT PtrVT = getPointerTy(DAG.getDataLayout());
11917 SDLoc dl(Op);
11918 SDValue Len = DAG.getNode(ISD::ANY_EXTEND, dl, PtrVT, VPST->getOperand(5));
11919 unsigned EltBits =
11920 Op->getOperand(1).getValueType().getScalarType().getSizeInBits();
11921 bool Future = Subtarget.isISAFuture();
11922 unsigned IID = Future ? Intrinsic::ppc_vsx_stxvrl : Intrinsic::ppc_vsx_stxvl;
11923 Len = AdjustLength(Len, EltBits, !Future, DAG);
11924 SDValue Ops[] = {
11925 VPST->getChain(), DAG.getConstant(IID, dl, MVT::i32),
11926 DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, VPST->getOperand(1)),
11927 VPST->getOperand(2), Len};
11928 SDVTList Tys = DAG.getVTList(MVT::Other);
11929 SDValue VPS =
11931 VPST->getMemoryVT(), VPST->getMemOperand());
11932 return VPS;
11933}
11934
11935SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op,
11936 SelectionDAG &DAG) const {
11937 SDLoc dl(Op);
11938
11939 MachineFunction &MF = DAG.getMachineFunction();
11940 SDValue Op0 = Op.getOperand(0);
11941 EVT ValVT = Op0.getValueType();
11942 unsigned EltSize = Op.getValueType().getScalarSizeInBits();
11943 if (isa<ConstantSDNode>(Op0) && EltSize <= 32) {
11944 int64_t IntVal = Op.getConstantOperandVal(0);
11945 if (IntVal >= -16 && IntVal <= 15)
11946 return getCanonicalConstSplat(IntVal, EltSize / 8, Op.getValueType(), DAG,
11947 dl);
11948 }
11949
11950 ReuseLoadInfo RLI;
11951 if (Subtarget.hasLFIWAX() && Subtarget.hasVSX() &&
11952 Op.getValueType() == MVT::v4i32 && Op0.getOpcode() == ISD::LOAD &&
11953 Op0.getValueType() == MVT::i32 && Op0.hasOneUse() &&
11954 canReuseLoadAddress(Op0, MVT::i32, RLI, DAG, ISD::NON_EXTLOAD)) {
11955
11956 MachineMemOperand *MMO =
11958 RLI.Alignment, RLI.AAInfo, RLI.Ranges);
11959 SDValue Ops[] = {RLI.Chain, RLI.Ptr, DAG.getValueType(Op.getValueType())};
11961 PPCISD::LD_SPLAT, dl, DAG.getVTList(MVT::v4i32, MVT::Other), Ops,
11962 MVT::i32, MMO);
11963 if (RLI.ResChain)
11964 DAG.makeEquivalentMemoryOrdering(RLI.ResChain, Bits.getValue(1));
11965 return Bits.getValue(0);
11966 }
11967
11968 // Create a stack slot that is 16-byte aligned.
11969 MachineFrameInfo &MFI = MF.getFrameInfo();
11970 int FrameIdx = MFI.CreateStackObject(16, Align(16), false);
11971 EVT PtrVT = getPointerTy(DAG.getDataLayout());
11972 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
11973
11974 SDValue Val = Op0;
11975 // P10 hardware store forwarding requires that a single store contains all
11976 // the data for the load. P10 is able to merge a pair of adjacent stores. Try
11977 // to avoid load hit store on P10 when running binaries compiled for older
11978 // processors by generating two mergeable scalar stores to forward with the
11979 // vector load.
11980 if (!DisableP10StoreForward && Subtarget.isPPC64() &&
11981 !Subtarget.isLittleEndian() && ValVT.isInteger() &&
11982 ValVT.getSizeInBits() <= 64) {
11983 Val = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, Val);
11984 EVT ShiftAmountTy = getShiftAmountTy(MVT::i64, DAG.getDataLayout());
11985 SDValue ShiftBy = DAG.getConstant(
11986 64 - Op.getValueType().getScalarSizeInBits(), dl, ShiftAmountTy);
11987 Val = DAG.getNode(ISD::SHL, dl, MVT::i64, Val, ShiftBy);
11988 SDValue Plus8 =
11989 DAG.getNode(ISD::ADD, dl, PtrVT, FIdx, DAG.getConstant(8, dl, PtrVT));
11990 SDValue Store2 =
11991 DAG.getStore(DAG.getEntryNode(), dl, Val, Plus8, MachinePointerInfo());
11992 SDValue Store = DAG.getStore(Store2, dl, Val, FIdx, MachinePointerInfo());
11993 return DAG.getLoad(Op.getValueType(), dl, Store, FIdx,
11994 MachinePointerInfo());
11995 }
11996
11997 // Store the input value into Value#0 of the stack slot.
11998 SDValue Store =
11999 DAG.getStore(DAG.getEntryNode(), dl, Val, FIdx, MachinePointerInfo());
12000 // Load it out.
12001 return DAG.getLoad(Op.getValueType(), dl, Store, FIdx, MachinePointerInfo());
12002}
12003
12004SDValue PPCTargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
12005 SelectionDAG &DAG) const {
12006 assert(Op.getOpcode() == ISD::INSERT_VECTOR_ELT &&
12007 "Should only be called for ISD::INSERT_VECTOR_ELT");
12008
12009 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(2));
12010
12011 EVT VT = Op.getValueType();
12012 SDLoc dl(Op);
12013 SDValue V1 = Op.getOperand(0);
12014 SDValue V2 = Op.getOperand(1);
12015
12016 if (VT == MVT::v2f64 && C)
12017 return Op;
12018
12019 if (Subtarget.hasP9Vector()) {
12020 // A f32 load feeding into a v4f32 insert_vector_elt is handled in this way
12021 // because on P10, it allows this specific insert_vector_elt load pattern to
12022 // utilize the refactored load and store infrastructure in order to exploit
12023 // prefixed loads.
12024 // On targets with inexpensive direct moves (Power9 and up), a
12025 // (insert_vector_elt v4f32:$vec, (f32 load)) is always better as an integer
12026 // load since a single precision load will involve conversion to double
12027 // precision on the load followed by another conversion to single precision.
12028 if ((VT == MVT::v4f32) && (V2.getValueType() == MVT::f32) &&
12029 (isa<LoadSDNode>(V2))) {
12030 SDValue BitcastVector = DAG.getBitcast(MVT::v4i32, V1);
12031 SDValue BitcastLoad = DAG.getBitcast(MVT::i32, V2);
12032 SDValue InsVecElt =
12033 DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v4i32, BitcastVector,
12034 BitcastLoad, Op.getOperand(2));
12035 return DAG.getBitcast(MVT::v4f32, InsVecElt);
12036 }
12037 }
12038
12039 if (Subtarget.isISA3_1()) {
12040 if ((VT == MVT::v2i64 || VT == MVT::v2f64) && !Subtarget.isPPC64())
12041 return SDValue();
12042 // On P10, we have legal lowering for constant and variable indices for
12043 // all vectors.
12044 if (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32 ||
12045 VT == MVT::v2i64 || VT == MVT::v4f32 || VT == MVT::v2f64)
12046 return Op;
12047 }
12048
12049 // Before P10, we have legal lowering for constant indices but not for
12050 // variable ones.
12051 if (!C)
12052 return SDValue();
12053
12054 // We can use MTVSRZ + VECINSERT for v8i16 and v16i8 types.
12055 if (VT == MVT::v8i16 || VT == MVT::v16i8) {
12056 SDValue Mtvsrz = DAG.getNode(PPCISD::MTVSRZ, dl, VT, V2);
12057 unsigned BytesInEachElement = VT.getVectorElementType().getSizeInBits() / 8;
12058 unsigned InsertAtElement = C->getZExtValue();
12059 unsigned InsertAtByte = InsertAtElement * BytesInEachElement;
12060 if (Subtarget.isLittleEndian()) {
12061 InsertAtByte = (16 - BytesInEachElement) - InsertAtByte;
12062 }
12063 return DAG.getNode(PPCISD::VECINSERT, dl, VT, V1, Mtvsrz,
12064 DAG.getConstant(InsertAtByte, dl, MVT::i32));
12065 }
12066 return Op;
12067}
12068
12069SDValue PPCTargetLowering::LowerDMFVectorLoad(SDValue Op,
12070 SelectionDAG &DAG) const {
12071 SDLoc dl(Op);
12072 LoadSDNode *LN = cast<LoadSDNode>(Op.getNode());
12073 SDValue LoadChain = LN->getChain();
12074 SDValue BasePtr = LN->getBasePtr();
12075 EVT VT = Op.getValueType();
12076 bool IsV1024i1 = VT == MVT::v1024i1;
12077 bool IsV2048i1 = VT == MVT::v2048i1;
12078
12079 // The types v1024i1 and v2048i1 are used for Dense Math dmr registers and
12080 // Dense Math dmr pair registers, respectively.
12081 assert((IsV1024i1 || IsV2048i1) && "Unsupported type.");
12082 (void)IsV2048i1;
12083 assert((Subtarget.hasMMA() && Subtarget.isISAFuture()) &&
12084 "Dense Math support required.");
12085 assert(Subtarget.pairedVectorMemops() && "Vector pair support required.");
12086
12088 SmallVector<SDValue, 8> LoadChains;
12089
12090 SDValue IntrinID = DAG.getConstant(Intrinsic::ppc_vsx_lxvp, dl, MVT::i32);
12091 SDValue LoadOps[] = {LoadChain, IntrinID, BasePtr};
12092 MachineMemOperand *MMO = LN->getMemOperand();
12093 unsigned NumVecs = VT.getSizeInBits() / 256;
12094 for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
12095 MachineMemOperand *NewMMO =
12096 DAG.getMachineFunction().getMachineMemOperand(MMO, Idx * 32, 32);
12097 if (Idx > 0) {
12098 BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
12099 DAG.getConstant(32, dl, BasePtr.getValueType()));
12100 LoadOps[2] = BasePtr;
12101 }
12103 DAG.getVTList(MVT::v256i1, MVT::Other),
12104 LoadOps, MVT::v256i1, NewMMO);
12105 LoadChains.push_back(Ld.getValue(1));
12106 Loads.push_back(Ld);
12107 }
12108
12109 if (Subtarget.isLittleEndian()) {
12110 std::reverse(Loads.begin(), Loads.end());
12111 std::reverse(LoadChains.begin(), LoadChains.end());
12112 }
12113
12114 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains);
12115 SDValue Value = DMFInsert1024(Loads, dl, DAG);
12116
12117 if (IsV1024i1) {
12118 return DAG.getMergeValues({Value, TF}, dl);
12119 }
12120
12121 // Handle Loads for V2048i1 which represents a dmr pair.
12122 SmallVector<SDValue, 4> MoreLoads{Loads[4], Loads[5], Loads[6], Loads[7]};
12123 SDValue Dmr1Value = DMFInsert1024(MoreLoads, dl, DAG);
12124
12125 SDValue Dmr0Sub = DAG.getTargetConstant(PPC::sub_dmr0, dl, MVT::i32);
12126 SDValue Dmr1Sub = DAG.getTargetConstant(PPC::sub_dmr1, dl, MVT::i32);
12127
12128 SDValue DmrPRC = DAG.getTargetConstant(PPC::DMRpRCRegClassID, dl, MVT::i32);
12129 const SDValue DmrPOps[] = {DmrPRC, Value, Dmr0Sub, Dmr1Value, Dmr1Sub};
12130
12131 SDValue DmrPValue = SDValue(
12132 DAG.getMachineNode(PPC::REG_SEQUENCE, dl, MVT::v2048i1, DmrPOps), 0);
12133
12134 return DAG.getMergeValues({DmrPValue, TF}, dl);
12135}
12136
12137SDValue PPCTargetLowering::DMFInsert1024(const SmallVectorImpl<SDValue> &Pairs,
12138 const SDLoc &dl,
12139 SelectionDAG &DAG) const {
12140 SDValue Lo =
12141 DAG.getNode(PPCISD::INST512, dl, MVT::v512i1, Pairs[0], Pairs[1]);
12142 SDValue LoSub = DAG.getTargetConstant(PPC::sub_wacc_lo, dl, MVT::i32);
12143 SDValue Hi =
12144 DAG.getNode(PPCISD::INST512HI, dl, MVT::v512i1, Pairs[2], Pairs[3]);
12145 SDValue HiSub = DAG.getTargetConstant(PPC::sub_wacc_hi, dl, MVT::i32);
12146 SDValue RC = DAG.getTargetConstant(PPC::DMRRCRegClassID, dl, MVT::i32);
12147
12148 return SDValue(DAG.getMachineNode(PPC::REG_SEQUENCE, dl, MVT::v1024i1,
12149 {RC, Lo, LoSub, Hi, HiSub}),
12150 0);
12151}
12152
12153SDValue PPCTargetLowering::LowerVectorLoad(SDValue Op,
12154 SelectionDAG &DAG) const {
12155 SDLoc dl(Op);
12156 LoadSDNode *LN = cast<LoadSDNode>(Op.getNode());
12157 SDValue LoadChain = LN->getChain();
12158 SDValue BasePtr = LN->getBasePtr();
12159 EVT VT = Op.getValueType();
12160
12161 if (VT == MVT::v1024i1 || VT == MVT::v2048i1)
12162 return LowerDMFVectorLoad(Op, DAG);
12163
12164 if (VT != MVT::v256i1 && VT != MVT::v512i1)
12165 return Op;
12166
12167 // Type v256i1 is used for pairs and v512i1 is used for accumulators.
12168 assert((VT != MVT::v512i1 || Subtarget.hasMMA()) &&
12169 "Type unsupported without MMA");
12170 assert((VT != MVT::v256i1 || Subtarget.pairedVectorMemops()) &&
12171 "Type unsupported without paired vector support");
12172
12173 // For v256i1 on ISA Future, let the load go through to instruction selection
12174 // where it will be matched to lxvp/plxvp by the instruction patterns.
12175 if (VT == MVT::v256i1 && Subtarget.isISAFuture())
12176 return Op;
12177
12178 // For other cases, create 2 or 4 v16i8 loads to load the pair or accumulator
12179 // value in 2 or 4 vsx registers.
12180 Align Alignment = LN->getAlign();
12182 SmallVector<SDValue, 4> LoadChains;
12183 unsigned NumVecs = VT.getSizeInBits() / 128;
12184 for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
12185 SDValue Load =
12186 DAG.getLoad(MVT::v16i8, dl, LoadChain, BasePtr,
12187 LN->getPointerInfo().getWithOffset(Idx * 16),
12188 commonAlignment(Alignment, Idx * 16),
12189 LN->getMemOperand()->getFlags(), LN->getAAInfo());
12190 BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
12191 DAG.getConstant(16, dl, BasePtr.getValueType()));
12192 Loads.push_back(Load);
12193 LoadChains.push_back(Load.getValue(1));
12194 }
12195 if (Subtarget.isLittleEndian()) {
12196 std::reverse(Loads.begin(), Loads.end());
12197 std::reverse(LoadChains.begin(), LoadChains.end());
12198 }
12199 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains);
12200 SDValue Value =
12201 DAG.getNode(VT == MVT::v512i1 ? PPCISD::ACC_BUILD : PPCISD::PAIR_BUILD,
12202 dl, VT, Loads);
12203 SDValue RetOps[] = {Value, TF};
12204 return DAG.getMergeValues(RetOps, dl);
12205}
12206
12207SDValue PPCTargetLowering::LowerDMFVectorStore(SDValue Op,
12208 SelectionDAG &DAG) const {
12209
12210 SDLoc dl(Op);
12211 StoreSDNode *SN = cast<StoreSDNode>(Op.getNode());
12212 SDValue StoreChain = SN->getChain();
12213 SDValue BasePtr = SN->getBasePtr();
12216 EVT VT = SN->getValue().getValueType();
12217 bool IsV1024i1 = VT == MVT::v1024i1;
12218 bool IsV2048i1 = VT == MVT::v2048i1;
12219
12220 // The types v1024i1 and v2048i1 are used for Dense Math dmr registers and
12221 // Dense Math dmr pair registers, respectively.
12222 assert((IsV1024i1 || IsV2048i1) && "Unsupported type.");
12223 (void)IsV2048i1;
12224 assert((Subtarget.hasMMA() && Subtarget.isISAFuture()) &&
12225 "Dense Math support required.");
12226 assert(Subtarget.pairedVectorMemops() && "Vector pair support required.");
12227
12228 EVT ReturnTypes[] = {MVT::v256i1, MVT::v256i1};
12229 if (IsV1024i1) {
12231 TargetOpcode::EXTRACT_SUBREG, dl, MVT::v512i1,
12232 Op.getOperand(1),
12233 DAG.getTargetConstant(PPC::sub_wacc_lo, dl, MVT::i32)),
12234 0);
12236 TargetOpcode::EXTRACT_SUBREG, dl, MVT::v512i1,
12237 Op.getOperand(1),
12238 DAG.getTargetConstant(PPC::sub_wacc_hi, dl, MVT::i32)),
12239 0);
12240 MachineSDNode *ExtNode =
12241 DAG.getMachineNode(PPC::DMXXEXTFDMR512, dl, ReturnTypes, Lo);
12242 Values.push_back(SDValue(ExtNode, 0));
12243 Values.push_back(SDValue(ExtNode, 1));
12244 ExtNode = DAG.getMachineNode(PPC::DMXXEXTFDMR512_HI, dl, ReturnTypes, Hi);
12245 Values.push_back(SDValue(ExtNode, 0));
12246 Values.push_back(SDValue(ExtNode, 1));
12247 } else {
12248 // This corresponds to v2048i1 which represents a dmr pair.
12249 SDValue Dmr0(
12250 DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::v1024i1,
12251 Op.getOperand(1),
12252 DAG.getTargetConstant(PPC::sub_dmr0, dl, MVT::i32)),
12253 0);
12254
12255 SDValue Dmr1(
12256 DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::v1024i1,
12257 Op.getOperand(1),
12258 DAG.getTargetConstant(PPC::sub_dmr1, dl, MVT::i32)),
12259 0);
12260
12261 SDValue Dmr0Lo(DAG.getMachineNode(
12262 TargetOpcode::EXTRACT_SUBREG, dl, MVT::v512i1, Dmr0,
12263 DAG.getTargetConstant(PPC::sub_wacc_lo, dl, MVT::i32)),
12264 0);
12265
12266 SDValue Dmr0Hi(DAG.getMachineNode(
12267 TargetOpcode::EXTRACT_SUBREG, dl, MVT::v512i1, Dmr0,
12268 DAG.getTargetConstant(PPC::sub_wacc_hi, dl, MVT::i32)),
12269 0);
12270
12271 SDValue Dmr1Lo(DAG.getMachineNode(
12272 TargetOpcode::EXTRACT_SUBREG, dl, MVT::v512i1, Dmr1,
12273 DAG.getTargetConstant(PPC::sub_wacc_lo, dl, MVT::i32)),
12274 0);
12275
12276 SDValue Dmr1Hi(DAG.getMachineNode(
12277 TargetOpcode::EXTRACT_SUBREG, dl, MVT::v512i1, Dmr1,
12278 DAG.getTargetConstant(PPC::sub_wacc_hi, dl, MVT::i32)),
12279 0);
12280
12281 MachineSDNode *ExtNode =
12282 DAG.getMachineNode(PPC::DMXXEXTFDMR512, dl, ReturnTypes, Dmr0Lo);
12283 Values.push_back(SDValue(ExtNode, 0));
12284 Values.push_back(SDValue(ExtNode, 1));
12285 ExtNode =
12286 DAG.getMachineNode(PPC::DMXXEXTFDMR512_HI, dl, ReturnTypes, Dmr0Hi);
12287 Values.push_back(SDValue(ExtNode, 0));
12288 Values.push_back(SDValue(ExtNode, 1));
12289 ExtNode = DAG.getMachineNode(PPC::DMXXEXTFDMR512, dl, ReturnTypes, Dmr1Lo);
12290 Values.push_back(SDValue(ExtNode, 0));
12291 Values.push_back(SDValue(ExtNode, 1));
12292 ExtNode =
12293 DAG.getMachineNode(PPC::DMXXEXTFDMR512_HI, dl, ReturnTypes, Dmr1Hi);
12294 Values.push_back(SDValue(ExtNode, 0));
12295 Values.push_back(SDValue(ExtNode, 1));
12296 }
12297
12298 if (Subtarget.isLittleEndian())
12299 std::reverse(Values.begin(), Values.end());
12300
12301 SDVTList Tys = DAG.getVTList(MVT::Other);
12303 StoreChain, DAG.getConstant(Intrinsic::ppc_vsx_stxvp, dl, MVT::i32),
12304 Values[0], BasePtr};
12305 MachineMemOperand *MMO = SN->getMemOperand();
12306 unsigned NumVecs = VT.getSizeInBits() / 256;
12307 for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
12308 MachineMemOperand *NewMMO =
12309 DAG.getMachineFunction().getMachineMemOperand(MMO, Idx * 32, 32);
12310 if (Idx > 0) {
12311 BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
12312 DAG.getConstant(32, dl, BasePtr.getValueType()));
12313 Ops[3] = BasePtr;
12314 }
12315 Ops[2] = Values[Idx];
12317 MVT::v256i1, NewMMO);
12318 Stores.push_back(St);
12319 }
12320
12321 SDValue TF = DAG.getTokenFactor(dl, Stores);
12322 return TF;
12323}
12324
/// Lower stores of the wide MMA/paired-vector types. v1024i1/v2048i1 stores
/// are delegated to LowerDMFVectorStore; v256i1 (VSX register pair) and
/// v512i1 (MMA accumulator) stores are split into individual 16-byte stores
/// of the underlying VSX registers.
SDValue PPCTargetLowering::LowerVectorStore(SDValue Op,
                                            SelectionDAG &DAG) const {
  SDLoc dl(Op);
  StoreSDNode *SN = cast<StoreSDNode>(Op.getNode());
  SDValue StoreChain = SN->getChain();
  SDValue BasePtr = SN->getBasePtr();
  SDValue Value = SN->getValue();
  // Holds the second v256i1 half of a split accumulator on ISA-Future
  // targets; initialized to the stored value and overwritten below.
  SDValue Value2 = SN->getValue();
  EVT StoreVT = Value.getValueType();

  // The largest accumulator types have their own lowering.
  if (StoreVT == MVT::v1024i1 || StoreVT == MVT::v2048i1)
    return LowerDMFVectorStore(Op, DAG);

  // Anything that is not a pair or accumulator store is left untouched.
  if (StoreVT != MVT::v256i1 && StoreVT != MVT::v512i1)
    return Op;

  // Type v256i1 is used for pairs and v512i1 is used for accumulators.
  assert((StoreVT != MVT::v512i1 || Subtarget.hasMMA()) &&
         "Type unsupported without MMA");
  assert((StoreVT != MVT::v256i1 || Subtarget.pairedVectorMemops()) &&
         "Type unsupported without paired vector support");

  // For v256i1 on ISA Future, let the store go through to instruction selection
  // where it will be matched to stxvp/pstxvp by the instruction patterns.
  if (StoreVT == MVT::v256i1 && Subtarget.isISAFuture() &&
    return Op;

  // For other cases, create 2 or 4 v16i8 stores to store the pair or
  // accumulator underlying registers individually.
  Align Alignment = SN->getAlign();
  unsigned NumVecs = 2;
  if (StoreVT == MVT::v512i1) {
    if (Subtarget.isISAFuture()) {
      // On ISA Future the accumulator is first extracted into two v256i1
      // register pairs via DMXXEXTFDMR512.
      EVT ReturnTypes[] = {MVT::v256i1, MVT::v256i1};
      MachineSDNode *ExtNode = DAG.getMachineNode(
          PPC::DMXXEXTFDMR512, dl, ReturnTypes, Op.getOperand(1));

      Value = SDValue(ExtNode, 0);
      Value2 = SDValue(ExtNode, 1);
    } else
      // Otherwise move the accumulator contents back to its VSX registers.
      Value = DAG.getNode(PPCISD::XXMFACC, dl, MVT::v512i1, Value);
    NumVecs = 4;
  }
  for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
    // Register extraction order is reversed on little-endian targets.
    unsigned VecNum = Subtarget.isLittleEndian() ? NumVecs - 1 - Idx : Idx;
    SDValue Elt;
    if (Subtarget.isISAFuture()) {
      // Each v256i1 half (Value/Value2) contributes two registers; index
      // within the half, swapping the two on little-endian.
      VecNum = Subtarget.isLittleEndian() ? 1 - (Idx % 2) : (Idx % 2);
      Elt = DAG.getNode(PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8,
                        Idx > 1 ? Value2 : Value,
                        DAG.getConstant(VecNum, dl, getPointerTy(DAG.getDataLayout())));
    } else
      Elt = DAG.getNode(PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8, Value,
                        DAG.getConstant(VecNum, dl, getPointerTy(DAG.getDataLayout())));

    // Emit this 16-byte component at the next 16-byte offset, then advance
    // the pointer for the following iteration.
    SDValue Store =
        DAG.getStore(StoreChain, dl, Elt, BasePtr,
                     SN->getPointerInfo().getWithOffset(Idx * 16),
                     commonAlignment(Alignment, Idx * 16),
                     SN->getMemOperand()->getFlags(), SN->getAAInfo());
    BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
                          DAG.getConstant(16, dl, BasePtr.getValueType()));
    Stores.push_back(Store);
  }
  // Tie all component stores together into a single chain result.
  SDValue TF = DAG.getTokenFactor(dl, Stores);
  return TF;
}
12394
12395SDValue PPCTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const {
12396 SDLoc dl(Op);
12397 if (Op.getValueType() == MVT::v4i32) {
12398 SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
12399
12400 SDValue Zero = getCanonicalConstSplat(0, 1, MVT::v4i32, DAG, dl);
12401 // +16 as shift amt.
12402 SDValue Neg16 = getCanonicalConstSplat(-16, 4, MVT::v4i32, DAG, dl);
12403 SDValue RHSSwap = // = vrlw RHS, 16
12404 BuildIntrinsicOp(Intrinsic::ppc_altivec_vrlw, RHS, Neg16, DAG, dl);
12405
12406 // Shrinkify inputs to v8i16.
12407 LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, LHS);
12408 RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHS);
12409 RHSSwap = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHSSwap);
12410
12411 // Low parts multiplied together, generating 32-bit results (we ignore the
12412 // top parts).
12413 SDValue LoProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmulouh,
12414 LHS, RHS, DAG, dl, MVT::v4i32);
12415
12416 SDValue HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmsumuhm,
12417 LHS, RHSSwap, Zero, DAG, dl, MVT::v4i32);
12418 // Shift the high parts up 16 bits.
12419 HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, HiProd,
12420 Neg16, DAG, dl);
12421 return DAG.getNode(ISD::ADD, dl, MVT::v4i32, LoProd, HiProd);
12422 } else if (Op.getValueType() == MVT::v16i8) {
12423 SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
12424 bool isLittleEndian = Subtarget.isLittleEndian();
12425
12426 // Multiply the even 8-bit parts, producing 16-bit sums.
12427 SDValue EvenParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuleub,
12428 LHS, RHS, DAG, dl, MVT::v8i16);
12429 EvenParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, EvenParts);
12430
12431 // Multiply the odd 8-bit parts, producing 16-bit sums.
12432 SDValue OddParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuloub,
12433 LHS, RHS, DAG, dl, MVT::v8i16);
12434 OddParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OddParts);
12435
12436 // Merge the results together. Because vmuleub and vmuloub are
12437 // instructions with a big-endian bias, we must reverse the
12438 // element numbering and reverse the meaning of "odd" and "even"
12439 // when generating little endian code.
12440 int Ops[16];
12441 for (unsigned i = 0; i != 8; ++i) {
12442 if (isLittleEndian) {
12443 Ops[i*2 ] = 2*i;
12444 Ops[i*2+1] = 2*i+16;
12445 } else {
12446 Ops[i*2 ] = 2*i+1;
12447 Ops[i*2+1] = 2*i+1+16;
12448 }
12449 }
12450 if (isLittleEndian)
12451 return DAG.getVectorShuffle(MVT::v16i8, dl, OddParts, EvenParts, Ops);
12452 else
12453 return DAG.getVectorShuffle(MVT::v16i8, dl, EvenParts, OddParts, Ops);
12454 } else {
12455 llvm_unreachable("Unknown mul to lower!");
12456 }
12457}
12458
12459SDValue PPCTargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const {
12460 bool IsStrict = Op->isStrictFPOpcode();
12461 if (Op.getOperand(IsStrict ? 1 : 0).getValueType() == MVT::f128 &&
12462 !Subtarget.hasP9Vector())
12463 return SDValue();
12464
12465 return Op;
12466}
12467
// Custom lowering for fpext vf32 to v2f64
SDValue PPCTargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const {

  assert(Op.getOpcode() == ISD::FP_EXTEND &&
         "Should only be called for ISD::FP_EXTEND");

  // FIXME: handle extends from half precision float vectors on P9.
  // We only want to custom lower an extend from v2f32 to v2f64.
  if (Op.getValueType() != MVT::v2f64 ||
      Op.getOperand(0).getValueType() != MVT::v2f32)
    return SDValue();

  SDLoc dl(Op);
  SDValue Op0 = Op.getOperand(0);

  // Dispatch on how the v2f32 source was produced; each recognized shape is
  // turned into a PPCISD::FP_EXTEND_HALF of a v4f32.
  switch (Op0.getOpcode()) {
  default:
    return SDValue();
    assert(Op0.getNumOperands() == 2 &&
           "Node should have 2 operands with second one being a constant!");

    if (Op0.getOperand(0).getValueType() != MVT::v4f32)
      return SDValue();

    // Custom lower is only done for high or low doubleword.
    int Idx = Op0.getConstantOperandVal(1);
    if (Idx % 2 != 0)
      return SDValue();

    // Since input is v4f32, at this point Idx is either 0 or 2.
    // Shift to get the doubleword position we want.
    int DWord = Idx >> 1;

    // High and low word positions are different on little endian.
    if (Subtarget.isLittleEndian())
      DWord ^= 0x1;

    // Extend the selected doubleword of the v4f32 source directly.
    return DAG.getNode(PPCISD::FP_EXTEND_HALF, dl, MVT::v2f64,
                       Op0.getOperand(0), DAG.getConstant(DWord, dl, MVT::i32));
  }
  case ISD::FADD:
  case ISD::FMUL:
  case ISD::FSUB: {
    // If both operands of the arithmetic op are loads, re-issue them as
    // LD_VSX_LH nodes producing v4f32, redo the arithmetic at v4f32, and
    // extend doubleword 0 of the result.
    SDValue NewLoad[2];
    for (unsigned i = 0, ie = Op0.getNumOperands(); i != ie; ++i) {
      // Ensure both input are loads.
      SDValue LdOp = Op0.getOperand(i);
      if (LdOp.getOpcode() != ISD::LOAD)
        return SDValue();
      // Generate new load node.
      LoadSDNode *LD = cast<LoadSDNode>(LdOp);
      SDValue LoadOps[] = {LD->getChain(), LD->getBasePtr()};
      NewLoad[i] = DAG.getMemIntrinsicNode(
          PPCISD::LD_VSX_LH, dl, DAG.getVTList(MVT::v4f32, MVT::Other), LoadOps,
          LD->getMemoryVT(), LD->getMemOperand());
    }
    // Rebuild the arithmetic at v4f32, preserving the original node flags.
    SDValue NewOp =
        DAG.getNode(Op0.getOpcode(), SDLoc(Op0), MVT::v4f32, NewLoad[0],
                    NewLoad[1], Op0.getNode()->getFlags());
    return DAG.getNode(PPCISD::FP_EXTEND_HALF, dl, MVT::v2f64, NewOp,
                       DAG.getConstant(0, dl, MVT::i32));
  }
  case ISD::LOAD: {
    // Re-issue the load as LD_VSX_LH (v4f32) and extend doubleword 0.
    LoadSDNode *LD = cast<LoadSDNode>(Op0);
    SDValue LoadOps[] = {LD->getChain(), LD->getBasePtr()};
    SDValue NewLd = DAG.getMemIntrinsicNode(
        PPCISD::LD_VSX_LH, dl, DAG.getVTList(MVT::v4f32, MVT::Other), LoadOps,
        LD->getMemoryVT(), LD->getMemOperand());
    return DAG.getNode(PPCISD::FP_EXTEND_HALF, dl, MVT::v2f64, NewLd,
                       DAG.getConstant(0, dl, MVT::i32));
  }
  }
  llvm_unreachable("ERROR:Should return for all cases within swtich.");
}
12544
                                             SelectionDAG &DAG,
                                             const PPCSubtarget &STI) {
  // Convert a boolean carry value into the PPC carry (CA) flag: materialize
  // the boolean as 0/1 in SumType, then add it to -1 so the addition carries
  // out exactly when the value is nonzero.
  SDLoc DL(Value);
  // When CR bits are in use, select a 0/1 integer from the i1 value;
  // otherwise just adjust the integer width to SumType.
  if (STI.useCRBits())
    Value = DAG.getNode(ISD::SELECT, DL, SumType, Value,
                        DAG.getConstant(1, DL, SumType),
                        DAG.getConstant(0, DL, SumType));
  else
    Value = DAG.getZExtOrTrunc(Value, DL, SumType);
  // Value + (-1): the second result of PPCISD::ADDC is the carry flag.
  SDValue Sum = DAG.getNode(PPCISD::ADDC, DL, DAG.getVTList(SumType, MVT::i32),
                            Value, DAG.getAllOnesConstant(DL, SumType));
  return Sum.getValue(1);
}
12559
                                             EVT CarryType, SelectionDAG &DAG,
                                             const PPCSubtarget &STI) {
  // Convert the PPC carry (CA) flag back into a boolean value of CarryType.
  SDLoc DL(Flag);
  SDValue Zero = DAG.getConstant(0, DL, SumType);
  // ADDE computes 0 + 0 + CA, leaving the flag's value in the sum result.
  SDValue Carry = DAG.getNode(
      PPCISD::ADDE, DL, DAG.getVTList(SumType, MVT::i32), Zero, Zero, Flag);
  // With CR bits, compare against zero to get an i1; otherwise adjust the
  // integer width to CarryType.
  if (STI.useCRBits())
    return DAG.getSetCC(DL, CarryType, Carry, Zero, ISD::SETNE);
  return DAG.getZExtOrTrunc(Carry, DL, CarryType);
}
12571
12572SDValue PPCTargetLowering::LowerADDSUBO(SDValue Op, SelectionDAG &DAG) const {
12573
12574 SDLoc DL(Op);
12575 SDNode *N = Op.getNode();
12576 EVT VT = N->getValueType(0);
12577 EVT CarryType = N->getValueType(1);
12578 unsigned Opc = N->getOpcode();
12579 bool IsAdd = Opc == ISD::UADDO;
12580 Opc = IsAdd ? PPCISD::ADDC : PPCISD::SUBC;
12581 SDValue Sum = DAG.getNode(Opc, DL, DAG.getVTList(VT, MVT::i32),
12582 N->getOperand(0), N->getOperand(1));
12583 SDValue Carry = ConvertCarryFlagToCarryValue(VT, Sum.getValue(1), CarryType,
12584 DAG, Subtarget);
12585 if (!IsAdd)
12586 Carry = DAG.getNode(ISD::XOR, DL, CarryType, Carry,
12587 DAG.getConstant(1UL, DL, CarryType));
12588 return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Sum, Carry);
12589}
12590
12591SDValue PPCTargetLowering::LowerADDSUBO_CARRY(SDValue Op,
12592 SelectionDAG &DAG) const {
12593 SDLoc DL(Op);
12594 SDNode *N = Op.getNode();
12595 unsigned Opc = N->getOpcode();
12596 EVT VT = N->getValueType(0);
12597 EVT CarryType = N->getValueType(1);
12598 SDValue CarryOp = N->getOperand(2);
12599 bool IsAdd = Opc == ISD::UADDO_CARRY;
12600 Opc = IsAdd ? PPCISD::ADDE : PPCISD::SUBE;
12601 if (!IsAdd)
12602 CarryOp = DAG.getNode(ISD::XOR, DL, CarryOp.getValueType(), CarryOp,
12603 DAG.getConstant(1UL, DL, CarryOp.getValueType()));
12604 CarryOp = ConvertCarryValueToCarryFlag(VT, CarryOp, DAG, Subtarget);
12605 SDValue Sum = DAG.getNode(Opc, DL, DAG.getVTList(VT, MVT::i32),
12606 Op.getOperand(0), Op.getOperand(1), CarryOp);
12607 CarryOp = ConvertCarryFlagToCarryValue(VT, Sum.getValue(1), CarryType, DAG,
12608 Subtarget);
12609 if (!IsAdd)
12610 CarryOp = DAG.getNode(ISD::XOR, DL, CarryOp.getValueType(), CarryOp,
12611 DAG.getConstant(1UL, DL, CarryOp.getValueType()));
12612 return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Sum, CarryOp);
12613}
12614
12615SDValue PPCTargetLowering::LowerSSUBO(SDValue Op, SelectionDAG &DAG) const {
12616
12617 SDLoc dl(Op);
12618 SDValue LHS = Op.getOperand(0);
12619 SDValue RHS = Op.getOperand(1);
12620 EVT VT = Op.getNode()->getValueType(0);
12621
12622 SDValue Sub = DAG.getNode(ISD::SUB, dl, VT, LHS, RHS);
12623
12624 SDValue Xor1 = DAG.getNode(ISD::XOR, dl, VT, RHS, LHS);
12625 SDValue Xor2 = DAG.getNode(ISD::XOR, dl, VT, Sub, LHS);
12626
12627 SDValue And = DAG.getNode(ISD::AND, dl, VT, Xor1, Xor2);
12628
12629 SDValue Overflow =
12630 DAG.getNode(ISD::SRL, dl, VT, And,
12631 DAG.getConstant(VT.getSizeInBits() - 1, dl, MVT::i32));
12632
12633 SDValue OverflowTrunc =
12634 DAG.getNode(ISD::TRUNCATE, dl, Op.getNode()->getValueType(1), Overflow);
12635
12636 return DAG.getMergeValues({Sub, OverflowTrunc}, dl);
12637}
12638
12639/// Implements signed add with overflow detection using the rule:
12640/// (x eqv y) & (sum xor x), where the overflow bit is extracted from the sign
12641SDValue PPCTargetLowering::LowerSADDO(SDValue Op, SelectionDAG &DAG) const {
12642
12643 SDLoc dl(Op);
12644 SDValue LHS = Op.getOperand(0);
12645 SDValue RHS = Op.getOperand(1);
12646 EVT VT = Op.getNode()->getValueType(0);
12647
12648 SDValue Sum = DAG.getNode(ISD::ADD, dl, VT, LHS, RHS);
12649
12650 // Compute ~(x xor y)
12651 SDValue XorXY = DAG.getNode(ISD::XOR, dl, VT, LHS, RHS);
12652 SDValue EqvXY = DAG.getNOT(dl, XorXY, VT);
12653 // Compute (s xor x)
12654 SDValue SumXorX = DAG.getNode(ISD::XOR, dl, VT, Sum, LHS);
12655
12656 // overflow = (x eqv y) & (s xor x)
12657 SDValue OverflowInSign = DAG.getNode(ISD::AND, dl, VT, EqvXY, SumXorX);
12658
12659 // Shift sign bit down to LSB
12660 SDValue Overflow =
12661 DAG.getNode(ISD::SRL, dl, VT, OverflowInSign,
12662 DAG.getConstant(VT.getSizeInBits() - 1, dl, MVT::i32));
12663 // Truncate to the overflow type (i1)
12664 SDValue OverflowTrunc =
12665 DAG.getNode(ISD::TRUNCATE, dl, Op.getNode()->getValueType(1), Overflow);
12666
12667 return DAG.getMergeValues({Sum, OverflowTrunc}, dl);
12668}
12669
12670// Lower unsigned 3-way compare producing -1/0/1.
12671SDValue PPCTargetLowering::LowerUCMP(SDValue Op, SelectionDAG &DAG) const {
12672 SDLoc DL(Op);
12673 SDValue A = DAG.getFreeze(Op.getOperand(0));
12674 SDValue B = DAG.getFreeze(Op.getOperand(1));
12675 EVT OpVT = A.getValueType();
12676 EVT ResVT = Op.getValueType();
12677
12678 // On PPC64, i32 carries are affected by the upper 32 bits of the registers.
12679 // We must zero-extend to i64 to ensure the carry reflects the 32-bit unsigned
12680 // comparison.
12681 if (Subtarget.isPPC64() && OpVT == MVT::i32) {
12682 A = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, A);
12683 B = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, B);
12684 OpVT = MVT::i64;
12685 }
12686
12687 // First compute diff = A - B.
12688 SDValue Diff = DAG.getNode(ISD::SUB, DL, OpVT, A, B);
12689
12690 // Generate B - A using SUBC to capture carry.
12691 SDVTList VTs = DAG.getVTList(OpVT, MVT::i32);
12692 SDValue SubC = DAG.getNode(PPCISD::SUBC, DL, VTs, B, A);
12693 SDValue CA0 = SubC.getValue(1);
12694
12695 // t2 = A - B + CA0 using SUBE.
12696 SDValue SubE1 = DAG.getNode(PPCISD::SUBE, DL, VTs, A, B, CA0);
12697 SDValue CA1 = SubE1.getValue(1);
12698
12699 // res = diff - t2 + CA1 using SUBE (produces desired -1/0/1).
12700 SDValue ResPair = DAG.getNode(PPCISD::SUBE, DL, VTs, Diff, SubE1, CA1);
12701
12702 // Extract the first result and truncate to result type if needed.
12703 return DAG.getSExtOrTrunc(ResPair.getValue(0), DL, ResVT);
12704}
12705
/// LowerOperation - Provide custom lowering hooks for some operations.
///
  // Pure dispatch: each custom-lowered opcode is forwarded to its dedicated
  // Lower* helper; reaching the default case is a legalization bug.
  switch (Op.getOpcode()) {
  default:
    llvm_unreachable("Wasn't expecting to be able to lower this!");
  case ISD::FPOW: return lowerPow(Op, DAG);
  case ISD::FSIN: return lowerSin(Op, DAG);
  case ISD::FCOS: return lowerCos(Op, DAG);
  case ISD::FLOG: return lowerLog(Op, DAG);
  case ISD::FLOG10: return lowerLog10(Op, DAG);
  case ISD::FEXP: return lowerExp(Op, DAG);
  case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
  case ISD::BlockAddress: return LowerBlockAddress(Op, DAG);
  case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG);
  case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
  case ISD::JumpTable: return LowerJumpTable(Op, DAG);
  case ISD::STRICT_FSETCC:
  case ISD::SETCC: return LowerSETCC(Op, DAG);
  case ISD::INIT_TRAMPOLINE: return LowerINIT_TRAMPOLINE(Op, DAG);
  case ISD::ADJUST_TRAMPOLINE: return LowerADJUST_TRAMPOLINE(Op, DAG);
  // Signed overflow arithmetic.
  case ISD::SSUBO:
    return LowerSSUBO(Op, DAG);
  case ISD::SADDO:
    return LowerSADDO(Op, DAG);

  case ISD::INLINEASM:
  case ISD::INLINEASM_BR: return LowerINLINEASM(Op, DAG);
  // Variable argument lowering.
  case ISD::VASTART: return LowerVASTART(Op, DAG);
  case ISD::VAARG: return LowerVAARG(Op, DAG);
  case ISD::VACOPY: return LowerVACOPY(Op, DAG);

  case ISD::STACKRESTORE: return LowerSTACKRESTORE(Op, DAG);
  case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG);
    return LowerGET_DYNAMIC_AREA_OFFSET(Op, DAG);

  // Exception handling lowering.
  case ISD::EH_DWARF_CFA: return LowerEH_DWARF_CFA(Op, DAG);
  case ISD::EH_SJLJ_SETJMP: return lowerEH_SJLJ_SETJMP(Op, DAG);
  case ISD::EH_SJLJ_LONGJMP: return lowerEH_SJLJ_LONGJMP(Op, DAG);

  case ISD::LOAD: return LowerLOAD(Op, DAG);
  case ISD::STORE: return LowerSTORE(Op, DAG);
  case ISD::TRUNCATE: return LowerTRUNCATE(Op, DAG);
  case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
  case ISD::FP_TO_UINT:
  case ISD::FP_TO_SINT: return LowerFP_TO_INT(Op, DAG, SDLoc(Op));
  case ISD::UINT_TO_FP:
  case ISD::SINT_TO_FP: return LowerINT_TO_FP(Op, DAG);
  case ISD::GET_ROUNDING: return LowerGET_ROUNDING(Op, DAG);
  case ISD::SET_ROUNDING:
    return LowerSET_ROUNDING(Op, DAG);

  // Lower 64-bit shifts.
  case ISD::SHL_PARTS: return LowerSHL_PARTS(Op, DAG);
  case ISD::SRL_PARTS: return LowerSRL_PARTS(Op, DAG);
  case ISD::SRA_PARTS: return LowerSRA_PARTS(Op, DAG);

  case ISD::FSHL: return LowerFunnelShift(Op, DAG);
  case ISD::FSHR: return LowerFunnelShift(Op, DAG);

  // Vector-related lowering.
  case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG);
  case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG);
  case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
  case ISD::SCALAR_TO_VECTOR: return LowerSCALAR_TO_VECTOR(Op, DAG);
  case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
  case ISD::MUL: return LowerMUL(Op, DAG);
  case ISD::FP_EXTEND: return LowerFP_EXTEND(Op, DAG);
  case ISD::FP_ROUND:
    return LowerFP_ROUND(Op, DAG);
  case ISD::ROTL: return LowerROTL(Op, DAG);

  // For counter-based loop handling.
    return LowerINTRINSIC_W_CHAIN(Op, DAG);

  case ISD::BITCAST: return LowerBITCAST(Op, DAG);

  // Frame & Return address.
  case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG);
  case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);

    return LowerINTRINSIC_VOID(Op, DAG);
  case ISD::BSWAP:
    return LowerBSWAP(Op, DAG);
    return LowerATOMIC_CMP_SWAP(Op, DAG);
  case ISD::ATOMIC_STORE:
    return LowerATOMIC_LOAD_STORE(Op, DAG);
  case ISD::IS_FPCLASS:
    return LowerIS_FPCLASS(Op, DAG);
  // Unsigned overflow arithmetic.
  case ISD::UADDO:
  case ISD::USUBO:
    return LowerADDSUBO(Op, DAG);
  case ISD::UADDO_CARRY:
  case ISD::USUBO_CARRY:
    return LowerADDSUBO_CARRY(Op, DAG);
  case ISD::UCMP:
    return LowerUCMP(Op, DAG);
  // Strict rounding conversions are legal only when FP exceptions may be
  // ignored; otherwise request expansion.
  case ISD::STRICT_LRINT:
  case ISD::STRICT_LLRINT:
  case ISD::STRICT_LROUND:
    if (Op->getFlags().hasNoFPExcept())
      return Op;
    return SDValue();
  case ISD::VP_LOAD:
    return LowerVP_LOAD(Op, DAG);
  case ISD::VP_STORE:
    return LowerVP_STORE(Op, DAG);
  }
}
12829
                                           SelectionDAG &DAG) const {
  // Custom type legalization: for each illegal-typed node, push the legalized
  // replacement value(s) onto Results (an empty Results asks the legalizer to
  // fall back to its default handling where a case returns without pushing).
  SDLoc dl(N);
  switch (N->getOpcode()) {
  default:
    llvm_unreachable("Do not know how to custom type legalize this operation!");
  case ISD::ATOMIC_LOAD: {
    SDValue Res = LowerATOMIC_LOAD_STORE(SDValue(N, 0), DAG);
    Results.push_back(Res);
    Results.push_back(Res.getValue(1));
    break;
  }
  case ISD::READCYCLECOUNTER: {
    // Read the time base as two i32 halves and pair them into an i64.
    SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
    SDValue RTB = DAG.getNode(PPCISD::READ_TIME_BASE, dl, VTs, N->getOperand(0));

    Results.push_back(
        DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, RTB, RTB.getValue(1)));
    Results.push_back(RTB.getValue(2));
    break;
  }
    if (N->getConstantOperandVal(1) != Intrinsic::loop_decrement)
      break;

    assert(N->getValueType(0) == MVT::i1 &&
           "Unexpected result type for CTR decrement intrinsic");
    // Re-issue the intrinsic at the legal setcc type and truncate back to i1.
    EVT SVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
                                 N->getValueType(0));
    SDVTList VTs = DAG.getVTList(SVT, MVT::Other);
    SDValue NewInt = DAG.getNode(N->getOpcode(), dl, VTs, N->getOperand(0),
                                 N->getOperand(1));

    Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, NewInt));
    Results.push_back(NewInt.getValue(1));
    break;
  }
    switch (N->getConstantOperandVal(0)) {
    case Intrinsic::ppc_pack_longdouble:
      // Build the ppcf128 pair; note the operand order swap (hi/lo).
      Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::ppcf128,
                                    N->getOperand(2), N->getOperand(1)));
      break;
    case Intrinsic::ppc_maxfe:
    case Intrinsic::ppc_minfe:
    case Intrinsic::ppc_fnmsub:
    case Intrinsic::ppc_convert_f128_to_ppcf128:
      Results.push_back(LowerINTRINSIC_WO_CHAIN(SDValue(N, 0), DAG));
      break;
    }
    break;
  }
  case ISD::VAARG: {
    // Only the 32-bit SVR4 ABI needs custom i64 VAARG legalization.
    if (!Subtarget.isSVR4ABI() || Subtarget.isPPC64())
      return;

    EVT VT = N->getValueType(0);

    if (VT == MVT::i64) {
      SDValue NewNode = LowerVAARG(SDValue(N, 1), DAG);

      Results.push_back(NewNode);
      Results.push_back(NewNode.getValue(1));
    }
    return;
  }
  case ISD::FP_TO_SINT:
  case ISD::FP_TO_UINT: {
    // LowerFP_TO_INT() can only handle f32 and f64.
    if (N->getOperand(N->isStrictFPOpcode() ? 1 : 0).getValueType() ==
        MVT::ppcf128)
      return;
    SDValue LoweredValue = LowerFP_TO_INT(SDValue(N, 0), DAG, dl);
    Results.push_back(LoweredValue);
    if (N->isStrictFPOpcode())
      Results.push_back(LoweredValue.getValue(1));
    return;
  }
  case ISD::TRUNCATE: {
    if (!N->getValueType(0).isVector())
      return;
    SDValue Lowered = LowerTRUNCATEVector(SDValue(N, 0), DAG);
    if (Lowered)
      Results.push_back(Lowered);
    return;
  }
  case ISD::SCALAR_TO_VECTOR: {
    SDValue Lowered = LowerSCALAR_TO_VECTOR(SDValue(N, 0), DAG);
    if (Lowered)
      Results.push_back(Lowered);
    return;
  }
  case ISD::FSHL:
  case ISD::FSHR:
    // Don't handle funnel shifts here.
    return;
  case ISD::BITCAST:
    // Don't handle bitcast here.
    return;
  case ISD::FP_EXTEND:
    SDValue Lowered = LowerFP_EXTEND(SDValue(N, 0), DAG);
    if (Lowered)
      Results.push_back(Lowered);
    return;
  }
}
12939
12940//===----------------------------------------------------------------------===//
12941// Other Lowering Code
12942//===----------------------------------------------------------------------===//
12943
  // Emit a call to the given zero-argument PPC intrinsic (e.g. sync/lwsync).
  return Builder.CreateIntrinsic(Id, {});
}
12947
                                         Value *Addr,
                                         AtomicOrdering Ord) const {
  // Emit the load-and-reserve (larx) intrinsic matching the access width and
  // truncate/bitcast the i32/i64 intrinsic result back to the requested type.
  unsigned SZ = ValueTy->getPrimitiveSizeInBits();

  assert((SZ == 8 || SZ == 16 || SZ == 32 || SZ == 64) &&
         "Only 8/16/32/64-bit atomic loads supported");
  Intrinsic::ID IntID;
  switch (SZ) {
  default:
    llvm_unreachable("Unexpected PrimitiveSize");
  case 8:
    IntID = Intrinsic::ppc_lbarx;
    // Sub-word larx forms exist only with the partword-atomics feature.
    assert(Subtarget.hasPartwordAtomics() && "No support partword atomics.");
    break;
  case 16:
    IntID = Intrinsic::ppc_lharx;
    assert(Subtarget.hasPartwordAtomics() && "No support partword atomics.");
    break;
  case 32:
    IntID = Intrinsic::ppc_lwarx;
    break;
  case 64:
    IntID = Intrinsic::ppc_ldarx;
    break;
  }
  Value *Call =
      Builder.CreateIntrinsic(IntID, Addr, /*FMFSource=*/nullptr, "larx");

  return Builder.CreateTruncOrBitCast(Call, ValueTy);
}
12979
// Perform a store-conditional operation to Addr. Return the status of the
// store. This should be 0 if the store succeeded, non-zero otherwise.
                                          Value *Val, Value *Addr,
                                          AtomicOrdering Ord) const {
  Type *Ty = Val->getType();
  unsigned SZ = Ty->getPrimitiveSizeInBits();

  assert((SZ == 8 || SZ == 16 || SZ == 32 || SZ == 64) &&
         "Only 8/16/32/64-bit atomic loads supported");
  // Pick the store-conditional (stcx) intrinsic matching the access width.
  Intrinsic::ID IntID;
  switch (SZ) {
  default:
    llvm_unreachable("Unexpected PrimitiveSize");
  case 8:
    IntID = Intrinsic::ppc_stbcx;
    // Sub-word stcx forms exist only with the partword-atomics feature.
    assert(Subtarget.hasPartwordAtomics() && "No support partword atomics.");
    break;
  case 16:
    IntID = Intrinsic::ppc_sthcx;
    assert(Subtarget.hasPartwordAtomics() && "No support partword atomics.");
    break;
  case 32:
    IntID = Intrinsic::ppc_stwcx;
    break;
  case 64:
    IntID = Intrinsic::ppc_stdcx;
    break;
  }

  // The sub-word intrinsics take an i32 value operand.
  if (SZ == 8 || SZ == 16)
    Val = Builder.CreateZExt(Val, Builder.getInt32Ty());

  Value *Call = Builder.CreateIntrinsic(IntID, {Addr, Val},
                                        /*FMFSource=*/nullptr, "stcx");
  // Invert the intrinsic's result so that 0 means success, matching the
  // contract documented above.
  return Builder.CreateXor(Call, Builder.getInt32(1));
}
13017
// The mappings for emitLeading/TrailingFence is taken from
// http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html
                                                 Instruction *Inst,
                                                 AtomicOrdering Ord) const {
    // Full heavyweight sync for the strongest ordering.
    return callIntrinsic(Builder, Intrinsic::ppc_sync);
  // Lightweight sync suffices for release semantics.
  if (isReleaseOrStronger(Ord))
    return callIntrinsic(Builder, Intrinsic::ppc_lwsync);
  // Weaker orderings need no leading fence.
  return nullptr;
}
13029
                                                  Instruction *Inst,
                                                  AtomicOrdering Ord) const {
  // Trailing fence is only needed after an atomic load with acquire (or
  // stronger) semantics.
  if (Inst->hasAtomicLoad() && isAcquireOrStronger(Ord)) {
    // See http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html and
    // http://www.rdrop.com/users/paulmck/scalability/paper/N2745r.2011.03.04a.html
    // and http://www.cl.cam.ac.uk/~pes20/cppppc/ for justification.
    // Plain loads get the control-dependency fence (cfence); other atomic
    // operations with a load component fall back to lwsync.
    if (isa<LoadInst>(Inst))
      return Builder.CreateIntrinsic(Intrinsic::ppc_cfence, {Inst->getType()},
                                     {Inst});
    // FIXME: Can use isync for rmw operation.
    return callIntrinsic(Builder, Intrinsic::ppc_lwsync);
  }
  return nullptr;
}
13045
                                                 unsigned AtomicSize,
                                                 unsigned BinOpcode,
                                                 unsigned CmpOpcode,
                                                 unsigned CmpPred) const {
  // Expand an atomic binary pseudo into a larx/stcx. retry loop, optionally
  // with a compare-and-branch (for min/max) that can exit early.
  // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
  const TargetInstrInfo *TII = Subtarget.getInstrInfo();

  // Select the reservation load/store mnemonics for the access width.
  auto LoadMnemonic = PPC::LDARX;
  auto StoreMnemonic = PPC::STDCX;
  switch (AtomicSize) {
  default:
    llvm_unreachable("Unexpected size of atomic entity");
  case 1:
    LoadMnemonic = PPC::LBARX;
    StoreMnemonic = PPC::STBCX;
    // NOTE(review): the message reads backwards — the assert actually
    // requires partword-atomic support for the sub-word sizes.
    assert(Subtarget.hasPartwordAtomics() && "Call this only with size >=4");
    break;
  case 2:
    LoadMnemonic = PPC::LHARX;
    StoreMnemonic = PPC::STHCX;
    assert(Subtarget.hasPartwordAtomics() && "Call this only with size >=4");
    break;
  case 4:
    LoadMnemonic = PPC::LWARX;
    StoreMnemonic = PPC::STWCX;
    break;
  case 8:
    LoadMnemonic = PPC::LDARX;
    StoreMnemonic = PPC::STDCX;
    break;
  }

  const BasicBlock *LLVM_BB = BB->getBasicBlock();
  MachineFunction *F = BB->getParent();

  // Operands of the pseudo: result, the two address components, increment.
  Register dest = MI.getOperand(0).getReg();
  Register ptrA = MI.getOperand(1).getReg();
  Register ptrB = MI.getOperand(2).getReg();
  Register incr = MI.getOperand(3).getReg();
  DebugLoc dl = MI.getDebugLoc();

  // Create the loop block(s) and the exit block, and move everything after
  // MI into the exit block.
  MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *loop2MBB =
      CmpOpcode ? F->CreateMachineBasicBlock(LLVM_BB) : nullptr;
  MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
  F->insert(It, loopMBB);
  if (CmpOpcode)
    F->insert(It, loop2MBB);
  F->insert(It, exitMBB);
  exitMBB->splice(exitMBB->begin(), BB,
                  std::next(MachineBasicBlock::iterator(MI)), BB->end());

  MachineRegisterInfo &RegInfo = F->getRegInfo();
  // For a swap (BinOpcode==0) the stored value is the increment itself.
  Register TmpReg = (!BinOpcode) ? incr :
    RegInfo.createVirtualRegister( AtomicSize == 8 ? &PPC::G8RCRegClass
                                           : &PPC::GPRCRegClass);

  // thisMBB:
  // ...
  // fallthrough --> loopMBB
  BB->addSuccessor(loopMBB);

  // loopMBB:
  // l[wd]arx dest, ptr
  // add r0, dest, incr
  // st[wd]cx. r0, ptr
  // bne- loopMBB
  // fallthrough --> exitMBB

  // For max/min...
  // loopMBB:
  // l[wd]arx dest, ptr
  // cmpl?[wd] dest, incr
  // bgt exitMBB
  // loop2MBB:
  // st[wd]cx. dest, ptr
  // bne- loopMBB
  // fallthrough --> exitMBB

  BB = loopMBB;
  BuildMI(BB, dl, TII->get(LoadMnemonic), dest)
    .addReg(ptrA).addReg(ptrB);
  if (BinOpcode)
    BuildMI(BB, dl, TII->get(BinOpcode), TmpReg).addReg(incr).addReg(dest);
  if (CmpOpcode) {
    Register CrReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
    // Signed comparisons of byte or halfword values must be sign-extended.
    if (CmpOpcode == PPC::CMPW && AtomicSize < 4) {
      Register ExtReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
      BuildMI(BB, dl, TII->get(AtomicSize == 1 ? PPC::EXTSB : PPC::EXTSH),
              ExtReg).addReg(dest);
      BuildMI(BB, dl, TII->get(CmpOpcode), CrReg).addReg(ExtReg).addReg(incr);
    } else
      BuildMI(BB, dl, TII->get(CmpOpcode), CrReg).addReg(dest).addReg(incr);

    // Exit early when the comparison says the stored value should not be
    // replaced (min/max already satisfied).
    BuildMI(BB, dl, TII->get(PPC::BCC))
        .addImm(CmpPred)
        .addReg(CrReg)
        .addMBB(exitMBB);
    BB->addSuccessor(loop2MBB);
    BB->addSuccessor(exitMBB);
    BB = loop2MBB;
  }
  // Attempt the conditional store; retry the whole loop if the reservation
  // was lost (CR0 != eq).
  BuildMI(BB, dl, TII->get(StoreMnemonic))
    .addReg(TmpReg).addReg(ptrA).addReg(ptrB);
  BuildMI(BB, dl, TII->get(PPC::BCC))
    .addReg(PPC::CR0)
    .addMBB(loopMBB);
  BB->addSuccessor(loopMBB);
  BB->addSuccessor(exitMBB);

  // exitMBB:
  // ...
  BB = exitMBB;
  return BB;
}
13167
  // Returns true when MI is known to produce a sign-extended result: either
  // one of the explicitly sign-extending load/extend/shift opcodes below, or
  // a COPY whose source register TII reports as sign-extended.
  switch(MI.getOpcode()) {
  default:
    return false;
  case PPC::COPY:
    return TII->isSignExtended(MI.getOperand(1).getReg(),
                               &MI.getMF()->getRegInfo());
  case PPC::LHA:
  case PPC::LHA8:
  case PPC::LHAU:
  case PPC::LHAU8:
  case PPC::LHAUX:
  case PPC::LHAUX8:
  case PPC::LHAX:
  case PPC::LHAX8:
  case PPC::LWA:
  case PPC::LWAUX:
  case PPC::LWAX:
  case PPC::LWAX_32:
  case PPC::LWA_32:
  case PPC::PLHA:
  case PPC::PLHA8:
  case PPC::PLHA8pc:
  case PPC::PLHApc:
  case PPC::PLWA:
  case PPC::PLWA8:
  case PPC::PLWA8pc:
  case PPC::PLWApc:
  case PPC::EXTSB:
  case PPC::EXTSB8:
  case PPC::EXTSB8_32_64:
  case PPC::EXTSB8_rec:
  case PPC::EXTSB_rec:
  case PPC::EXTSH:
  case PPC::EXTSH8:
  case PPC::EXTSH8_32_64:
  case PPC::EXTSH8_rec:
  case PPC::EXTSH_rec:
  case PPC::EXTSW:
  case PPC::EXTSWSLI:
  case PPC::EXTSWSLI_32_64:
  case PPC::EXTSWSLI_32_64_rec:
  case PPC::EXTSWSLI_rec:
  case PPC::EXTSW_32:
  case PPC::EXTSW_32_64:
  case PPC::EXTSW_32_64_rec:
  case PPC::EXTSW_rec:
  case PPC::SRAW:
  case PPC::SRAWI:
  case PPC::SRAWI_rec:
  case PPC::SRAW_rec:
    return true;
  }
  // Unreachable: every switch path returns above; kept as a defensive
  // fallthrough.
  return false;
}
13223
// Expand a part-word (i8/i16) atomic read-modify-write pseudo into an
// lwarx/stwcx. retry loop that operates on the aligned 32-bit word containing
// the value, using shift/mask bookkeeping to isolate the sub-word lane.
// BinOpcode==0 indicates ATOMIC_SWAP; a nonzero CmpOpcode/CmpPred selects the
// compare-based (min/max) forms, which skip the store when the comparison
// says the stored value should be kept.
13226                                       bool is8bit, // operation
13227                                       unsigned BinOpcode, unsigned CmpOpcode, unsigned CmpPred) const {
13228  // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
13229  const PPCInstrInfo *TII = Subtarget.getInstrInfo();
13230
13231  // If this is a signed comparison and the value being compared is not known
13232  // to be sign extended, sign extend it here.
13233  DebugLoc dl = MI.getDebugLoc();
13234  MachineFunction *F = BB->getParent();
13235  MachineRegisterInfo &RegInfo = F->getRegInfo();
13236  Register incr = MI.getOperand(3).getReg();
13237  bool IsSignExtended =
13238      incr.isVirtual() && isSignExtended(*RegInfo.getVRegDef(incr), TII);
13239
13240  if (CmpOpcode == PPC::CMPW && !IsSignExtended) {
13241    Register ValueReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
13242    BuildMI(*BB, MI, dl, TII->get(is8bit ? PPC::EXTSB : PPC::EXTSH), ValueReg)
13243        .addReg(MI.getOperand(3).getReg());
13244    MI.getOperand(3).setReg(ValueReg);
13245    incr = ValueReg;
13246  }
13247  // If we support part-word atomic mnemonics, just use them
13248  if (Subtarget.hasPartwordAtomics())
13249    return EmitAtomicBinary(MI, BB, is8bit ? 1 : 2, BinOpcode, CmpOpcode,
13250                            CmpPred);
13251
13252  // In 64 bit mode we have to use 64 bits for addresses, even though the
13253  // lwarx/stwcx are 32 bits.  With the 32-bit atomics we can use address
13254  // registers without caring whether they're 32 or 64, but here we're
13255  // doing actual arithmetic on the addresses.
13256  bool is64bit = Subtarget.isPPC64();
13257  bool isLittleEndian = Subtarget.isLittleEndian();
13258  unsigned ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;
13259
13260  const BasicBlock *LLVM_BB = BB->getBasicBlock();
13262
13263  Register dest = MI.getOperand(0).getReg();
13264  Register ptrA = MI.getOperand(1).getReg();
13265  Register ptrB = MI.getOperand(2).getReg();
13266
// loop2MBB is created only for the compare-based forms: the compare in
// loopMBB can branch straight to exitMBB, bypassing the store in loop2MBB.
13267  MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
13268  MachineBasicBlock *loop2MBB =
13269      CmpOpcode ? F->CreateMachineBasicBlock(LLVM_BB) : nullptr;
13270  MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
13271  F->insert(It, loopMBB);
13272  if (CmpOpcode)
13273    F->insert(It, loop2MBB);
13274  F->insert(It, exitMBB);
13275  exitMBB->splice(exitMBB->begin(), BB,
13276                  std::next(MachineBasicBlock::iterator(MI)), BB->end());
13278
13279  const TargetRegisterClass *RC =
13280      is64bit ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
13281  const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
13282
13283  Register PtrReg = RegInfo.createVirtualRegister(RC);
13284  Register Shift1Reg = RegInfo.createVirtualRegister(GPRC);
// On little-endian the lane's bit offset is the raw byte offset * 8, so no
// XORI correction is needed and ShiftReg aliases Shift1Reg.
13285  Register ShiftReg =
13286      isLittleEndian ? Shift1Reg : RegInfo.createVirtualRegister(GPRC);
13287  Register Incr2Reg = RegInfo.createVirtualRegister(GPRC);
13288  Register MaskReg = RegInfo.createVirtualRegister(GPRC);
13289  Register Mask2Reg = RegInfo.createVirtualRegister(GPRC);
13290  Register Mask3Reg = RegInfo.createVirtualRegister(GPRC);
13291  Register Tmp2Reg = RegInfo.createVirtualRegister(GPRC);
13292  Register Tmp3Reg = RegInfo.createVirtualRegister(GPRC);
13293  Register Tmp4Reg = RegInfo.createVirtualRegister(GPRC);
13294  Register TmpDestReg = RegInfo.createVirtualRegister(GPRC);
13295  Register SrwDestReg = RegInfo.createVirtualRegister(GPRC);
13296  Register Ptr1Reg;
// For swap (BinOpcode==0) there is no arithmetic: the shifted incoming value
// itself is what gets merged into the word, so TmpReg aliases Incr2Reg.
13297  Register TmpReg =
13298      (!BinOpcode) ? Incr2Reg : RegInfo.createVirtualRegister(GPRC);
13299
13300  // thisMBB:
13301  //   ...
13302  //   fallthrough --> loopMBB
13303  BB->addSuccessor(loopMBB);
13304
13305  // The 4-byte load must be aligned, while a char or short may be
13306  // anywhere in the word.  Hence all this nasty bookkeeping code.
13307  //   add ptr1, ptrA, ptrB [copy if ptrA==0]
13308  //   rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27]
13309  //   xori shift, shift1, 24 [16]
13310  //   rlwinm ptr, ptr1, 0, 0, 29
13311  //   slw incr2, incr, shift
13312  //   li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535]
13313  //   slw mask, mask2, shift
13314  //  loopMBB:
13315  //   lwarx tmpDest, ptr
13316  //   add tmp, tmpDest, incr2
13317  //   andc tmp2, tmpDest, mask
13318  //   and tmp3, tmp, mask
13319  //   or tmp4, tmp3, tmp2
13320  //   stwcx. tmp4, ptr
13321  //   bne- loopMBB
13322  //   fallthrough --> exitMBB
13323  //   srw SrwDest, tmpDest, shift
13324  //   rlwinm SrwDest, SrwDest, 0, 24 [16], 31
13325  if (ptrA != ZeroReg) {
13326    Ptr1Reg = RegInfo.createVirtualRegister(RC);
13327    BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)
13328        .addReg(ptrA)
13329        .addReg(ptrB);
13330  } else {
13331    Ptr1Reg = ptrB;
13332  }
13333  // We need use 32-bit subregister to avoid mismatch register class in 64-bit
13334  // mode.
13335  BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg)
13336      .addReg(Ptr1Reg, {}, is64bit ? PPC::sub_32 : 0)
13337      .addImm(3)
13338      .addImm(27)
13339      .addImm(is8bit ? 28 : 27);
13340  if (!isLittleEndian)
13341    BuildMI(BB, dl, TII->get(PPC::XORI), ShiftReg)
13342        .addReg(Shift1Reg)
13343        .addImm(is8bit ? 24 : 16);
// Clear the low address bits to get the aligned word address in PtrReg.
13344  if (is64bit)
13345    BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg)
13346        .addReg(Ptr1Reg)
13347        .addImm(0)
13348        .addImm(61);
13349  else
13350    BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg)
13351        .addReg(Ptr1Reg)
13352        .addImm(0)
13353        .addImm(0)
13354        .addImm(29);
13355  BuildMI(BB, dl, TII->get(PPC::SLW), Incr2Reg).addReg(incr).addReg(ShiftReg);
13356  if (is8bit)
13357    BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255);
13358  else {
13359    BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0);
13360    BuildMI(BB, dl, TII->get(PPC::ORI), Mask2Reg)
13361        .addReg(Mask3Reg)
13362        .addImm(65535);
13363  }
13364  BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg)
13365      .addReg(Mask2Reg)
13366      .addReg(ShiftReg);
13367
13368  BB = loopMBB;
13369  BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg)
13370      .addReg(ZeroReg)
13371      .addReg(PtrReg);
13372  if (BinOpcode)
13373    BuildMI(BB, dl, TII->get(BinOpcode), TmpReg)
13374        .addReg(Incr2Reg)
13375        .addReg(TmpDestReg);
13376  BuildMI(BB, dl, TII->get(PPC::ANDC), Tmp2Reg)
13377      .addReg(TmpDestReg)
13378      .addReg(MaskReg);
13379  BuildMI(BB, dl, TII->get(PPC::AND), Tmp3Reg).addReg(TmpReg).addReg(MaskReg);
13380  if (CmpOpcode) {
13381    // For unsigned comparisons, we can directly compare the shifted values.
13382    // For signed comparisons we shift and sign extend.
13383    Register SReg = RegInfo.createVirtualRegister(GPRC);
13384    Register CrReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
13385    BuildMI(BB, dl, TII->get(PPC::AND), SReg)
13386        .addReg(TmpDestReg)
13387        .addReg(MaskReg);
13388    unsigned ValueReg = SReg;
13389    unsigned CmpReg = Incr2Reg;
13390    if (CmpOpcode == PPC::CMPW) {
// Signed case: shift the lane down to bit 0 and sign-extend it so it can be
// compared against the (sign-extended) original incr rather than Incr2Reg.
13391      ValueReg = RegInfo.createVirtualRegister(GPRC);
13392      BuildMI(BB, dl, TII->get(PPC::SRW), ValueReg)
13393          .addReg(SReg)
13394          .addReg(ShiftReg);
13395      Register ValueSReg = RegInfo.createVirtualRegister(GPRC);
13396      BuildMI(BB, dl, TII->get(is8bit ? PPC::EXTSB : PPC::EXTSH), ValueSReg)
13397          .addReg(ValueReg);
13398      ValueReg = ValueSReg;
13399      CmpReg = incr;
13400    }
// When CmpPred holds, keep the currently stored value: skip the store and
// leave the loop.
13401    BuildMI(BB, dl, TII->get(CmpOpcode), CrReg).addReg(ValueReg).addReg(CmpReg);
13402    BuildMI(BB, dl, TII->get(PPC::BCC))
13403        .addImm(CmpPred)
13404        .addReg(CrReg)
13405        .addMBB(exitMBB);
13406    BB->addSuccessor(loop2MBB);
13407    BB->addSuccessor(exitMBB);
13408    BB = loop2MBB;
13409  }
// Merge the updated lane into the untouched bytes and try to commit it;
// branch back to loopMBB to retry until the store-conditional succeeds.
13410  BuildMI(BB, dl, TII->get(PPC::OR), Tmp4Reg).addReg(Tmp3Reg).addReg(Tmp2Reg);
13411  BuildMI(BB, dl, TII->get(PPC::STWCX))
13412      .addReg(Tmp4Reg)
13413      .addReg(ZeroReg)
13414      .addReg(PtrReg);
13415  BuildMI(BB, dl, TII->get(PPC::BCC))
13417      .addReg(PPC::CR0)
13418      .addMBB(loopMBB);
13419  BB->addSuccessor(loopMBB);
13420  BB->addSuccessor(exitMBB);
13421
13422  // exitMBB:
13423  //   ...
13424  BB = exitMBB;
13425  // Since the shift amount is not a constant, we need to clear
13426  // the upper bits with a separate RLWINM.
13427  BuildMI(*BB, BB->begin(), dl, TII->get(PPC::RLWINM), dest)
13428      .addReg(SrwDestReg)
13429      .addImm(0)
13430      .addImm(is8bit ? 24 : 16)
13431      .addImm(31);
// Also inserted at the head of exitMBB, so this SRW ends up *before* the
// RLWINM built above: shift the old word down first, then mask it into dest.
13432  BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), SrwDestReg)
13433      .addReg(TmpDestReg)
13434      .addReg(ShiftReg);
13435  return BB;
13436}
13437
// Lower the EH_SjLj_SetJmp32/64 pseudo.  Splits the block into thisMBB /
// mainMBB / sinkMBB (see the sketch below): thisMBB saves TOC/base pointer
// into the buffer and sets the "returned from longjmp" value 1; mainMBB
// captures LR (the resume address) into the buffer and produces 0; sinkMBB
// PHIs the two results into the pseudo's destination register.
13440                                    MachineBasicBlock *MBB) const {
13441  DebugLoc DL = MI.getDebugLoc();
13442  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
13443  const PPCRegisterInfo *TRI = Subtarget.getRegisterInfo();
13444
13445  MachineFunction *MF = MBB->getParent();
13446  MachineRegisterInfo &MRI = MF->getRegInfo();
13447
13448  const BasicBlock *BB = MBB->getBasicBlock();
13449  MachineFunction::iterator I = ++MBB->getIterator();
13450
13451  Register DstReg = MI.getOperand(0).getReg();
13452  const TargetRegisterClass *RC = MRI.getRegClass(DstReg);
13453  assert(TRI->isTypeLegalForClass(*RC, MVT::i32) && "Invalid destination!");
13454  Register mainDstReg = MRI.createVirtualRegister(RC);
13455  Register restoreDstReg = MRI.createVirtualRegister(RC);
13456
13457  MVT PVT = getPointerTy(MF->getDataLayout());
13458  assert((PVT == MVT::i64 || PVT == MVT::i32) &&
13459         "Invalid Pointer Size!");
13460  // For v = setjmp(buf), we generate
13461  //
13462  // thisMBB:
13463  //  SjLjSetup mainMBB
13464  //  bl mainMBB
13465  //  v_restore = 1
13466  //  b sinkMBB
13467  //
13468  // mainMBB:
13469  //  buf[LabelOffset] = LR
13470  //  v_main = 0
13471  //
13472  // sinkMBB:
13473  //  v = phi(main, restore)
13474  //
13475
13476  MachineBasicBlock *thisMBB = MBB;
13477  MachineBasicBlock *mainMBB = MF->CreateMachineBasicBlock(BB);
13478  MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(BB);
13479  MF->insert(I, mainMBB);
13480  MF->insert(I, sinkMBB);
13481
13483
13484  // Transfer the remainder of BB and its successor edges to sinkMBB.
13485  sinkMBB->splice(sinkMBB->begin(), MBB,
13486                  std::next(MachineBasicBlock::iterator(MI)), MBB->end());
13488
13489  // Note that the structure of the jmp_buf used here is not compatible
13490  // with that used by libc, and is not designed to be. Specifically, it
13491  // stores only those 'reserved' registers that LLVM does not otherwise
13492  // understand how to spill. Also, by convention, by the time this
13493  // intrinsic is called, Clang has already stored the frame address in the
13494  // first slot of the buffer and stack address in the third. Following the
13495  // X86 target code, we'll store the jump address in the second slot. We also
13496  // need to save the TOC pointer (R2) to handle jumps between shared
13497  // libraries, and that will be stored in the fourth slot. The thread
13498  // identifier (R13) is not affected.
13499
13500  // thisMBB:
// Slot offsets within the jmp_buf, in units of pointer-sized words (see the
// layout comment above: slot 1 = jump address, 3 = TOC, 4 = base pointer).
13501  const int64_t LabelOffset = 1 * PVT.getStoreSize();
13502  const int64_t TOCOffset = 3 * PVT.getStoreSize();
13503  const int64_t BPOffset = 4 * PVT.getStoreSize();
13504
13505  // Prepare IP either in reg.
13506  const TargetRegisterClass *PtrRC = getRegClassFor(PVT);
13507  Register LabelReg = MRI.createVirtualRegister(PtrRC);
13508  Register BufReg = MI.getOperand(1).getReg();
13509
13510  if (Subtarget.is64BitELFABI()) {
13511    setUsesTOCBasePtr(*MBB->getParent());
13512    MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::STD))
13513              .addReg(PPC::X2)
13514              .addImm(TOCOffset)
13515              .addReg(BufReg)
13516              .cloneMemRefs(MI);
13517  }
13518
13519  // Naked functions never have a base pointer, and so we use r1. For all
13520  // other functions, this decision must be delayed until during PEI.
13521  unsigned BaseReg;
13522  if (MF->getFunction().hasFnAttribute(Attribute::Naked))
13523    BaseReg = Subtarget.isPPC64() ? PPC::X1 : PPC::R1;
13524  else
13525    BaseReg = Subtarget.isPPC64() ? PPC::BP8 : PPC::BP;
13526
13527  MIB = BuildMI(*thisMBB, MI, DL,
13528                TII->get(Subtarget.isPPC64() ? PPC::STD : PPC::STW))
13529            .addReg(BaseReg)
13530            .addImm(BPOffset)
13531            .addReg(BufReg)
13532            .cloneMemRefs(MI);
13533
13534  // Setup
// Branch-and-link into mainMBB so LR holds mainMBB's address for the MFLR
// below; nothing is preserved across this point (no-preserved regmask).
13535  MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::BCLalways)).addMBB(mainMBB);
13536  MIB.addRegMask(TRI->getNoPreservedMask());
13537
13538  BuildMI(*thisMBB, MI, DL, TII->get(PPC::LI), restoreDstReg).addImm(1);
13539
13540  MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::EH_SjLj_Setup))
13541            .addMBB(mainMBB);
13542  MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::B)).addMBB(sinkMBB);
13543
13544  thisMBB->addSuccessor(mainMBB, BranchProbability::getZero());
13545  thisMBB->addSuccessor(sinkMBB, BranchProbability::getOne());
13546
13547  // mainMBB:
13548  //  mainDstReg = 0
13549  MIB =
13550      BuildMI(mainMBB, DL,
13551              TII->get(Subtarget.isPPC64() ? PPC::MFLR8 : PPC::MFLR), LabelReg);
13552
13553  // Store IP
13554  if (Subtarget.isPPC64()) {
13555    MIB = BuildMI(mainMBB, DL, TII->get(PPC::STD))
13556              .addReg(LabelReg)
13557              .addImm(LabelOffset)
13558              .addReg(BufReg);
13559  } else {
13560    MIB = BuildMI(mainMBB, DL, TII->get(PPC::STW))
13561              .addReg(LabelReg)
13562              .addImm(LabelOffset)
13563              .addReg(BufReg);
13564  }
13565  MIB.cloneMemRefs(MI);
13566
13567  BuildMI(mainMBB, DL, TII->get(PPC::LI), mainDstReg).addImm(0);
13568  mainMBB->addSuccessor(sinkMBB);
13569
13570  // sinkMBB:
// v = 0 on the direct (setjmp) path, 1 when re-entered via longjmp.
13571  BuildMI(*sinkMBB, sinkMBB->begin(), DL,
13572          TII->get(PPC::PHI), DstReg)
13573    .addReg(mainDstReg).addMBB(mainMBB)
13574    .addReg(restoreDstReg).addMBB(thisMBB);
13575
13576  MI.eraseFromParent();
13577  return sinkMBB;
13578}
13579
// Lower the EH_SjLj_LongJmp32/64 pseudo: reload FP, the saved resume address,
// SP and BP from the jmp_buf (and the TOC pointer on 64-bit SVR4), then jump
// to the saved address through CTR.  Offsets mirror the layout written by
// emitEHSjLjSetJmp.
13582                                     MachineBasicBlock *MBB) const {
13583  DebugLoc DL = MI.getDebugLoc();
13584  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
13585
13586  MachineFunction *MF = MBB->getParent();
13587  MachineRegisterInfo &MRI = MF->getRegInfo();
13588
13589  MVT PVT = getPointerTy(MF->getDataLayout());
13590  assert((PVT == MVT::i64 || PVT == MVT::i32) &&
13591         "Invalid Pointer Size!");
13592
13593  const TargetRegisterClass *RC =
13594      (PVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
13595  Register Tmp = MRI.createVirtualRegister(RC);
13596  // Since FP is only updated here but NOT referenced, it's treated as GPR.
13597  unsigned FP = (PVT == MVT::i64) ? PPC::X31 : PPC::R31;
13598  unsigned SP = (PVT == MVT::i64) ? PPC::X1 : PPC::R1;
13599  unsigned BP =
13600      (PVT == MVT::i64)
13601          ? PPC::X30
13602          : (Subtarget.isSVR4ABI() && isPositionIndependent() ? PPC::R29
13603                                                              : PPC::R30);
13604
13606
// Buffer slot offsets, matching those used on the setjmp side.
13607  const int64_t LabelOffset = 1 * PVT.getStoreSize();
13608  const int64_t SPOffset = 2 * PVT.getStoreSize();
13609  const int64_t TOCOffset = 3 * PVT.getStoreSize();
13610  const int64_t BPOffset = 4 * PVT.getStoreSize();
13611
13612  Register BufReg = MI.getOperand(0).getReg();
13613
13614  // Reload FP (the jumped-to function may not have had a
13615  // frame pointer, and if so, then its r31 will be restored
13616  // as necessary).
13617  if (PVT == MVT::i64) {
13618    MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), FP)
13619            .addImm(0)
13620            .addReg(BufReg);
13621  } else {
13622    MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), FP)
13623            .addImm(0)
13624            .addReg(BufReg);
13625  }
13626  MIB.cloneMemRefs(MI);
13627
13628  // Reload IP
13629  if (PVT == MVT::i64) {
13630    MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), Tmp)
13631            .addImm(LabelOffset)
13632            .addReg(BufReg);
13633  } else {
13634    MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), Tmp)
13635            .addImm(LabelOffset)
13636            .addReg(BufReg);
13637  }
13638  MIB.cloneMemRefs(MI);
13639
13640  // Reload SP
13641  if (PVT == MVT::i64) {
13642    MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), SP)
13643            .addImm(SPOffset)
13644            .addReg(BufReg);
13645  } else {
13646    MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), SP)
13647            .addImm(SPOffset)
13648            .addReg(BufReg);
13649  }
13650  MIB.cloneMemRefs(MI);
13651
13652  // Reload BP
13653  if (PVT == MVT::i64) {
13654    MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), BP)
13655            .addImm(BPOffset)
13656            .addReg(BufReg);
13657  } else {
13658    MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), BP)
13659            .addImm(BPOffset)
13660            .addReg(BufReg);
13661  }
13662  MIB.cloneMemRefs(MI);
13663
13664  // Reload TOC
13665  if (PVT == MVT::i64 && Subtarget.isSVR4ABI()) {
13666    setUsesTOCBasePtr(*MBB->getParent());
13667    MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), PPC::X2)
13668              .addImm(TOCOffset)
13669              .addReg(BufReg)
13670              .cloneMemRefs(MI);
13671  }
13672
13673  // Jump
// Indirect branch through CTR to the address saved by setjmp.
13674  BuildMI(*MBB, MI, DL,
13675          TII->get(PVT == MVT::i64 ? PPC::MTCTR8 : PPC::MTCTR)).addReg(Tmp);
13676  BuildMI(*MBB, MI, DL, TII->get(PVT == MVT::i64 ? PPC::BCTR8 : PPC::BCTR));
13677
13678  MI.eraseFromParent();
13679  return MBB;
13680}
13681
// Returns true only when the function's "probe-stack" attribute is present
// and equal to "inline-asm"; any other value (or no attribute) disables
// inline stack probing.
13683  // If the function specifically requests inline stack probes, emit them.
13684  if (MF.getFunction().hasFnAttribute("probe-stack"))
13685    return MF.getFunction().getFnAttribute("probe-stack").getValueAsString() ==
13686           "inline-asm";
13687  return false;
13688}
13689
// Compute the stack-probe interval for this function: the "stack-probe-size"
// attribute value (default 4096), rounded down to a multiple of the stack
// alignment.  Never returns 0 — a probe size that rounds to zero degenerates
// to one probe per stack-alignment unit.
13691  const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
13692  unsigned StackAlign = TFI->getStackAlignment();
13693  assert(StackAlign >= 1 && isPowerOf2_32(StackAlign) &&
13694         "Unexpected stack alignment");
13695  // The default stack probe size is 4096 if the function has no
13696  // stack-probe-size attribute.
13697  const Function &Fn = MF.getFunction();
13698  unsigned StackProbeSize =
13699      Fn.getFnAttributeAsParsedInteger("stack-probe-size", 4096);
13700  // Round down to the stack alignment.
// Valid because StackAlign is asserted above to be a power of two.
13701  StackProbeSize &= ~(StackAlign - 1);
13702  return StackProbeSize ? StackProbeSize : StackAlign;
13703}
13704
13705// Lower dynamic stack allocation with probing. `emitProbedAlloca` is splitted
13706// into three phases. In the first phase, it uses pseudo instruction
13707// PREPARE_PROBED_ALLOCA to get the future result of actual FramePointer and
13708// FinalStackPtr. In the second phase, it generates a loop for probing blocks.
13709// At last, it uses pseudo instruction DYNAREAOFFSET to get the future result of
13710// MaxCallFrameSize so that it can calculate correct data area pointer.
// Returns TailMBB, into which everything following MI has been spliced.
13713                                                 MachineBasicBlock *MBB) const {
13714  const bool isPPC64 = Subtarget.isPPC64();
13715  MachineFunction *MF = MBB->getParent();
13716  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
13717  DebugLoc DL = MI.getDebugLoc();
13718  const unsigned ProbeSize = getStackProbeSize(*MF);
13719  const BasicBlock *ProbedBB = MBB->getBasicBlock();
13720  MachineRegisterInfo &MRI = MF->getRegInfo();
13721  // The CFG of probing stack looks as
13722  //         +-----+
13723  //         | MBB |
13724  //         +--+--+
13725  //            |
13726  //         +----v----+
13727  //    +--->+ TestMBB +---+
13728  //    |    +----+----+   |
13729  //    |         |        |
13730  //    |   +-----v----+   |
13731  //    +---+ BlockMBB |   |
13732  //        +----------+   |
13733  //                       |
13734  //          +---------+  |
13735  //          | TailMBB +<--+
13736  //          +---------+
13737  // In MBB, calculate previous frame pointer and final stack pointer.
13738  // In TestMBB, test if sp is equal to final stack pointer, if so, jump to
13739  // TailMBB. In BlockMBB, update the sp atomically and jump back to TestMBB.
13740  // TailMBB is spliced via \p MI.
13741  MachineBasicBlock *TestMBB = MF->CreateMachineBasicBlock(ProbedBB);
13742  MachineBasicBlock *TailMBB = MF->CreateMachineBasicBlock(ProbedBB);
13743  MachineBasicBlock *BlockMBB = MF->CreateMachineBasicBlock(ProbedBB);
13744
13745  MachineFunction::iterator MBBIter = ++MBB->getIterator();
13746  MF->insert(MBBIter, TestMBB);
13747  MF->insert(MBBIter, BlockMBB);
13748  MF->insert(MBBIter, TailMBB);
13749
13750  const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
13751  const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
13752
13753  Register DstReg = MI.getOperand(0).getReg();
13754  Register NegSizeReg = MI.getOperand(1).getReg();
13755  Register SPReg = isPPC64 ? PPC::X1 : PPC::R1;
13756  Register FinalStackPtr = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
13757  Register FramePointer = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
13758  Register ActualNegSizeReg = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
13759
13760  // Since value of NegSizeReg might be realigned in prologepilog, insert a
13761  // PREPARE_PROBED_ALLOCA pseudo instruction to get actual FramePointer and
13762  // NegSize.
13763  unsigned ProbeOpc;
13764  if (!MRI.hasOneNonDBGUse(NegSizeReg))
13765    ProbeOpc =
13766        isPPC64 ? PPC::PREPARE_PROBED_ALLOCA_64 : PPC::PREPARE_PROBED_ALLOCA_32;
13767  else
13768    // By introducing PREPARE_PROBED_ALLOCA_NEGSIZE_OPT, ActualNegSizeReg
13769    // and NegSizeReg will be allocated in the same phyreg to avoid
13770    // redundant copy when NegSizeReg has only one use which is current MI and
13771    // will be replaced by PREPARE_PROBED_ALLOCA then.
13772    ProbeOpc = isPPC64 ? PPC::PREPARE_PROBED_ALLOCA_NEGSIZE_SAME_REG_64
13773                       : PPC::PREPARE_PROBED_ALLOCA_NEGSIZE_SAME_REG_32;
13774  BuildMI(*MBB, {MI}, DL, TII->get(ProbeOpc), FramePointer)
13775      .addDef(ActualNegSizeReg)
13776      .addReg(NegSizeReg)
13777      .add(MI.getOperand(2))
13778      .add(MI.getOperand(3));
13779
13780  // Calculate final stack pointer, which equals to SP + ActualNegSize.
13781  BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::ADD8 : PPC::ADD4),
13782          FinalStackPtr)
13783      .addReg(SPReg)
13784      .addReg(ActualNegSizeReg);
13785
13786  // Materialize a scratch register for update.
// ScratchReg = -ProbeSize; a LIS/ORI pair is needed when the value does not
// fit in a signed 16-bit immediate.
13787  int64_t NegProbeSize = -(int64_t)ProbeSize;
13788  assert(isInt<32>(NegProbeSize) && "Unhandled probe size!");
13789  Register ScratchReg = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
13790  if (!isInt<16>(NegProbeSize)) {
13791    Register TempReg = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
13792    BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::LIS8 : PPC::LIS), TempReg)
13793        .addImm(NegProbeSize >> 16);
13794    BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::ORI8 : PPC::ORI),
13795            ScratchReg)
13796        .addReg(TempReg)
13797        .addImm(NegProbeSize & 0xFFFF);
13798  } else
13799    BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::LI8 : PPC::LI), ScratchReg)
13800        .addImm(NegProbeSize);
13801
13802  {
13803    // Probing leading residual part.
// NegMod = ActualNegSize - (ActualNegSize / -ProbeSize) * -ProbeSize, i.e.
// the residual that is not a multiple of ProbeSize; probe it once up front
// so the loop below can advance in whole ProbeSize steps.
13804    Register Div = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
13805    BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::DIVD : PPC::DIVW), Div)
13806        .addReg(ActualNegSizeReg)
13807        .addReg(ScratchReg);
13808    Register Mul = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
13809    BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::MULLD : PPC::MULLW), Mul)
13810        .addReg(Div)
13811        .addReg(ScratchReg);
13812    Register NegMod = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
13813    BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::SUBF8 : PPC::SUBF), NegMod)
13814        .addReg(Mul)
13815        .addReg(ActualNegSizeReg);
// Store-with-update both touches the new page and advances SP in one step.
13816    BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::STDUX : PPC::STWUX), SPReg)
13817        .addReg(FramePointer)
13818        .addReg(SPReg)
13819        .addReg(NegMod);
13820  }
13821
13822  {
13823    // Remaining part should be multiple of ProbeSize.
13824    Register CmpResult = MRI.createVirtualRegister(&PPC::CRRCRegClass);
13825    BuildMI(TestMBB, DL, TII->get(isPPC64 ? PPC::CMPD : PPC::CMPW), CmpResult)
13826        .addReg(SPReg)
13827        .addReg(FinalStackPtr);
13828    BuildMI(TestMBB, DL, TII->get(PPC::BCC))
13830        .addReg(CmpResult)
13831        .addMBB(TailMBB);
13832    TestMBB->addSuccessor(BlockMBB);
13833    TestMBB->addSuccessor(TailMBB);
13834  }
13835
13836  {
13837    // Touch the block.
13838    // |P...|P...|P...
13839    BuildMI(BlockMBB, DL, TII->get(isPPC64 ? PPC::STDUX : PPC::STWUX), SPReg)
13840        .addReg(FramePointer)
13841        .addReg(SPReg)
13842        .addReg(ScratchReg)
13843    BuildMI(BlockMBB, DL, TII->get(PPC::B)).addMBB(TestMBB);
13844    BlockMBB->addSuccessor(TestMBB);
13845  }
13846
13847  // Calculation of MaxCallFrameSize is deferred to prologepilog, use
13848  // DYNAREAOFFSET pseudo instruction to get the future result.
13849  Register MaxCallFrameSizeReg =
13850      MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
13851  BuildMI(TailMBB, DL,
13852          TII->get(isPPC64 ? PPC::DYNAREAOFFSET8 : PPC::DYNAREAOFFSET),
13853          MaxCallFrameSizeReg)
13854      .add(MI.getOperand(2))
13855      .add(MI.getOperand(3));
// The allocated data area starts above the (future) outgoing-call frame.
13856  BuildMI(TailMBB, DL, TII->get(isPPC64 ? PPC::ADD8 : PPC::ADD4), DstReg)
13857      .addReg(SPReg)
13858      .addReg(MaxCallFrameSizeReg);
13859
13860  // Splice instructions after MI to TailMBB.
13861  TailMBB->splice(TailMBB->end(), MBB,
13862                  std::next(MachineBasicBlock::iterator(MI)), MBB->end());
13864  MBB->addSuccessor(TestMBB);
13865
13866  // Delete the pseudo instruction.
13867  MI.eraseFromParent();
13868
13869  ++NumDynamicAllocaProbed;
13870  return TailMBB;
13871}
13872
// Returns true for the SELECT_CC_* pseudos (condition-register selects over
// the various register classes) that EmitInstrWithCustomInserter expands
// into a branch/phi diamond.
13874  switch (MI.getOpcode()) {
13875  case PPC::SELECT_CC_I4:
13876  case PPC::SELECT_CC_I8:
13877  case PPC::SELECT_CC_F4:
13878  case PPC::SELECT_CC_F8:
13879  case PPC::SELECT_CC_F16:
13880  case PPC::SELECT_CC_VRRC:
13881  case PPC::SELECT_CC_VSFRC:
13882  case PPC::SELECT_CC_VSSRC:
13883  case PPC::SELECT_CC_VSRC:
13884  case PPC::SELECT_CC_SPE4:
13885  case PPC::SELECT_CC_SPE:
13886    return true;
13887  default:
13888    return false;
13889  }
13890}
13891
13892static bool IsSelect(MachineInstr &MI) {
13893 switch (MI.getOpcode()) {
13894 case PPC::SELECT_I4:
13895 case PPC::SELECT_I8:
13896 case PPC::SELECT_F4:
13897 case PPC::SELECT_F8:
13898 case PPC::SELECT_F16:
13899 case PPC::SELECT_SPE:
13900 case PPC::SELECT_SPE4:
13901 case PPC::SELECT_VRRC:
13902 case PPC::SELECT_VSFRC:
13903 case PPC::SELECT_VSSRC:
13904 case PPC::SELECT_VSRC:
13905 return true;
13906 default:
13907 return false;
13908 }
13909}
13910
13913 MachineBasicBlock *BB) const {
13914 if (MI.getOpcode() == TargetOpcode::STACKMAP ||
13915 MI.getOpcode() == TargetOpcode::PATCHPOINT) {
13916 if (Subtarget.is64BitELFABI() &&
13917 MI.getOpcode() == TargetOpcode::PATCHPOINT &&
13918 !Subtarget.isUsingPCRelativeCalls()) {
13919 // Call lowering should have added an r2 operand to indicate a dependence
13920 // on the TOC base pointer value. It can't however, because there is no
13921 // way to mark the dependence as implicit there, and so the stackmap code
13922 // will confuse it with a regular operand. Instead, add the dependence
13923 // here.
13924 MI.addOperand(MachineOperand::CreateReg(PPC::X2, false, true));
13925 }
13926
13927 return emitPatchPoint(MI, BB);
13928 }
13929
13930 if (MI.getOpcode() == PPC::EH_SjLj_SetJmp32 ||
13931 MI.getOpcode() == PPC::EH_SjLj_SetJmp64) {
13932 return emitEHSjLjSetJmp(MI, BB);
13933 } else if (MI.getOpcode() == PPC::EH_SjLj_LongJmp32 ||
13934 MI.getOpcode() == PPC::EH_SjLj_LongJmp64) {
13935 return emitEHSjLjLongJmp(MI, BB);
13936 }
13937
13938 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
13939
13940 // To "insert" these instructions we actually have to insert their
13941 // control-flow patterns.
13942 const BasicBlock *LLVM_BB = BB->getBasicBlock();
13944
13945 MachineFunction *F = BB->getParent();
13946 MachineRegisterInfo &MRI = F->getRegInfo();
13947
13948 if (Subtarget.hasISEL() &&
13949 (MI.getOpcode() == PPC::SELECT_CC_I4 ||
13950 MI.getOpcode() == PPC::SELECT_CC_I8 ||
13951 MI.getOpcode() == PPC::SELECT_I4 || MI.getOpcode() == PPC::SELECT_I8)) {
13953 if (MI.getOpcode() == PPC::SELECT_CC_I4 ||
13954 MI.getOpcode() == PPC::SELECT_CC_I8)
13955 Cond.push_back(MI.getOperand(4));
13956 else
13958 Cond.push_back(MI.getOperand(1));
13959
13960 DebugLoc dl = MI.getDebugLoc();
13961 TII->insertSelect(*BB, MI, dl, MI.getOperand(0).getReg(), Cond,
13962 MI.getOperand(2).getReg(), MI.getOperand(3).getReg());
13963 } else if (IsSelectCC(MI) || IsSelect(MI)) {
13964 // The incoming instruction knows the destination vreg to set, the
13965 // condition code register to branch on, the true/false values to
13966 // select between, and a branch opcode to use.
13967
13968 // thisMBB:
13969 // ...
13970 // TrueVal = ...
13971 // cmpTY ccX, r1, r2
13972 // bCC sinkMBB
13973 // fallthrough --> copy0MBB
13974 MachineBasicBlock *thisMBB = BB;
13975 MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
13976 MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
13977 DebugLoc dl = MI.getDebugLoc();
13978 F->insert(It, copy0MBB);
13979 F->insert(It, sinkMBB);
13980
13981 if (isPhysRegUsedAfter(PPC::CARRY, MI.getIterator())) {
13982 copy0MBB->addLiveIn(PPC::CARRY);
13983 sinkMBB->addLiveIn(PPC::CARRY);
13984 }
13985
13986 // Set the call frame size on entry to the new basic blocks.
13987 // See https://reviews.llvm.org/D156113.
13988 unsigned CallFrameSize = TII->getCallFrameSizeAt(MI);
13989 copy0MBB->setCallFrameSize(CallFrameSize);
13990 sinkMBB->setCallFrameSize(CallFrameSize);
13991
13992 // Transfer the remainder of BB and its successor edges to sinkMBB.
13993 sinkMBB->splice(sinkMBB->begin(), BB,
13994 std::next(MachineBasicBlock::iterator(MI)), BB->end());
13996
13997 // Next, add the true and fallthrough blocks as its successors.
13998 BB->addSuccessor(copy0MBB);
13999 BB->addSuccessor(sinkMBB);
14000
14001 if (IsSelect(MI)) {
14002 BuildMI(BB, dl, TII->get(PPC::BC))
14003 .addReg(MI.getOperand(1).getReg())
14004 .addMBB(sinkMBB);
14005 } else {
14006 unsigned SelectPred = MI.getOperand(4).getImm();
14007 BuildMI(BB, dl, TII->get(PPC::BCC))
14008 .addImm(SelectPred)
14009 .addReg(MI.getOperand(1).getReg())
14010 .addMBB(sinkMBB);
14011 }
14012
14013 // copy0MBB:
14014 // %FalseValue = ...
14015 // # fallthrough to sinkMBB
14016 BB = copy0MBB;
14017
14018 // Update machine-CFG edges
14019 BB->addSuccessor(sinkMBB);
14020
14021 // sinkMBB:
14022 // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
14023 // ...
14024 BB = sinkMBB;
14025 BuildMI(*BB, BB->begin(), dl, TII->get(PPC::PHI), MI.getOperand(0).getReg())
14026 .addReg(MI.getOperand(3).getReg())
14027 .addMBB(copy0MBB)
14028 .addReg(MI.getOperand(2).getReg())
14029 .addMBB(thisMBB);
14030 } else if (MI.getOpcode() == PPC::ReadTB) {
14031 // To read the 64-bit time-base register on a 32-bit target, we read the
14032 // two halves. Should the counter have wrapped while it was being read, we
14033 // need to try again.
14034 // ...
14035 // readLoop:
14036 // mfspr Rx,TBU # load from TBU
14037 // mfspr Ry,TB # load from TB
14038 // mfspr Rz,TBU # load from TBU
14039 // cmpw crX,Rx,Rz # check if 'old'='new'
14040 // bne readLoop # branch if they're not equal
14041 // ...
14042
14043 MachineBasicBlock *readMBB = F->CreateMachineBasicBlock(LLVM_BB);
14044 MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
14045 DebugLoc dl = MI.getDebugLoc();
14046 F->insert(It, readMBB);
14047 F->insert(It, sinkMBB);
14048
14049 // Transfer the remainder of BB and its successor edges to sinkMBB.
14050 sinkMBB->splice(sinkMBB->begin(), BB,
14051 std::next(MachineBasicBlock::iterator(MI)), BB->end());
14053
14054 BB->addSuccessor(readMBB);
14055 BB = readMBB;
14056
14057 MachineRegisterInfo &RegInfo = F->getRegInfo();
14058 Register ReadAgainReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
14059 Register LoReg = MI.getOperand(0).getReg();
14060 Register HiReg = MI.getOperand(1).getReg();
14061
14062 BuildMI(BB, dl, TII->get(PPC::MFSPR), HiReg).addImm(269);
14063 BuildMI(BB, dl, TII->get(PPC::MFSPR), LoReg).addImm(268);
14064 BuildMI(BB, dl, TII->get(PPC::MFSPR), ReadAgainReg).addImm(269);
14065
14066 Register CmpReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
14067
14068 BuildMI(BB, dl, TII->get(PPC::CMPW), CmpReg)
14069 .addReg(HiReg)
14070 .addReg(ReadAgainReg);
14071 BuildMI(BB, dl, TII->get(PPC::BCC))
14073 .addReg(CmpReg)
14074 .addMBB(readMBB);
14075
14076 BB->addSuccessor(readMBB);
14077 BB->addSuccessor(sinkMBB);
14078 } else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I8)
14079 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::ADD4);
14080 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I16)
14081 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::ADD4);
14082 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I32)
14083 BB = EmitAtomicBinary(MI, BB, 4, PPC::ADD4);
14084 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I64)
14085 BB = EmitAtomicBinary(MI, BB, 8, PPC::ADD8);
14086
14087 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I8)
14088 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::AND);
14089 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I16)
14090 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::AND);
14091 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I32)
14092 BB = EmitAtomicBinary(MI, BB, 4, PPC::AND);
14093 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I64)
14094 BB = EmitAtomicBinary(MI, BB, 8, PPC::AND8);
14095
14096 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I8)
14097 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::OR);
14098 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I16)
14099 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::OR);
14100 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I32)
14101 BB = EmitAtomicBinary(MI, BB, 4, PPC::OR);
14102 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I64)
14103 BB = EmitAtomicBinary(MI, BB, 8, PPC::OR8);
14104
14105 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I8)
14106 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::XOR);
14107 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I16)
14108 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::XOR);
14109 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I32)
14110 BB = EmitAtomicBinary(MI, BB, 4, PPC::XOR);
14111 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I64)
14112 BB = EmitAtomicBinary(MI, BB, 8, PPC::XOR8);
14113
14114 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I8)
14115 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::NAND);
14116 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I16)
14117 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::NAND);
14118 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I32)
14119 BB = EmitAtomicBinary(MI, BB, 4, PPC::NAND);
14120 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I64)
14121 BB = EmitAtomicBinary(MI, BB, 8, PPC::NAND8);
14122
14123 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I8)
14124 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::SUBF);
14125 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I16)
14126 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::SUBF);
14127 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I32)
14128 BB = EmitAtomicBinary(MI, BB, 4, PPC::SUBF);
14129 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I64)
14130 BB = EmitAtomicBinary(MI, BB, 8, PPC::SUBF8);
14131
14132 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I8)
14133 BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPW, PPC::PRED_LT);
14134 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I16)
14135 BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPW, PPC::PRED_LT);
14136 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I32)
14137 BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPW, PPC::PRED_LT);
14138 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I64)
14139 BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPD, PPC::PRED_LT);
14140
14141 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I8)
14142 BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPW, PPC::PRED_GT);
14143 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I16)
14144 BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPW, PPC::PRED_GT);
14145 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I32)
14146 BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPW, PPC::PRED_GT);
14147 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I64)
14148 BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPD, PPC::PRED_GT);
14149
14150 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I8)
14151 BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPLW, PPC::PRED_LT);
14152 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I16)
14153 BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPLW, PPC::PRED_LT);
14154 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I32)
14155 BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPLW, PPC::PRED_LT);
14156 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I64)
14157 BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPLD, PPC::PRED_LT);
14158
14159 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I8)
14160 BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPLW, PPC::PRED_GT);
14161 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I16)
14162 BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPLW, PPC::PRED_GT);
14163 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I32)
14164 BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPLW, PPC::PRED_GT);
14165 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I64)
14166 BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPLD, PPC::PRED_GT);
14167
14168 else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I8)
14169 BB = EmitPartwordAtomicBinary(MI, BB, true, 0);
14170 else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I16)
14171 BB = EmitPartwordAtomicBinary(MI, BB, false, 0);
14172 else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I32)
14173 BB = EmitAtomicBinary(MI, BB, 4, 0);
14174 else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I64)
14175 BB = EmitAtomicBinary(MI, BB, 8, 0);
14176 else if (MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I32 ||
14177 MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I64 ||
14178 (Subtarget.hasPartwordAtomics() &&
14179 MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8) ||
14180 (Subtarget.hasPartwordAtomics() &&
14181 MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I16)) {
14182 bool is64bit = MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I64;
14183
14184 auto LoadMnemonic = PPC::LDARX;
14185 auto StoreMnemonic = PPC::STDCX;
14186 switch (MI.getOpcode()) {
14187 default:
14188 llvm_unreachable("Compare and swap of unknown size");
14189 case PPC::ATOMIC_CMP_SWAP_I8:
14190 LoadMnemonic = PPC::LBARX;
14191 StoreMnemonic = PPC::STBCX;
14192 assert(Subtarget.hasPartwordAtomics() && "No support partword atomics.");
14193 break;
14194 case PPC::ATOMIC_CMP_SWAP_I16:
14195 LoadMnemonic = PPC::LHARX;
14196 StoreMnemonic = PPC::STHCX;
14197 assert(Subtarget.hasPartwordAtomics() && "No support partword atomics.");
14198 break;
14199 case PPC::ATOMIC_CMP_SWAP_I32:
14200 LoadMnemonic = PPC::LWARX;
14201 StoreMnemonic = PPC::STWCX;
14202 break;
14203 case PPC::ATOMIC_CMP_SWAP_I64:
14204 LoadMnemonic = PPC::LDARX;
14205 StoreMnemonic = PPC::STDCX;
14206 break;
14207 }
14208 MachineRegisterInfo &RegInfo = F->getRegInfo();
14209 Register dest = MI.getOperand(0).getReg();
14210 Register ptrA = MI.getOperand(1).getReg();
14211 Register ptrB = MI.getOperand(2).getReg();
14212 Register CrReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
14213 Register oldval = MI.getOperand(3).getReg();
14214 Register newval = MI.getOperand(4).getReg();
14215 DebugLoc dl = MI.getDebugLoc();
14216
14217 MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB);
14218 MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB);
14219 MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
14220 F->insert(It, loop1MBB);
14221 F->insert(It, loop2MBB);
14222 F->insert(It, exitMBB);
14223 exitMBB->splice(exitMBB->begin(), BB,
14224 std::next(MachineBasicBlock::iterator(MI)), BB->end());
14226
14227 // thisMBB:
14228 // ...
14229 // fallthrough --> loopMBB
14230 BB->addSuccessor(loop1MBB);
14231
14232 // loop1MBB:
14233 // l[bhwd]arx dest, ptr
14234 // cmp[wd] dest, oldval
14235 // bne- exitBB
14236 // loop2MBB:
14237 // st[bhwd]cx. newval, ptr
14238 // bne- loopMBB
14239 // b exitBB
14240 // exitBB:
14241 BB = loop1MBB;
14242 BuildMI(BB, dl, TII->get(LoadMnemonic), dest).addReg(ptrA).addReg(ptrB);
14243 BuildMI(BB, dl, TII->get(is64bit ? PPC::CMPD : PPC::CMPW), CrReg)
14244 .addReg(dest)
14245 .addReg(oldval);
14246 BuildMI(BB, dl, TII->get(PPC::BCC))
14248 .addReg(CrReg)
14249 .addMBB(exitMBB);
14250 BB->addSuccessor(loop2MBB);
14251 BB->addSuccessor(exitMBB);
14252
14253 BB = loop2MBB;
14254 BuildMI(BB, dl, TII->get(StoreMnemonic))
14255 .addReg(newval)
14256 .addReg(ptrA)
14257 .addReg(ptrB);
14258 BuildMI(BB, dl, TII->get(PPC::BCC))
14260 .addReg(PPC::CR0)
14261 .addMBB(loop1MBB);
14262 BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
14263 BB->addSuccessor(loop1MBB);
14264 BB->addSuccessor(exitMBB);
14265
14266 // exitMBB:
14267 // ...
14268 BB = exitMBB;
14269 } else if (MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8 ||
14270 MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I16) {
14271 // We must use 64-bit registers for addresses when targeting 64-bit,
14272 // since we're actually doing arithmetic on them. Other registers
14273 // can be 32-bit.
14274 bool is64bit = Subtarget.isPPC64();
14275 bool isLittleEndian = Subtarget.isLittleEndian();
14276 bool is8bit = MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8;
14277
14278 Register dest = MI.getOperand(0).getReg();
14279 Register ptrA = MI.getOperand(1).getReg();
14280 Register ptrB = MI.getOperand(2).getReg();
14281 Register oldval = MI.getOperand(3).getReg();
14282 Register newval = MI.getOperand(4).getReg();
14283 DebugLoc dl = MI.getDebugLoc();
14284
14285 MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB);
14286 MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB);
14287 MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
14288 F->insert(It, loop1MBB);
14289 F->insert(It, loop2MBB);
14290 F->insert(It, exitMBB);
14291 exitMBB->splice(exitMBB->begin(), BB,
14292 std::next(MachineBasicBlock::iterator(MI)), BB->end());
14294
14295 MachineRegisterInfo &RegInfo = F->getRegInfo();
14296 const TargetRegisterClass *RC =
14297 is64bit ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
14298 const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
14299
14300 Register PtrReg = RegInfo.createVirtualRegister(RC);
14301 Register Shift1Reg = RegInfo.createVirtualRegister(GPRC);
14302 Register ShiftReg =
14303 isLittleEndian ? Shift1Reg : RegInfo.createVirtualRegister(GPRC);
14304 Register NewVal2Reg = RegInfo.createVirtualRegister(GPRC);
14305 Register NewVal3Reg = RegInfo.createVirtualRegister(GPRC);
14306 Register OldVal2Reg = RegInfo.createVirtualRegister(GPRC);
14307 Register OldVal3Reg = RegInfo.createVirtualRegister(GPRC);
14308 Register MaskReg = RegInfo.createVirtualRegister(GPRC);
14309 Register Mask2Reg = RegInfo.createVirtualRegister(GPRC);
14310 Register Mask3Reg = RegInfo.createVirtualRegister(GPRC);
14311 Register Tmp2Reg = RegInfo.createVirtualRegister(GPRC);
14312 Register Tmp4Reg = RegInfo.createVirtualRegister(GPRC);
14313 Register TmpDestReg = RegInfo.createVirtualRegister(GPRC);
14314 Register Ptr1Reg;
14315 Register TmpReg = RegInfo.createVirtualRegister(GPRC);
14316 Register ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;
14317 Register CrReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
14318 // thisMBB:
14319 // ...
14320 // fallthrough --> loopMBB
14321 BB->addSuccessor(loop1MBB);
14322
14323 // The 4-byte load must be aligned, while a char or short may be
14324 // anywhere in the word. Hence all this nasty bookkeeping code.
14325 // add ptr1, ptrA, ptrB [copy if ptrA==0]
14326 // rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27]
14327 // xori shift, shift1, 24 [16]
14328 // rlwinm ptr, ptr1, 0, 0, 29
14329 // slw newval2, newval, shift
14330 // slw oldval2, oldval,shift
14331 // li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535]
14332 // slw mask, mask2, shift
14333 // and newval3, newval2, mask
14334 // and oldval3, oldval2, mask
14335 // loop1MBB:
14336 // lwarx tmpDest, ptr
14337 // and tmp, tmpDest, mask
14338 // cmpw tmp, oldval3
14339 // bne- exitBB
14340 // loop2MBB:
14341 // andc tmp2, tmpDest, mask
14342 // or tmp4, tmp2, newval3
14343 // stwcx. tmp4, ptr
14344 // bne- loop1MBB
14345 // b exitBB
14346 // exitBB:
14347 // srw dest, tmpDest, shift
14348 if (ptrA != ZeroReg) {
14349 Ptr1Reg = RegInfo.createVirtualRegister(RC);
14350 BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)
14351 .addReg(ptrA)
14352 .addReg(ptrB);
14353 } else {
14354 Ptr1Reg = ptrB;
14355 }
14356
14357 // We need use 32-bit subregister to avoid mismatch register class in 64-bit
14358 // mode.
14359 BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg)
14360 .addReg(Ptr1Reg, {}, is64bit ? PPC::sub_32 : 0)
14361 .addImm(3)
14362 .addImm(27)
14363 .addImm(is8bit ? 28 : 27);
14364 if (!isLittleEndian)
14365 BuildMI(BB, dl, TII->get(PPC::XORI), ShiftReg)
14366 .addReg(Shift1Reg)
14367 .addImm(is8bit ? 24 : 16);
14368 if (is64bit)
14369 BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg)
14370 .addReg(Ptr1Reg)
14371 .addImm(0)
14372 .addImm(61);
14373 else
14374 BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg)
14375 .addReg(Ptr1Reg)
14376 .addImm(0)
14377 .addImm(0)
14378 .addImm(29);
14379 BuildMI(BB, dl, TII->get(PPC::SLW), NewVal2Reg)
14380 .addReg(newval)
14381 .addReg(ShiftReg);
14382 BuildMI(BB, dl, TII->get(PPC::SLW), OldVal2Reg)
14383 .addReg(oldval)
14384 .addReg(ShiftReg);
14385 if (is8bit)
14386 BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255);
14387 else {
14388 BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0);
14389 BuildMI(BB, dl, TII->get(PPC::ORI), Mask2Reg)
14390 .addReg(Mask3Reg)
14391 .addImm(65535);
14392 }
14393 BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg)
14394 .addReg(Mask2Reg)
14395 .addReg(ShiftReg);
14396 BuildMI(BB, dl, TII->get(PPC::AND), NewVal3Reg)
14397 .addReg(NewVal2Reg)
14398 .addReg(MaskReg);
14399 BuildMI(BB, dl, TII->get(PPC::AND), OldVal3Reg)
14400 .addReg(OldVal2Reg)
14401 .addReg(MaskReg);
14402
14403 BB = loop1MBB;
14404 BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg)
14405 .addReg(ZeroReg)
14406 .addReg(PtrReg);
14407 BuildMI(BB, dl, TII->get(PPC::AND), TmpReg)
14408 .addReg(TmpDestReg)
14409 .addReg(MaskReg);
14410 BuildMI(BB, dl, TII->get(PPC::CMPW), CrReg)
14411 .addReg(TmpReg)
14412 .addReg(OldVal3Reg);
14413 BuildMI(BB, dl, TII->get(PPC::BCC))
14415 .addReg(CrReg)
14416 .addMBB(exitMBB);
14417 BB->addSuccessor(loop2MBB);
14418 BB->addSuccessor(exitMBB);
14419
14420 BB = loop2MBB;
14421 BuildMI(BB, dl, TII->get(PPC::ANDC), Tmp2Reg)
14422 .addReg(TmpDestReg)
14423 .addReg(MaskReg);
14424 BuildMI(BB, dl, TII->get(PPC::OR), Tmp4Reg)
14425 .addReg(Tmp2Reg)
14426 .addReg(NewVal3Reg);
14427 BuildMI(BB, dl, TII->get(PPC::STWCX))
14428 .addReg(Tmp4Reg)
14429 .addReg(ZeroReg)
14430 .addReg(PtrReg);
14431 BuildMI(BB, dl, TII->get(PPC::BCC))
14433 .addReg(PPC::CR0)
14434 .addMBB(loop1MBB);
14435 BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
14436 BB->addSuccessor(loop1MBB);
14437 BB->addSuccessor(exitMBB);
14438
14439 // exitMBB:
14440 // ...
14441 BB = exitMBB;
14442 BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), dest)
14443 .addReg(TmpReg)
14444 .addReg(ShiftReg);
14445 } else if (MI.getOpcode() == PPC::FADDrtz) {
14446 // This pseudo performs an FADD with rounding mode temporarily forced
14447 // to round-to-zero. We emit this via custom inserter since the FPSCR
14448 // is not modeled at the SelectionDAG level.
14449 Register Dest = MI.getOperand(0).getReg();
14450 Register Src1 = MI.getOperand(1).getReg();
14451 Register Src2 = MI.getOperand(2).getReg();
14452 DebugLoc dl = MI.getDebugLoc();
14453
14454 MachineRegisterInfo &RegInfo = F->getRegInfo();
14455 Register MFFSReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass);
14456
14457 // Save FPSCR value.
14458 BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), MFFSReg);
14459
14460 // Set rounding mode to round-to-zero.
14461 BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB1))
14462 .addImm(31)
14464
14465 BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB0))
14466 .addImm(30)
14468
14469 // Perform addition.
14470 auto MIB = BuildMI(*BB, MI, dl, TII->get(PPC::FADD), Dest)
14471 .addReg(Src1)
14472 .addReg(Src2);
14473 if (MI.getFlag(MachineInstr::NoFPExcept))
14475
14476 // Restore FPSCR value.
14477 BuildMI(*BB, MI, dl, TII->get(PPC::MTFSFb)).addImm(1).addReg(MFFSReg);
14478 } else if (MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT ||
14479 MI.getOpcode() == PPC::ANDI_rec_1_GT_BIT ||
14480 MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT8 ||
14481 MI.getOpcode() == PPC::ANDI_rec_1_GT_BIT8) {
14482 unsigned Opcode = (MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT8 ||
14483 MI.getOpcode() == PPC::ANDI_rec_1_GT_BIT8)
14484 ? PPC::ANDI8_rec
14485 : PPC::ANDI_rec;
14486 bool IsEQ = (MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT ||
14487 MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT8);
14488
14489 MachineRegisterInfo &RegInfo = F->getRegInfo();
14490 Register Dest = RegInfo.createVirtualRegister(
14491 Opcode == PPC::ANDI_rec ? &PPC::GPRCRegClass : &PPC::G8RCRegClass);
14492
14493 DebugLoc Dl = MI.getDebugLoc();
14494 BuildMI(*BB, MI, Dl, TII->get(Opcode), Dest)
14495 .addReg(MI.getOperand(1).getReg())
14496 .addImm(1);
14497 BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::COPY),
14498 MI.getOperand(0).getReg())
14499 .addReg(IsEQ ? PPC::CR0EQ : PPC::CR0GT);
14500 } else if (MI.getOpcode() == PPC::TCHECK_RET) {
14501 DebugLoc Dl = MI.getDebugLoc();
14502 MachineRegisterInfo &RegInfo = F->getRegInfo();
14503 Register CRReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
14504 BuildMI(*BB, MI, Dl, TII->get(PPC::TCHECK), CRReg);
14505 BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::COPY),
14506 MI.getOperand(0).getReg())
14507 .addReg(CRReg);
14508 } else if (MI.getOpcode() == PPC::TBEGIN_RET) {
14509 DebugLoc Dl = MI.getDebugLoc();
14510 unsigned Imm = MI.getOperand(1).getImm();
14511 BuildMI(*BB, MI, Dl, TII->get(PPC::TBEGIN)).addImm(Imm);
14512 BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::COPY),
14513 MI.getOperand(0).getReg())
14514 .addReg(PPC::CR0EQ);
14515 } else if (MI.getOpcode() == PPC::SETRNDi) {
14516 DebugLoc dl = MI.getDebugLoc();
14517 Register OldFPSCRReg = MI.getOperand(0).getReg();
14518
14519 // Save FPSCR value.
14520 if (MRI.use_empty(OldFPSCRReg))
14521 BuildMI(*BB, MI, dl, TII->get(TargetOpcode::IMPLICIT_DEF), OldFPSCRReg);
14522 else
14523 BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), OldFPSCRReg);
14524
14525 // The floating point rounding mode is in the bits 62:63 of FPCSR, and has
14526 // the following settings:
14527 // 00 Round to nearest
14528 // 01 Round to 0
14529 // 10 Round to +inf
14530 // 11 Round to -inf
14531
14532 // When the operand is immediate, using the two least significant bits of
14533 // the immediate to set the bits 62:63 of FPSCR.
14534 unsigned Mode = MI.getOperand(1).getImm();
14535 BuildMI(*BB, MI, dl, TII->get((Mode & 1) ? PPC::MTFSB1 : PPC::MTFSB0))
14536 .addImm(31)
14538
14539 BuildMI(*BB, MI, dl, TII->get((Mode & 2) ? PPC::MTFSB1 : PPC::MTFSB0))
14540 .addImm(30)
14542 } else if (MI.getOpcode() == PPC::SETRND) {
14543 DebugLoc dl = MI.getDebugLoc();
14544
14545 // Copy register from F8RCRegClass::SrcReg to G8RCRegClass::DestReg
14546 // or copy register from G8RCRegClass::SrcReg to F8RCRegClass::DestReg.
14547 // If the target doesn't have DirectMove, we should use stack to do the
14548 // conversion, because the target doesn't have the instructions like mtvsrd
14549 // or mfvsrd to do this conversion directly.
14550 auto copyRegFromG8RCOrF8RC = [&] (unsigned DestReg, unsigned SrcReg) {
14551 if (Subtarget.hasDirectMove()) {
14552 BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), DestReg)
14553 .addReg(SrcReg);
14554 } else {
14555 // Use stack to do the register copy.
14556 unsigned StoreOp = PPC::STD, LoadOp = PPC::LFD;
14557 MachineRegisterInfo &RegInfo = F->getRegInfo();
14558 const TargetRegisterClass *RC = RegInfo.getRegClass(SrcReg);
14559 if (RC == &PPC::F8RCRegClass) {
14560 // Copy register from F8RCRegClass to G8RCRegclass.
14561 assert((RegInfo.getRegClass(DestReg) == &PPC::G8RCRegClass) &&
14562 "Unsupported RegClass.");
14563
14564 StoreOp = PPC::STFD;
14565 LoadOp = PPC::LD;
14566 } else {
14567 // Copy register from G8RCRegClass to F8RCRegclass.
14568 assert((RegInfo.getRegClass(SrcReg) == &PPC::G8RCRegClass) &&
14569 (RegInfo.getRegClass(DestReg) == &PPC::F8RCRegClass) &&
14570 "Unsupported RegClass.");
14571 }
14572
14573 MachineFrameInfo &MFI = F->getFrameInfo();
14574 int FrameIdx = MFI.CreateStackObject(8, Align(8), false);
14575
14576 MachineMemOperand *MMOStore = F->getMachineMemOperand(
14577 MachinePointerInfo::getFixedStack(*F, FrameIdx, 0),
14579 MFI.getObjectAlign(FrameIdx));
14580
14581 // Store the SrcReg into the stack.
14582 BuildMI(*BB, MI, dl, TII->get(StoreOp))
14583 .addReg(SrcReg)
14584 .addImm(0)
14585 .addFrameIndex(FrameIdx)
14586 .addMemOperand(MMOStore);
14587
14588 MachineMemOperand *MMOLoad = F->getMachineMemOperand(
14589 MachinePointerInfo::getFixedStack(*F, FrameIdx, 0),
14591 MFI.getObjectAlign(FrameIdx));
14592
14593 // Load from the stack where SrcReg is stored, and save to DestReg,
14594 // so we have done the RegClass conversion from RegClass::SrcReg to
14595 // RegClass::DestReg.
14596 BuildMI(*BB, MI, dl, TII->get(LoadOp), DestReg)
14597 .addImm(0)
14598 .addFrameIndex(FrameIdx)
14599 .addMemOperand(MMOLoad);
14600 }
14601 };
14602
14603 Register OldFPSCRReg = MI.getOperand(0).getReg();
14604
14605 // Save FPSCR value.
14606 BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), OldFPSCRReg);
14607
14608 // When the operand is gprc register, use two least significant bits of the
14609 // register and mtfsf instruction to set the bits 62:63 of FPSCR.
14610 //
14611 // copy OldFPSCRTmpReg, OldFPSCRReg
14612 // (INSERT_SUBREG ExtSrcReg, (IMPLICIT_DEF ImDefReg), SrcOp, 1)
14613 // rldimi NewFPSCRTmpReg, ExtSrcReg, OldFPSCRReg, 0, 62
14614 // copy NewFPSCRReg, NewFPSCRTmpReg
14615 // mtfsf 255, NewFPSCRReg
14616 MachineOperand SrcOp = MI.getOperand(1);
14617 MachineRegisterInfo &RegInfo = F->getRegInfo();
14618 Register OldFPSCRTmpReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
14619
14620 copyRegFromG8RCOrF8RC(OldFPSCRTmpReg, OldFPSCRReg);
14621
14622 Register ImDefReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
14623 Register ExtSrcReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
14624
14625 // The first operand of INSERT_SUBREG should be a register which has
14626 // subregisters, we only care about its RegClass, so we should use an
14627 // IMPLICIT_DEF register.
14628 BuildMI(*BB, MI, dl, TII->get(TargetOpcode::IMPLICIT_DEF), ImDefReg);
14629 BuildMI(*BB, MI, dl, TII->get(PPC::INSERT_SUBREG), ExtSrcReg)
14630 .addReg(ImDefReg)
14631 .add(SrcOp)
14632 .addImm(1);
14633
14634 Register NewFPSCRTmpReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
14635 BuildMI(*BB, MI, dl, TII->get(PPC::RLDIMI), NewFPSCRTmpReg)
14636 .addReg(OldFPSCRTmpReg)
14637 .addReg(ExtSrcReg)
14638 .addImm(0)
14639 .addImm(62);
14640
14641 Register NewFPSCRReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass);
14642 copyRegFromG8RCOrF8RC(NewFPSCRReg, NewFPSCRTmpReg);
14643
14644 // The mask 255 means that put the 32:63 bits of NewFPSCRReg to the 32:63
14645 // bits of FPSCR.
14646 BuildMI(*BB, MI, dl, TII->get(PPC::MTFSF))
14647 .addImm(255)
14648 .addReg(NewFPSCRReg)
14649 .addImm(0)
14650 .addImm(0);
14651 } else if (MI.getOpcode() == PPC::SETFLM) {
14652 DebugLoc Dl = MI.getDebugLoc();
14653
14654 // Result of setflm is previous FPSCR content, so we need to save it first.
14655 Register OldFPSCRReg = MI.getOperand(0).getReg();
14656 if (MRI.use_empty(OldFPSCRReg))
14657 BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::IMPLICIT_DEF), OldFPSCRReg);
14658 else
14659 BuildMI(*BB, MI, Dl, TII->get(PPC::MFFS), OldFPSCRReg);
14660
14661 // Put bits in 32:63 to FPSCR.
14662 Register NewFPSCRReg = MI.getOperand(1).getReg();
14663 BuildMI(*BB, MI, Dl, TII->get(PPC::MTFSF))
14664 .addImm(255)
14665 .addReg(NewFPSCRReg)
14666 .addImm(0)
14667 .addImm(0);
14668 } else if (MI.getOpcode() == PPC::PROBED_ALLOCA_32 ||
14669 MI.getOpcode() == PPC::PROBED_ALLOCA_64) {
14670 return emitProbedAlloca(MI, BB);
14671 } else if (MI.getOpcode() == PPC::SPLIT_QUADWORD) {
14672 DebugLoc DL = MI.getDebugLoc();
14673 Register Src = MI.getOperand(2).getReg();
14674 Register Lo = MI.getOperand(0).getReg();
14675 Register Hi = MI.getOperand(1).getReg();
14676 BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY))
14677 .addDef(Lo)
14678 .addUse(Src, {}, PPC::sub_gp8_x1);
14679 BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY))
14680 .addDef(Hi)
14681 .addUse(Src, {}, PPC::sub_gp8_x0);
14682 } else if (MI.getOpcode() == PPC::LQX_PSEUDO ||
14683 MI.getOpcode() == PPC::STQX_PSEUDO) {
14684 DebugLoc DL = MI.getDebugLoc();
14685 // Ptr is used as the ptr_rc_no_r0 part
14686 // of LQ/STQ's memory operand and adding result of RA and RB,
14687 // so it has to be g8rc_and_g8rc_nox0.
14688 Register Ptr =
14689 F->getRegInfo().createVirtualRegister(&PPC::G8RC_and_G8RC_NOX0RegClass);
14690 Register Val = MI.getOperand(0).getReg();
14691 Register RA = MI.getOperand(1).getReg();
14692 Register RB = MI.getOperand(2).getReg();
14693 BuildMI(*BB, MI, DL, TII->get(PPC::ADD8), Ptr).addReg(RA).addReg(RB);
14694 BuildMI(*BB, MI, DL,
14695 MI.getOpcode() == PPC::LQX_PSEUDO ? TII->get(PPC::LQ)
14696 : TII->get(PPC::STQ))
14697 .addReg(Val, getDefRegState(MI.getOpcode() == PPC::LQX_PSEUDO))
14698 .addImm(0)
14699 .addReg(Ptr);
14700 } else if (MI.getOpcode() == PPC::LWAT_PSEUDO ||
14701 MI.getOpcode() == PPC::LDAT_PSEUDO) {
14702 DebugLoc DL = MI.getDebugLoc();
14703 Register DstReg = MI.getOperand(0).getReg();
14704 Register PtrReg = MI.getOperand(1).getReg();
14705 Register ValReg = MI.getOperand(2).getReg();
14706 unsigned FC = MI.getOperand(3).getImm();
14707 bool IsLwat = MI.getOpcode() == PPC::LWAT_PSEUDO;
14708 Register Val64 = MRI.createVirtualRegister(&PPC::G8RCRegClass);
14709 if (IsLwat)
14710 BuildMI(*BB, MI, DL, TII->get(TargetOpcode::SUBREG_TO_REG), Val64)
14711 .addReg(ValReg)
14712 .addImm(PPC::sub_32);
14713 else
14714 Val64 = ValReg;
14715
14716 Register G8rPair = MRI.createVirtualRegister(&PPC::G8pRCRegClass);
14717 Register UndefG8r = MRI.createVirtualRegister(&PPC::G8RCRegClass);
14718 BuildMI(*BB, MI, DL, TII->get(TargetOpcode::IMPLICIT_DEF), UndefG8r);
14719 BuildMI(*BB, MI, DL, TII->get(PPC::REG_SEQUENCE), G8rPair)
14720 .addReg(UndefG8r)
14721 .addImm(PPC::sub_gp8_x0)
14722 .addReg(Val64)
14723 .addImm(PPC::sub_gp8_x1);
14724
14725 Register PairResult = MRI.createVirtualRegister(&PPC::G8pRCRegClass);
14726 BuildMI(*BB, MI, DL, TII->get(IsLwat ? PPC::LWAT : PPC::LDAT), PairResult)
14727 .addReg(G8rPair)
14728 .addReg(PtrReg)
14729 .addImm(FC);
14730 Register Result64 = MRI.createVirtualRegister(&PPC::G8RCRegClass);
14731 BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY), Result64)
14732 .addReg(PairResult, {}, PPC::sub_gp8_x0);
14733 if (IsLwat)
14734 BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY), DstReg)
14735 .addReg(Result64, {}, PPC::sub_32);
14736 else
14737 BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY), DstReg)
14738 .addReg(Result64);
14739 } else if (MI.getOpcode() == PPC::LWAT_COND_PSEUDO ||
14740 MI.getOpcode() == PPC::LDAT_COND_PSEUDO) {
14741 DebugLoc DL = MI.getDebugLoc();
14742 Register DstReg = MI.getOperand(0).getReg();
14743 Register PtrReg = MI.getOperand(1).getReg();
14744 unsigned FC = MI.getOperand(2).getImm();
14745 bool IsLwat_Cond = MI.getOpcode() == PPC::LWAT_COND_PSEUDO;
14746
14747 Register Pair = MRI.createVirtualRegister(&PPC::G8pRCRegClass);
14748 BuildMI(*BB, MI, DL, TII->get(TargetOpcode::IMPLICIT_DEF), Pair);
14749
14750 Register PairResult = MRI.createVirtualRegister(&PPC::G8pRCRegClass);
14751 BuildMI(*BB, MI, DL, TII->get(IsLwat_Cond ? PPC::LWAT : PPC::LDAT),
14752 PairResult)
14753 .addReg(Pair)
14754 .addReg(PtrReg)
14755 .addImm(FC);
14756 Register Result64 = MRI.createVirtualRegister(&PPC::G8RCRegClass);
14757 BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY), Result64)
14758 .addReg(PairResult, {}, PPC::sub_gp8_x0);
14759 if (IsLwat_Cond)
14760 BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY), DstReg)
14761 .addReg(Result64, {}, PPC::sub_32);
14762 else
14763 BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY), DstReg)
14764 .addReg(Result64);
14765 } else {
14766 llvm_unreachable("Unexpected instr type to insert");
14767 }
14768
14769 MI.eraseFromParent(); // The pseudo instruction is gone now.
14770 return BB;
14771}
14772
14773//===----------------------------------------------------------------------===//
14774// Target Optimization Hooks
14775//===----------------------------------------------------------------------===//
14776
14777static int getEstimateRefinementSteps(EVT VT, const PPCSubtarget &Subtarget) {
14778 // For the estimates, convergence is quadratic, so we essentially double the
14779 // number of digits correct after every iteration. For both FRE and FRSQRTE,
14780 // the minimum architected relative accuracy is 2^-5. When hasRecipPrec(),
14781 // this is 2^-14. IEEE float has 23 digits and double has 52 digits.
14782 int RefinementSteps = Subtarget.hasRecipPrec() ? 1 : 3;
14783 if (VT.getScalarType() == MVT::f64)
14784 RefinementSteps++;
14785 return RefinementSteps;
14786}
14787
14788SDValue PPCTargetLowering::getSqrtInputTest(SDValue Op, SelectionDAG &DAG,
14789 const DenormalMode &Mode,
14790 SDNodeFlags Flags) const {
14791 // We only have VSX Vector Test for software Square Root.
14792 EVT VT = Op.getValueType();
14793 if (!isTypeLegal(MVT::i1) ||
14794 (VT != MVT::f64 &&
14795 ((VT != MVT::v2f64 && VT != MVT::v4f32) || !Subtarget.hasVSX())))
14796 return TargetLowering::getSqrtInputTest(Op, DAG, Mode, Flags);
14797
14798 SDLoc DL(Op);
14799 // The output register of FTSQRT is CR field.
14800 SDValue FTSQRT = DAG.getNode(PPCISD::FTSQRT, DL, MVT::i32, Op, Flags);
14801 // ftsqrt BF,FRB
14802 // Let e_b be the unbiased exponent of the double-precision
14803 // floating-point operand in register FRB.
14804 // fe_flag is set to 1 if either of the following conditions occurs.
14805 // - The double-precision floating-point operand in register FRB is a zero,
14806 // a NaN, or an infinity, or a negative value.
14807 // - e_b is less than or equal to -970.
14808 // Otherwise fe_flag is set to 0.
14809 // Both VSX and non-VSX versions would set EQ bit in the CR if the number is
14810 // not eligible for iteration. (zero/negative/infinity/nan or unbiased
14811 // exponent is less than -970)
14812 SDValue SRIdxVal = DAG.getTargetConstant(PPC::sub_eq, DL, MVT::i32);
14813 return SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, MVT::i1,
14814 FTSQRT, SRIdxVal),
14815 0);
14816}
14817
14818SDValue
14819PPCTargetLowering::getSqrtResultForDenormInput(SDValue Op,
14820 SelectionDAG &DAG) const {
14821 // We only have VSX Vector Square Root.
14822 EVT VT = Op.getValueType();
14823 if (VT != MVT::f64 &&
14824 ((VT != MVT::v2f64 && VT != MVT::v4f32) || !Subtarget.hasVSX()))
14826
14827 return DAG.getNode(PPCISD::FSQRT, SDLoc(Op), VT, Op);
14828}
14829
14830SDValue PPCTargetLowering::getSqrtEstimate(SDValue Operand, SelectionDAG &DAG,
14831 int Enabled, int &RefinementSteps,
14832 bool &UseOneConstNR,
14833 bool Reciprocal) const {
14834 EVT VT = Operand.getValueType();
14835 if ((VT == MVT::f32 && Subtarget.hasFRSQRTES()) ||
14836 (VT == MVT::f64 && Subtarget.hasFRSQRTE()) ||
14837 (VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
14838 (VT == MVT::v2f64 && Subtarget.hasVSX())) {
14839 if (RefinementSteps == ReciprocalEstimate::Unspecified)
14840 RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
14841
14842 // The Newton-Raphson computation with a single constant does not provide
14843 // enough accuracy on some CPUs.
14844 UseOneConstNR = !Subtarget.needsTwoConstNR();
14845 return DAG.getNode(PPCISD::FRSQRTE, SDLoc(Operand), VT, Operand);
14846 }
14847 return SDValue();
14848}
14849
14850SDValue PPCTargetLowering::getRecipEstimate(SDValue Operand, SelectionDAG &DAG,
14851 int Enabled,
14852 int &RefinementSteps) const {
14853 EVT VT = Operand.getValueType();
14854 if ((VT == MVT::f32 && Subtarget.hasFRES()) ||
14855 (VT == MVT::f64 && Subtarget.hasFRE()) ||
14856 (VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
14857 (VT == MVT::v2f64 && Subtarget.hasVSX())) {
14858 if (RefinementSteps == ReciprocalEstimate::Unspecified)
14859 RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
14860 return DAG.getNode(PPCISD::FRE, SDLoc(Operand), VT, Operand);
14861 }
14862 return SDValue();
14863}
14864
14866 // Note: This functionality is used only when arcp is enabled, and
14867 // on cores with reciprocal estimates (which are used when arcp is
14868 // enabled for division), this functionality is redundant with the default
14869 // combiner logic (once the division -> reciprocal/multiply transformation
14870 // has taken place). As a result, this matters more for older cores than for
14871 // newer ones.
14872
14873 // Combine multiple FDIVs with the same divisor into multiple FMULs by the
14874 // reciprocal if there are two or more FDIVs (for embedded cores with only
14875 // one FP pipeline) for three or more FDIVs (for generic OOO cores).
14876 switch (Subtarget.getCPUDirective()) {
14877 default:
14878 return 3;
14879 case PPC::DIR_440:
14880 case PPC::DIR_A2:
14881 case PPC::DIR_E500:
14882 case PPC::DIR_E500mc:
14883 case PPC::DIR_E5500:
14884 return 2;
14885 }
14886}
14887
14888// isConsecutiveLSLoc needs to work even if all adds have not yet been
14889// collapsed, and so we need to look through chains of them.
14891 int64_t& Offset, SelectionDAG &DAG) {
14892 if (DAG.isBaseWithConstantOffset(Loc)) {
14893 Base = Loc.getOperand(0);
14894 Offset += cast<ConstantSDNode>(Loc.getOperand(1))->getSExtValue();
14895
14896 // The base might itself be a base plus an offset, and if so, accumulate
14897 // that as well.
14898 getBaseWithConstantOffset(Loc.getOperand(0), Base, Offset, DAG);
14899 }
14900}
14901
                               unsigned Bytes, int Dist,
                               SelectionDAG &DAG) {
  // The access must be exactly Bytes wide for the distance math below to hold.
  if (VT.getSizeInBits() / 8 != Bytes)
    return false;

  SDValue BaseLoc = Base->getBasePtr();
  if (Loc.getOpcode() == ISD::FrameIndex) {
    // Both addresses are frame indices: compare the frame objects' sizes and
    // offsets directly rather than reasoning about pointer arithmetic.
    if (BaseLoc.getOpcode() != ISD::FrameIndex)
      return false;
    int FI = cast<FrameIndexSDNode>(Loc)->getIndex();
    int BFI = cast<FrameIndexSDNode>(BaseLoc)->getIndex();
    int FS = MFI.getObjectSize(FI);
    int BFS = MFI.getObjectSize(BFI);
    if (FS != BFS || FS != (int)Bytes) return false;
    return MFI.getObjectOffset(FI) == (MFI.getObjectOffset(BFI) + Dist*Bytes);
  }

  // Otherwise, strip any (possibly nested) constant offsets off both
  // addresses and compare the remaining bases plus accumulated displacement.
  SDValue Base1 = Loc, Base2 = BaseLoc;
  int64_t Offset1 = 0, Offset2 = 0;
  getBaseWithConstantOffset(Loc, Base1, Offset1, DAG);
  getBaseWithConstantOffset(BaseLoc, Base2, Offset2, DAG);
  if (Base1 == Base2 && Offset1 == (Offset2 + Dist * Bytes))
    return true;

  // Finally, try global addresses: accesses to the same global whose offsets
  // differ by exactly Dist*Bytes are also consecutive.
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  const GlobalValue *GV1 = nullptr;
  const GlobalValue *GV2 = nullptr;
  Offset1 = 0;
  Offset2 = 0;
  bool isGA1 = TLI.isGAPlusOffset(Loc.getNode(), GV1, Offset1);
  bool isGA2 = TLI.isGAPlusOffset(BaseLoc.getNode(), GV2, Offset2);
  if (isGA1 && isGA2 && GV1 == GV2)
    return Offset1 == (Offset2 + Dist*Bytes);
  return false;
}
14939
// Like SelectionDAG::isConsecutiveLoad, but also works for stores, and does
// not enforce equality of the chain operands.
                            unsigned Bytes, int Dist,
                            SelectionDAG &DAG) {
    EVT VT = LS->getMemoryVT();
    SDValue Loc = LS->getBasePtr();
    return isConsecutiveLSLoc(Loc, VT, Base, Bytes, Dist, DAG);
  }

  if (N->getOpcode() == ISD::INTRINSIC_W_CHAIN) {
    // Recognized load intrinsics: map each one to the memory type it
    // accesses. The pointer operand of these intrinsics is operand 2.
    EVT VT;
    switch (N->getConstantOperandVal(1)) {
    default: return false;
    case Intrinsic::ppc_altivec_lvx:
    case Intrinsic::ppc_altivec_lvxl:
    case Intrinsic::ppc_vsx_lxvw4x:
    case Intrinsic::ppc_vsx_lxvw4x_be:
      VT = MVT::v4i32;
      break;
    case Intrinsic::ppc_vsx_lxvd2x:
    case Intrinsic::ppc_vsx_lxvd2x_be:
      VT = MVT::v2f64;
      break;
    case Intrinsic::ppc_altivec_lvebx:
      VT = MVT::i8;
      break;
    case Intrinsic::ppc_altivec_lvehx:
      VT = MVT::i16;
      break;
    case Intrinsic::ppc_altivec_lvewx:
      VT = MVT::i32;
      break;
    }

    return isConsecutiveLSLoc(N->getOperand(2), VT, Base, Bytes, Dist, DAG);
  }

  if (N->getOpcode() == ISD::INTRINSIC_VOID) {
    // Same mapping for the recognized store intrinsics; their pointer is
    // operand 3.
    EVT VT;
    switch (N->getConstantOperandVal(1)) {
    default: return false;
    case Intrinsic::ppc_altivec_stvx:
    case Intrinsic::ppc_altivec_stvxl:
    case Intrinsic::ppc_vsx_stxvw4x:
      VT = MVT::v4i32;
      break;
    case Intrinsic::ppc_vsx_stxvd2x:
      VT = MVT::v2f64;
      break;
    case Intrinsic::ppc_vsx_stxvw4x_be:
      VT = MVT::v4i32;
      break;
    case Intrinsic::ppc_vsx_stxvd2x_be:
      VT = MVT::v2f64;
      break;
    case Intrinsic::ppc_altivec_stvebx:
      VT = MVT::i8;
      break;
    case Intrinsic::ppc_altivec_stvehx:
      VT = MVT::i16;
      break;
    case Intrinsic::ppc_altivec_stvewx:
      VT = MVT::i32;
      break;
    }

    return isConsecutiveLSLoc(N->getOperand(3), VT, Base, Bytes, Dist, DAG);
  }

  return false;
}
15013
// Return true if there is a nearby consecutive load to the one provided
// (regardless of alignment). We search up and down the chain, looking through
// token factors and other loads (but nothing else). As a result, a true result
// indicates that it is safe to create a new consecutive load adjacent to the
// load provided.
  SDValue Chain = LD->getChain();
  EVT VT = LD->getMemoryVT();

  SmallPtrSet<SDNode *, 16> LoadRoots;
  SmallVector<SDNode *, 8> Queue(1, Chain.getNode());

  // First, search up the chain, branching to follow all token-factor operands.
  // If we find a consecutive load, then we're done, otherwise, record all
  // nodes just above the top-level loads and token factors.
  while (!Queue.empty()) {
    SDNode *ChainNext = Queue.pop_back_val();
    if (!Visited.insert(ChainNext).second)
      continue;

    if (MemSDNode *ChainLD = dyn_cast<MemSDNode>(ChainNext)) {
      if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG))
        return true;

      if (!Visited.count(ChainLD->getChain().getNode()))
        Queue.push_back(ChainLD->getChain().getNode());
    } else if (ChainNext->getOpcode() == ISD::TokenFactor) {
      for (const SDUse &O : ChainNext->ops())
        if (!Visited.count(O.getNode()))
          Queue.push_back(O.getNode());
    } else
      // Anything else terminates the upward walk; remember it as a root for
      // the downward phase.
      LoadRoots.insert(ChainNext);
  }

  // Second, search down the chain, starting from the top-level nodes recorded
  // in the first phase. These top-level nodes are the nodes just above all
  // loads and token factors. Starting with their uses, recursively look
  // through all loads (just the chain uses) and token factors to find a
  // consecutive load.
  Visited.clear();
  Queue.clear();

  for (SDNode *I : LoadRoots) {
    Queue.push_back(I);

    while (!Queue.empty()) {
      SDNode *LoadRoot = Queue.pop_back_val();
      if (!Visited.insert(LoadRoot).second)
        continue;

      if (MemSDNode *ChainLD = dyn_cast<MemSDNode>(LoadRoot))
        if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG))
          return true;

      // Only follow users that are memory nodes chained to this one, or
      // token factors.
      for (SDNode *U : LoadRoot->users())
        if (((isa<MemSDNode>(U) &&
              cast<MemSDNode>(U)->getChain().getNode() == LoadRoot) ||
             U->getOpcode() == ISD::TokenFactor) &&
            !Visited.count(U))
          Queue.push_back(U);
    }
  }

  return false;
}
15080
15081/// This function is called when we have proved that a SETCC node can be replaced
15082/// by subtraction (and other supporting instructions) so that the result of
15083/// comparison is kept in a GPR instead of CR. This function is purely for
15084/// codegen purposes and has some flags to guide the codegen process.
15085static SDValue generateEquivalentSub(SDNode *N, int Size, bool Complement,
15086 bool Swap, SDLoc &DL, SelectionDAG &DAG) {
15087 assert(N->getOpcode() == ISD::SETCC && "ISD::SETCC Expected.");
15088
15089 // Zero extend the operands to the largest legal integer. Originally, they
15090 // must be of a strictly smaller size.
15091 auto Op0 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(0),
15092 DAG.getConstant(Size, DL, MVT::i32));
15093 auto Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(1),
15094 DAG.getConstant(Size, DL, MVT::i32));
15095
15096 // Swap if needed. Depends on the condition code.
15097 if (Swap)
15098 std::swap(Op0, Op1);
15099
15100 // Subtract extended integers.
15101 auto SubNode = DAG.getNode(ISD::SUB, DL, MVT::i64, Op0, Op1);
15102
15103 // Move the sign bit to the least significant position and zero out the rest.
15104 // Now the least significant bit carries the result of original comparison.
15105 auto Shifted = DAG.getNode(ISD::SRL, DL, MVT::i64, SubNode,
15106 DAG.getConstant(Size - 1, DL, MVT::i32));
15107 auto Final = Shifted;
15108
15109 // Complement the result if needed. Based on the condition code.
15110 if (Complement)
15111 Final = DAG.getNode(ISD::XOR, DL, MVT::i64, Shifted,
15112 DAG.getConstant(1, DL, MVT::i64));
15113
15114 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, Final);
15115}
15116
// Replace an unsigned SETCC whose every user is a ZERO_EXTEND with a
// subtraction-based sequence (see generateEquivalentSub) so the result lives
// in a GPR rather than a CR field.
SDValue PPCTargetLowering::ConvertSETCCToSubtract(SDNode *N,
                                                  DAGCombinerInfo &DCI) const {
  assert(N->getOpcode() == ISD::SETCC && "ISD::SETCC Expected.");

  SelectionDAG &DAG = DCI.DAG;
  SDLoc DL(N);

  // Size of integers being compared has a critical role in the following
  // analysis, so we prefer to do this when all types are legal.
  if (!DCI.isAfterLegalizeDAG())
    return SDValue();

  // If all users of SETCC extend its value to a legal integer type
  // then we replace SETCC with a subtraction
  for (const SDNode *U : N->users())
    if (U->getOpcode() != ISD::ZERO_EXTEND)
      return SDValue();

  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
  auto OpSize = N->getOperand(0).getValueSizeInBits();


  if (OpSize < Size) {
    // Each unsigned comparison maps to one subtraction variant; the two bool
    // flags choose whether to swap the operands and/or complement the result.
    switch (CC) {
    default: break;
    case ISD::SETULT:
      return generateEquivalentSub(N, Size, false, false, DL, DAG);
    case ISD::SETULE:
      return generateEquivalentSub(N, Size, true, true, DL, DAG);
    case ISD::SETUGT:
      return generateEquivalentSub(N, Size, false, true, DL, DAG);
    case ISD::SETUGE:
      return generateEquivalentSub(N, Size, true, false, DL, DAG);
    }
  }

  return SDValue();
}
15156
// When tracking CR bits, recognize a cluster of bit operations whose inputs
// are all extensions from i1 (or constants) and whose result is truncated
// back to i1 (the "trunc" may also be the implicit truncation performed by a
// setcc/select_cc), and rewrite the whole cluster to operate on i1 directly
// so the values can stay in CR bits.
SDValue PPCTargetLowering::DAGCombineTruncBoolExt(SDNode *N,
                                                  DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  SDLoc dl(N);

  assert(Subtarget.useCRBits() && "Expecting to be tracking CR bits");
  // If we're tracking CR bits, we need to be careful that we don't have:
  //   trunc(binary-ops(zext(x), zext(y)))
  // or
  //   trunc(binary-ops(binary-ops(zext(x), zext(y)), ...)
  // such that we're unnecessarily moving things into GPRs when it would be
  // better to keep them in CR bits.

  // Note that trunc here can be an actual i1 trunc, or can be the effective
  // truncation that comes from a setcc or select_cc.
  if (N->getOpcode() == ISD::TRUNCATE &&
      N->getValueType(0) != MVT::i1)
    return SDValue();

  if (N->getOperand(0).getValueType() != MVT::i32 &&
      N->getOperand(0).getValueType() != MVT::i64)
    return SDValue();

  if (N->getOpcode() == ISD::SETCC ||
      N->getOpcode() == ISD::SELECT_CC) {
    // If we're looking at a comparison, then we need to make sure that the
    // high bits (all except for the first) don't matter the result.
    ISD::CondCode CC =
      cast<CondCodeSDNode>(N->getOperand(
        N->getOpcode() == ISD::SETCC ? 2 : 4))->get();
    unsigned OpBits = N->getOperand(0).getValueSizeInBits();

    if (ISD::isSignedIntSetCC(CC)) {
      // Signed compares need both operands fully sign-extended.
      if (DAG.ComputeNumSignBits(N->getOperand(0)) != OpBits ||
          DAG.ComputeNumSignBits(N->getOperand(1)) != OpBits)
        return SDValue();
    } else if (ISD::isUnsignedIntSetCC(CC)) {
      // Unsigned compares need all high bits known zero; otherwise fall back
      // to the subtraction-based SETCC lowering where applicable.
      if (!DAG.MaskedValueIsZero(N->getOperand(0),
                                 APInt::getHighBitsSet(OpBits, OpBits-1)) ||
          !DAG.MaskedValueIsZero(N->getOperand(1),
                                 APInt::getHighBitsSet(OpBits, OpBits-1)))
        return (N->getOpcode() == ISD::SETCC ? ConvertSETCCToSubtract(N, DCI)
                                             : SDValue());
    } else {
      // This is neither a signed nor an unsigned comparison, just make sure
      // that the high bits are equal.
      KnownBits Op1Known = DAG.computeKnownBits(N->getOperand(0));
      KnownBits Op2Known = DAG.computeKnownBits(N->getOperand(1));

      // We don't really care about what is known about the first bit (if
      // anything), so pretend that it is known zero for both to ensure they can
      // be compared as constants.
      Op1Known.Zero.setBit(0); Op1Known.One.clearBit(0);
      Op2Known.Zero.setBit(0); Op2Known.One.clearBit(0);

      if (!Op1Known.isConstant() || !Op2Known.isConstant() ||
          Op1Known.getConstant() != Op2Known.getConstant())
        return SDValue();
    }
  }

  // We now know that the higher-order bits are irrelevant, we just need to
  // make sure that all of the intermediate operations are bit operations, and
  // all inputs are extensions.
  if (N->getOperand(0).getOpcode() != ISD::AND &&
      N->getOperand(0).getOpcode() != ISD::OR &&
      N->getOperand(0).getOpcode() != ISD::XOR &&
      N->getOperand(0).getOpcode() != ISD::SELECT &&
      N->getOperand(0).getOpcode() != ISD::SELECT_CC &&
      N->getOperand(0).getOpcode() != ISD::TRUNCATE &&
      N->getOperand(0).getOpcode() != ISD::SIGN_EXTEND &&
      N->getOperand(0).getOpcode() != ISD::ZERO_EXTEND &&
      N->getOperand(0).getOpcode() != ISD::ANY_EXTEND)
    return SDValue();

  if ((N->getOpcode() == ISD::SETCC || N->getOpcode() == ISD::SELECT_CC) &&
      N->getOperand(1).getOpcode() != ISD::AND &&
      N->getOperand(1).getOpcode() != ISD::OR &&
      N->getOperand(1).getOpcode() != ISD::XOR &&
      N->getOperand(1).getOpcode() != ISD::SELECT &&
      N->getOperand(1).getOpcode() != ISD::SELECT_CC &&
      N->getOperand(1).getOpcode() != ISD::TRUNCATE &&
      N->getOperand(1).getOpcode() != ISD::SIGN_EXTEND &&
      N->getOperand(1).getOpcode() != ISD::ZERO_EXTEND &&
      N->getOperand(1).getOpcode() != ISD::ANY_EXTEND)
    return SDValue();

  SmallVector<SDValue, 8> BinOps, PromOps;
  SmallPtrSet<SDNode *, 16> Visited;

  // Seed the worklist with N's (up to two) operands: i1 extensions and
  // constants go straight to Inputs, everything else is a candidate binary
  // operation to explore.
  for (unsigned i = 0; i < 2; ++i) {
    if (((N->getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
          N->getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
          N->getOperand(i).getOpcode() == ISD::ANY_EXTEND) &&
          N->getOperand(i).getOperand(0).getValueType() == MVT::i1) ||
        isa<ConstantSDNode>(N->getOperand(i)))
      Inputs.push_back(N->getOperand(i));
    else
      BinOps.push_back(N->getOperand(i));

    if (N->getOpcode() == ISD::TRUNCATE)
      break;
  }

  // Visit all inputs, collect all binary operations (and, or, xor and
  // select) that are all fed by extensions.
  while (!BinOps.empty()) {
    SDValue BinOp = BinOps.pop_back_val();

    if (!Visited.insert(BinOp.getNode()).second)
      continue;

    PromOps.push_back(BinOp);

    for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) {
      // The condition of the select is not promoted.
      if (BinOp.getOpcode() == ISD::SELECT && i == 0)
        continue;
      if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3)
        continue;

      if (((BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
            BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
            BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) &&
            BinOp.getOperand(i).getOperand(0).getValueType() == MVT::i1) ||
          isa<ConstantSDNode>(BinOp.getOperand(i))) {
        Inputs.push_back(BinOp.getOperand(i));
      } else if (BinOp.getOperand(i).getOpcode() == ISD::AND ||
                 BinOp.getOperand(i).getOpcode() == ISD::OR ||
                 BinOp.getOperand(i).getOpcode() == ISD::XOR ||
                 BinOp.getOperand(i).getOpcode() == ISD::SELECT ||
                 BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC ||
                 BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE ||
                 BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
                 BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
                 BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) {
        BinOps.push_back(BinOp.getOperand(i));
      } else {
        // We have an input that is not an extension or another binary
        // operation; we'll abort this transformation.
        return SDValue();
      }
    }
  }

  // Make sure that this is a self-contained cluster of operations (which
  // is not quite the same thing as saying that everything has only one
  // use).
  for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
    if (isa<ConstantSDNode>(Inputs[i]))
      continue;

    for (const SDNode *User : Inputs[i].getNode()->users()) {
      if (User != N && !Visited.count(User))
        return SDValue();

      // Make sure that we're not going to promote the non-output-value
      // operand(s) or SELECT or SELECT_CC.
      // FIXME: Although we could sometimes handle this, and it does occur in
      // practice that one of the condition inputs to the select is also one of
      // the outputs, we currently can't deal with this.
      if (User->getOpcode() == ISD::SELECT) {
        if (User->getOperand(0) == Inputs[i])
          return SDValue();
      } else if (User->getOpcode() == ISD::SELECT_CC) {
        if (User->getOperand(0) == Inputs[i] ||
            User->getOperand(1) == Inputs[i])
          return SDValue();
      }
    }
  }

  for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) {
    for (const SDNode *User : PromOps[i].getNode()->users()) {
      if (User != N && !Visited.count(User))
        return SDValue();

      // Make sure that we're not going to promote the non-output-value
      // operand(s) or SELECT or SELECT_CC.
      // FIXME: Although we could sometimes handle this, and it does occur in
      // practice that one of the condition inputs to the select is also one of
      // the outputs, we currently can't deal with this.
      if (User->getOpcode() == ISD::SELECT) {
        if (User->getOperand(0) == PromOps[i])
          return SDValue();
      } else if (User->getOpcode() == ISD::SELECT_CC) {
        if (User->getOperand(0) == PromOps[i] ||
            User->getOperand(1) == PromOps[i])
          return SDValue();
      }
    }
  }

  // Replace all inputs with the extension operand.
  for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
    // Constants may have users outside the cluster of to-be-promoted nodes,
    // and so we need to replace those as we do the promotions.
    if (isa<ConstantSDNode>(Inputs[i]))
      continue;
    else
      DAG.ReplaceAllUsesOfValueWith(Inputs[i], Inputs[i].getOperand(0));
  }

  // Hold the promoted nodes in handles: the RAUW calls below may CSE with
  // existing nodes and delete the originals out from under us.
  std::list<HandleSDNode> PromOpHandles;
  for (auto &PromOp : PromOps)
    PromOpHandles.emplace_back(PromOp);

  // Replace all operations (these are all the same, but have a different
  // (i1) return type). DAG.getNode will validate that the types of
  // a binary operator match, so go through the list in reverse so that
  // we've likely promoted both operands first. Any intermediate truncations or
  // extensions disappear.
  while (!PromOpHandles.empty()) {
    SDValue PromOp = PromOpHandles.back().getValue();
    PromOpHandles.pop_back();

    if (PromOp.getOpcode() == ISD::TRUNCATE ||
        PromOp.getOpcode() == ISD::SIGN_EXTEND ||
        PromOp.getOpcode() == ISD::ZERO_EXTEND ||
        PromOp.getOpcode() == ISD::ANY_EXTEND) {
      if (!isa<ConstantSDNode>(PromOp.getOperand(0)) &&
          PromOp.getOperand(0).getValueType() != MVT::i1) {
        // The operand is not yet ready (see comment below).
        PromOpHandles.emplace_front(PromOp);
        continue;
      }

      SDValue RepValue = PromOp.getOperand(0);
      if (isa<ConstantSDNode>(RepValue))
        RepValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, RepValue);

      DAG.ReplaceAllUsesOfValueWith(PromOp, RepValue);
      continue;
    }

    // C is the index of the first operand to be promoted: past the condition
    // for SELECT, past the compare operands for SELECT_CC.
    unsigned C;
    switch (PromOp.getOpcode()) {
    default: C = 0; break;
    case ISD::SELECT: C = 1; break;
    case ISD::SELECT_CC: C = 2; break;
    }

    if ((!isa<ConstantSDNode>(PromOp.getOperand(C)) &&
         PromOp.getOperand(C).getValueType() != MVT::i1) ||
        (!isa<ConstantSDNode>(PromOp.getOperand(C+1)) &&
         PromOp.getOperand(C+1).getValueType() != MVT::i1)) {
      // The to-be-promoted operands of this node have not yet been
      // promoted (this should be rare because we're going through the
      // list backward, but if one of the operands has several users in
      // this cluster of to-be-promoted nodes, it is possible).
      PromOpHandles.emplace_front(PromOp);
      continue;
    }


    // If there are any constant inputs, make sure they're replaced now.
    for (unsigned i = 0; i < 2; ++i)
      if (isa<ConstantSDNode>(Ops[C+i]))
        Ops[C+i] = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, Ops[C+i]);

    DAG.ReplaceAllUsesOfValueWith(PromOp,
      DAG.getNode(PromOp.getOpcode(), dl, MVT::i1, Ops));
  }

  // Now we're left with the initial truncation itself.
  if (N->getOpcode() == ISD::TRUNCATE)
    return N->getOperand(0);

  // Otherwise, this is a comparison. The operands to be compared have just
  // changed type (to i1), but everything else is the same.
  return SDValue(N, 0);
}
15431
// Counterpart of DAGCombineTruncBoolExt: sink a zext/sext/aext through a
// cluster of bit operations fed entirely by truncations (or constants), so
// the computation happens in the wider type and the final extension can
// often be dropped (or reduced to a mask / shift pair).
SDValue PPCTargetLowering::DAGCombineExtBoolTrunc(SDNode *N,
                                                  DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  SDLoc dl(N);

  // If we're tracking CR bits, we need to be careful that we don't have:
  //   zext(binary-ops(trunc(x), trunc(y)))
  // or
  //   zext(binary-ops(binary-ops(trunc(x), trunc(y)), ...)
  // such that we're unnecessarily moving things into CR bits that can more
  // efficiently stay in GPRs. Note that if we're not certain that the high
  // bits are set as required by the final extension, we still may need to do
  // some masking to get the proper behavior.

  // This same functionality is important on PPC64 when dealing with
  // 32-to-64-bit extensions; these occur often when 32-bit values are used as
  // the return values of functions. Because it is so similar, it is handled
  // here as well.

  if (N->getValueType(0) != MVT::i32 &&
      N->getValueType(0) != MVT::i64)
    return SDValue();

  if (!((N->getOperand(0).getValueType() == MVT::i1 && Subtarget.useCRBits()) ||
        (N->getOperand(0).getValueType() == MVT::i32 && Subtarget.isPPC64())))
    return SDValue();

  if (N->getOperand(0).getOpcode() != ISD::AND &&
      N->getOperand(0).getOpcode() != ISD::OR &&
      N->getOperand(0).getOpcode() != ISD::XOR &&
      N->getOperand(0).getOpcode() != ISD::SELECT &&
      N->getOperand(0).getOpcode() != ISD::SELECT_CC)
    return SDValue();

  SmallVector<SDValue, 8> BinOps(1, N->getOperand(0)), PromOps;
  SmallPtrSet<SDNode *, 16> Visited;

  // Visit all inputs, collect all binary operations (and, or, xor and
  // select) that are all fed by truncations.
  while (!BinOps.empty()) {
    SDValue BinOp = BinOps.pop_back_val();

    if (!Visited.insert(BinOp.getNode()).second)
      continue;

    PromOps.push_back(BinOp);

    for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) {
      // The condition of the select is not promoted.
      if (BinOp.getOpcode() == ISD::SELECT && i == 0)
        continue;
      if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3)
        continue;

      if (BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE ||
          isa<ConstantSDNode>(BinOp.getOperand(i))) {
        Inputs.push_back(BinOp.getOperand(i));
      } else if (BinOp.getOperand(i).getOpcode() == ISD::AND ||
                 BinOp.getOperand(i).getOpcode() == ISD::OR ||
                 BinOp.getOperand(i).getOpcode() == ISD::XOR ||
                 BinOp.getOperand(i).getOpcode() == ISD::SELECT ||
                 BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC) {
        BinOps.push_back(BinOp.getOperand(i));
      } else {
        // We have an input that is not a truncation or another binary
        // operation; we'll abort this transformation.
        return SDValue();
      }
    }
  }

  // The operands of a select that must be truncated when the select is
  // promoted because the operand is actually part of the to-be-promoted set.
  DenseMap<SDNode *, EVT> SelectTruncOp[2];

  // Make sure that this is a self-contained cluster of operations (which
  // is not quite the same thing as saying that everything has only one
  // use).
  for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
    if (isa<ConstantSDNode>(Inputs[i]))
      continue;

    for (SDNode *User : Inputs[i].getNode()->users()) {
      if (User != N && !Visited.count(User))
        return SDValue();

      // If we're going to promote the non-output-value operand(s) or SELECT or
      // SELECT_CC, record them for truncation.
      if (User->getOpcode() == ISD::SELECT) {
        if (User->getOperand(0) == Inputs[i])
          SelectTruncOp[0].insert(std::make_pair(User,
                                    User->getOperand(0).getValueType()));
      } else if (User->getOpcode() == ISD::SELECT_CC) {
        if (User->getOperand(0) == Inputs[i])
          SelectTruncOp[0].insert(std::make_pair(User,
                                    User->getOperand(0).getValueType()));
        if (User->getOperand(1) == Inputs[i])
          SelectTruncOp[1].insert(std::make_pair(User,
                                    User->getOperand(1).getValueType()));
      }
    }
  }

  for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) {
    for (SDNode *User : PromOps[i].getNode()->users()) {
      if (User != N && !Visited.count(User))
        return SDValue();

      // If we're going to promote the non-output-value operand(s) or SELECT or
      // SELECT_CC, record them for truncation.
      if (User->getOpcode() == ISD::SELECT) {
        if (User->getOperand(0) == PromOps[i])
          SelectTruncOp[0].insert(std::make_pair(User,
                                    User->getOperand(0).getValueType()));
      } else if (User->getOpcode() == ISD::SELECT_CC) {
        if (User->getOperand(0) == PromOps[i])
          SelectTruncOp[0].insert(std::make_pair(User,
                                    User->getOperand(0).getValueType()));
        if (User->getOperand(1) == PromOps[i])
          SelectTruncOp[1].insert(std::make_pair(User,
                                    User->getOperand(1).getValueType()));
      }
    }
  }

  unsigned PromBits = N->getOperand(0).getValueSizeInBits();
  bool ReallyNeedsExt = false;
  if (N->getOpcode() != ISD::ANY_EXTEND) {
    // If all of the inputs are not already sign/zero extended, then
    // we'll still need to do that at the end.
    for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
      if (isa<ConstantSDNode>(Inputs[i]))
        continue;

      unsigned OpBits =
        Inputs[i].getOperand(0).getValueSizeInBits();
      assert(PromBits < OpBits && "Truncation not to a smaller bit count?");

      if ((N->getOpcode() == ISD::ZERO_EXTEND &&
           !DAG.MaskedValueIsZero(Inputs[i].getOperand(0),
                                  APInt::getHighBitsSet(OpBits,
                                                        OpBits-PromBits))) ||
          (N->getOpcode() == ISD::SIGN_EXTEND &&
           DAG.ComputeNumSignBits(Inputs[i].getOperand(0)) <
             (OpBits-(PromBits-1)))) {
        ReallyNeedsExt = true;
        break;
      }
    }
  }

  // Convert PromOps to handles before doing any RAUW operations, as these
  // may CSE with existing nodes, deleting the originals.
  std::list<HandleSDNode> PromOpHandles;
  for (auto &PromOp : PromOps)
    PromOpHandles.emplace_back(PromOp);

  // Replace all inputs, either with the truncation operand, or a
  // truncation or extension to the final output type.
  for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
    // Constant inputs need to be replaced with the to-be-promoted nodes that
    // use them because they might have users outside of the cluster of
    // promoted nodes.
    if (isa<ConstantSDNode>(Inputs[i]))
      continue;

    SDValue InSrc = Inputs[i].getOperand(0);
    if (Inputs[i].getValueType() == N->getValueType(0))
      DAG.ReplaceAllUsesOfValueWith(Inputs[i], InSrc);
    else if (N->getOpcode() == ISD::SIGN_EXTEND)
      DAG.ReplaceAllUsesOfValueWith(Inputs[i],
        DAG.getSExtOrTrunc(InSrc, dl, N->getValueType(0)));
    else if (N->getOpcode() == ISD::ZERO_EXTEND)
      DAG.ReplaceAllUsesOfValueWith(Inputs[i],
        DAG.getZExtOrTrunc(InSrc, dl, N->getValueType(0)));
    else
      DAG.ReplaceAllUsesOfValueWith(Inputs[i],
        DAG.getAnyExtOrTrunc(InSrc, dl, N->getValueType(0)));
  }

  // Replace all operations (these are all the same, but have a different
  // (promoted) return type). DAG.getNode will validate that the types of
  // a binary operator match, so go through the list in reverse so that
  // we've likely promoted both operands first.
  while (!PromOpHandles.empty()) {
    SDValue PromOp = PromOpHandles.back().getValue();
    PromOpHandles.pop_back();

    // C is the index of the first operand to be promoted: past the condition
    // for SELECT, past the compare operands for SELECT_CC.
    unsigned C;
    switch (PromOp.getOpcode()) {
    default: C = 0; break;
    case ISD::SELECT: C = 1; break;
    case ISD::SELECT_CC: C = 2; break;
    }

    if ((!isa<ConstantSDNode>(PromOp.getOperand(C)) &&
         PromOp.getOperand(C).getValueType() != N->getValueType(0)) ||
        (!isa<ConstantSDNode>(PromOp.getOperand(C+1)) &&
         PromOp.getOperand(C+1).getValueType() != N->getValueType(0))) {
      // The to-be-promoted operands of this node have not yet been
      // promoted (this should be rare because we're going through the
      // list backward, but if one of the operands has several users in
      // this cluster of to-be-promoted nodes, it is possible).
      PromOpHandles.emplace_front(PromOp);
      continue;
    }

    // For SELECT and SELECT_CC nodes, we do a similar check for any
    // to-be-promoted comparison inputs.
    if (PromOp.getOpcode() == ISD::SELECT ||
        PromOp.getOpcode() == ISD::SELECT_CC) {
      if ((SelectTruncOp[0].count(PromOp.getNode()) &&
           PromOp.getOperand(0).getValueType() != N->getValueType(0)) ||
          (SelectTruncOp[1].count(PromOp.getNode()) &&
           PromOp.getOperand(1).getValueType() != N->getValueType(0))) {
        PromOpHandles.emplace_front(PromOp);
        continue;
      }
    }


    // If this node has constant inputs, then they'll need to be promoted here.
    for (unsigned i = 0; i < 2; ++i) {
      if (!isa<ConstantSDNode>(Ops[C+i]))
        continue;
      if (Ops[C+i].getValueType() == N->getValueType(0))
        continue;

      if (N->getOpcode() == ISD::SIGN_EXTEND)
        Ops[C+i] = DAG.getSExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
      else if (N->getOpcode() == ISD::ZERO_EXTEND)
        Ops[C+i] = DAG.getZExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
      else
        Ops[C+i] = DAG.getAnyExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
    }

    // If we've promoted the comparison inputs of a SELECT or SELECT_CC,
    // truncate them again to the original value type.
    if (PromOp.getOpcode() == ISD::SELECT ||
        PromOp.getOpcode() == ISD::SELECT_CC) {
      auto SI0 = SelectTruncOp[0].find(PromOp.getNode());
      if (SI0 != SelectTruncOp[0].end())
        Ops[0] = DAG.getNode(ISD::TRUNCATE, dl, SI0->second, Ops[0]);
      auto SI1 = SelectTruncOp[1].find(PromOp.getNode());
      if (SI1 != SelectTruncOp[1].end())
        Ops[1] = DAG.getNode(ISD::TRUNCATE, dl, SI1->second, Ops[1]);
    }

    DAG.ReplaceAllUsesOfValueWith(PromOp,
      DAG.getNode(PromOp.getOpcode(), dl, N->getValueType(0), Ops));
  }

  // Now we're left with the initial extension itself.
  if (!ReallyNeedsExt)
    return N->getOperand(0);

  // To zero extend, just mask off everything except for the first bit (in the
  // i1 case).
  if (N->getOpcode() == ISD::ZERO_EXTEND)
    return DAG.getNode(ISD::AND, dl, N->getValueType(0), N->getOperand(0),
                                         N->getValueSizeInBits(0), PromBits),
                                       dl, N->getValueType(0)));

  assert(N->getOpcode() == ISD::SIGN_EXTEND &&
         "Invalid extension type");
  // Sign extend via shift-left / arithmetic-shift-right by the width delta.
  EVT ShiftAmountTy = getShiftAmountTy(N->getValueType(0), DAG.getDataLayout());
  SDValue ShiftCst =
      DAG.getConstant(N->getValueSizeInBits(0) - PromBits, dl, ShiftAmountTy);
  return DAG.getNode(
      ISD::SRA, dl, N->getValueType(0),
      DAG.getNode(ISD::SHL, dl, N->getValueType(0), N->getOperand(0), ShiftCst),
      ShiftCst);
}
15708
// Check whether both operands of an i128 compare can be converted to v16i8
// values for a vcmpequb-based equality check. Each operand must be either an
// i128 constant or a single-use, non-volatile, unindexed, non-extending i128
// load.

  auto isValidForConvert = [](SDValue &Operand) {
    // The value must have a single use, since the conversion rewrites it.
    if (!Operand.hasOneUse())
      return false;

    if (Operand.getValueType() != MVT::i128)
      return false;

    // i128 constants can simply be bitcast to v16i8.
    if (Operand.getOpcode() == ISD::Constant)
      return true;

    auto *LoadNode = dyn_cast<LoadSDNode>(Operand);
    if (!LoadNode)
      return false;

    // If memory operation is volatile, do not perform any
    // optimization or transformation. Volatile operations must be preserved
    // as written to ensure correct program behavior, so we refuse the
    // conversion.

    if (LoadNode->isVolatile())
      return false;

    // Only combine loads if both use the unindexed addressing mode.
    // PowerPC AltiVec/VMX does not support vector loads or stores with
    // pre/post-increment addressing. Indexed modes may imply implicit
    // pointer updates, which are not compatible with AltiVec vector
    // instructions.
    if (LoadNode->getAddressingMode() != ISD::UNINDEXED)
      return false;

    // Only combine loads if both are non-extending loads
    // (ISD::NON_EXTLOAD). Extending loads (such as ISD::ZEXTLOAD or
    // ISD::SEXTLOAD) perform zero or sign extension, which may change the
    // loaded value's semantics and are not compatible with vector loads.
    if (LoadNode->getExtensionType() != ISD::NON_EXTLOAD)
      return false;

    return true;
  };

  return (isValidForConvert(LHS) && isValidForConvert(RHS));
}
15754
15756 const SDLoc &DL) {
15757
15758 assert(N->getOpcode() == ISD::SETCC && "Should be called with a SETCC node");
15759
15760 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
15761 assert((CC == ISD::SETNE || CC == ISD::SETEQ) &&
15762 "CC mus be ISD::SETNE or ISD::SETEQ");
15763
15764 auto getV16i8Load = [&](const SDValue &Operand) {
15765 if (Operand.getOpcode() == ISD::Constant)
15766 return DAG.getBitcast(MVT::v16i8, Operand);
15767
15768 assert(Operand.getOpcode() == ISD::LOAD && "Must be LoadSDNode here.");
15769
15770 auto *LoadNode = cast<LoadSDNode>(Operand);
15771 SDValue NewLoad =
15772 DAG.getLoad(MVT::v16i8, DL, LoadNode->getChain(),
15773 LoadNode->getBasePtr(), LoadNode->getMemOperand());
15774 DAG.ReplaceAllUsesOfValueWith(Operand.getValue(1), NewLoad.getValue(1));
15775 return NewLoad;
15776 };
15777
15778 // Following code transforms the DAG
15779 // t0: ch,glue = EntryToken
15780 // t2: i64,ch = CopyFromReg t0, Register:i64 %0
15781 // t3: i128,ch = load<(load (s128) from %ir.a, align 1)> t0, t2,
15782 // undef:i64
15783 // t4: i64,ch = CopyFromReg t0, Register:i64 %1
15784 // t5: i128,ch =
15785 // load<(load (s128) from %ir.b, align 1)> t0, t4, undef:i64 t6: i1 =
15786 // setcc t3, t5, setne:ch
15787 //
15788 // ---->
15789 //
15790 // t0: ch,glue = EntryToken
15791 // t2: i64,ch = CopyFromReg t0, Register:i64 %0
15792 // t3: v16i8,ch = load<(load (s128) from %ir.a, align 1)> t0, t2,
15793 // undef:i64
15794 // t4: i64,ch = CopyFromReg t0, Register:i64 %1
15795 // t5: v16i8,ch =
15796 // load<(load (s128) from %ir.b, align 1)> t0, t4, undef:i64
15797 // t6: i32 =
15798 // llvm.ppc.altivec.vcmpequb.p TargetConstant:i32<10505>,
15799 // Constant:i32<2>, t3, t5
15800 // t7: i1 = setcc t6, Constant:i32<0>, seteq:ch
15801
15802 // Or transforms the DAG
15803 // t5: i128,ch = load<(load (s128) from %ir.X, align 1)> t0, t2, undef:i64
15804 // t8: i1 =
15805 // setcc Constant:i128<237684487579686500932345921536>, t5, setne:ch
15806 //
15807 // --->
15808 //
15809 // t5: v16i8,ch = load<(load (s128) from %ir.X, align 1)> t0, t2, undef:i64
15810 // t6: v16i8 = bitcast Constant:i128<237684487579686500932345921536>
15811 // t7: i32 =
15812 // llvm.ppc.altivec.vcmpequb.p Constant:i32<10962>, Constant:i32<2>, t5, t2
15813
15814 SDValue LHSVec = getV16i8Load(N->getOperand(0));
15815 SDValue RHSVec = getV16i8Load(N->getOperand(1));
15816
15817 SDValue IntrID =
15818 DAG.getConstant(Intrinsic::ppc_altivec_vcmpequb_p, DL, MVT::i32);
15819 SDValue CRSel = DAG.getConstant(2, DL, MVT::i32); // which CR6 predicate field
15820 SDValue PredResult = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, MVT::i32,
15821 IntrID, CRSel, LHSVec, RHSVec);
15822 // ppc_altivec_vcmpequb_p returns 1 when two vectors are the same,
15823 // so we need to invert the CC opcode.
15824 return DAG.getSetCC(DL, N->getValueType(0), PredResult,
15825 DAG.getConstant(0, DL, MVT::i32),
15826 CC == ISD::SETNE ? ISD::SETEQ : ISD::SETNE);
15827}
15828
15829// Detect whether there is a pattern like (setcc (and X, 1), 0, eq).
15830// If it is , return true; otherwise return false.
15832 assert(N->getOpcode() == ISD::SETCC && "Should be SETCC SDNode here.");
15833
15834 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
15835 if (CC != ISD::SETEQ)
15836 return false;
15837
15838 SDValue LHS = N->getOperand(0);
15839 SDValue RHS = N->getOperand(1);
15840
15841 // Check the `SDValue &V` is from `and` with `1`.
15842 auto IsAndWithOne = [](SDValue &V) {
15843 if (V.getOpcode() == ISD::AND) {
15844 for (const SDValue &Op : V->ops())
15845 if (auto *C = dyn_cast<ConstantSDNode>(Op))
15846 if (C->isOne())
15847 return true;
15848 }
15849 return false;
15850 };
15851
15852 // Check whether the SETCC compare with zero.
15853 auto IsCompareWithZero = [](SDValue &V) {
15854 if (auto *C = dyn_cast<ConstantSDNode>(V))
15855 if (C->isZero())
15856 return true;
15857 return false;
15858 };
15859
15860 return (IsAndWithOne(LHS) && IsCompareWithZero(RHS)) ||
15861 (IsAndWithOne(RHS) && IsCompareWithZero(LHS));
15862}
15863
15864// You must check whether the `SDNode* N` can be converted to Xori using
15865// the function `static bool canConvertSETCCToXori(SDNode *N)`
15866// before calling the function; otherwise, it may produce incorrect results.
15868
15869 assert(N->getOpcode() == ISD::SETCC && "Should be SETCC SDNode here.");
15870 SDValue LHS = N->getOperand(0);
15871 SDValue RHS = N->getOperand(1);
15872 SDLoc DL(N);
15873
15874 [[maybe_unused]] ISD::CondCode CC =
15875 cast<CondCodeSDNode>(N->getOperand(2))->get();
15876 assert((CC == ISD::SETEQ) && "CC must be ISD::SETEQ.");
15877 // Rewrite it as XORI (and X, 1), 1.
15878 auto MakeXor1 = [&](SDValue V) {
15879 EVT VT = V.getValueType();
15880 SDValue One = DAG.getConstant(1, DL, VT);
15881 SDValue Xor = DAG.getNode(ISD::XOR, DL, VT, V, One);
15882 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, Xor);
15883 };
15884
15885 if (LHS.getOpcode() == ISD::AND && RHS.getOpcode() != ISD::AND)
15886 return MakeXor1(LHS);
15887
15888 if (RHS.getOpcode() == ISD::AND && LHS.getOpcode() != ISD::AND)
15889 return MakeXor1(RHS);
15890
15891 llvm_unreachable("Should not reach here.");
15892}
15893
15894SDValue PPCTargetLowering::combineSetCC(SDNode *N,
15895 DAGCombinerInfo &DCI) const {
15896 assert(N->getOpcode() == ISD::SETCC &&
15897 "Should be called with a SETCC node");
15898
15899 // Check if the pattern (setcc (and X, 1), 0, eq) is present.
15900 // If it is, rewrite it as XORI (and X, 1), 1.
15902 return ConvertSETCCToXori(N, DCI.DAG);
15903
15904 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
15905 if (CC == ISD::SETNE || CC == ISD::SETEQ) {
15906 SDValue LHS = N->getOperand(0);
15907 SDValue RHS = N->getOperand(1);
15908
15909 // If there is a '0 - y' pattern, canonicalize the pattern to the RHS.
15910 if (LHS.getOpcode() == ISD::SUB && isNullConstant(LHS.getOperand(0)) &&
15911 LHS.hasOneUse())
15912 std::swap(LHS, RHS);
15913
15914 // x == 0-y --> x+y == 0
15915 // x != 0-y --> x+y != 0
15916 if (RHS.getOpcode() == ISD::SUB && isNullConstant(RHS.getOperand(0)) &&
15917 RHS.hasOneUse()) {
15918 SDLoc DL(N);
15919 SelectionDAG &DAG = DCI.DAG;
15920 EVT VT = N->getValueType(0);
15921 EVT OpVT = LHS.getValueType();
15922 SDValue Add = DAG.getNode(ISD::ADD, DL, OpVT, LHS, RHS.getOperand(1));
15923 return DAG.getSetCC(DL, VT, Add, DAG.getConstant(0, DL, OpVT), CC);
15924 }
15925
15926 // Optimization: Fold i128 equality/inequality compares of two loads into a
15927 // vectorized compare using vcmpequb.p when Altivec is available.
15928 //
15929 // Rationale:
15930 // A scalar i128 SETCC (eq/ne) normally lowers to multiple scalar ops.
15931 // On VSX-capable subtargets, we can instead reinterpret the i128 loads
15932 // as v16i8 vectors and use the Altive vcmpequb.p instruction to
15933 // perform a full 128-bit equality check in a single vector compare.
15934 //
15935 // Example Result:
15936 // This transformation replaces memcmp(a, b, 16) with two vector loads
15937 // and one vector compare instruction.
15938
15939 if (Subtarget.hasAltivec() && canConvertToVcmpequb(LHS, RHS))
15940 return convertTwoLoadsAndCmpToVCMPEQUB(DCI.DAG, N, SDLoc(N));
15941 }
15942
15943 return DAGCombineTruncBoolExt(N, DCI);
15944}
15945
15946// Is this an extending load from an f32 to an f64?
15947static bool isFPExtLoad(SDValue Op) {
15948 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Op.getNode()))
15949 return LD->getExtensionType() == ISD::EXTLOAD &&
15950 Op.getValueType() == MVT::f64;
15951 return false;
15952}
15953
15954/// Reduces the number of fp-to-int conversion when building a vector.
15955///
15956/// If this vector is built out of floating to integer conversions,
15957/// transform it to a vector built out of floating point values followed by a
15958/// single floating to integer conversion of the vector.
15959/// Namely (build_vector (fptosi $A), (fptosi $B), ...)
15960/// becomes (fptosi (build_vector ($A, $B, ...)))
15961SDValue PPCTargetLowering::
15962combineElementTruncationToVectorTruncation(SDNode *N,
15963 DAGCombinerInfo &DCI) const {
15964 assert(N->getOpcode() == ISD::BUILD_VECTOR &&
15965 "Should be called with a BUILD_VECTOR node");
15966
15967 SelectionDAG &DAG = DCI.DAG;
15968 SDLoc dl(N);
15969
15970 SDValue FirstInput = N->getOperand(0);
15971 assert(FirstInput.getOpcode() == PPCISD::MFVSR &&
15972 "The input operand must be an fp-to-int conversion.");
15973
15974 // This combine happens after legalization so the fp_to_[su]i nodes are
15975 // already converted to PPCSISD nodes.
15976 unsigned FirstConversion = FirstInput.getOperand(0).getOpcode();
15977 if (FirstConversion == PPCISD::FCTIDZ ||
15978 FirstConversion == PPCISD::FCTIDUZ ||
15979 FirstConversion == PPCISD::FCTIWZ ||
15980 FirstConversion == PPCISD::FCTIWUZ) {
15981 bool IsSplat = true;
15982 bool Is32Bit = FirstConversion == PPCISD::FCTIWZ ||
15983 FirstConversion == PPCISD::FCTIWUZ;
15984 EVT SrcVT = FirstInput.getOperand(0).getValueType();
15986 EVT TargetVT = N->getValueType(0);
15987 for (int i = 0, e = N->getNumOperands(); i < e; ++i) {
15988 SDValue NextOp = N->getOperand(i);
15989 if (NextOp.getOpcode() != PPCISD::MFVSR)
15990 return SDValue();
15991 unsigned NextConversion = NextOp.getOperand(0).getOpcode();
15992 if (NextConversion != FirstConversion)
15993 return SDValue();
15994 // If we are converting to 32-bit integers, we need to add an FP_ROUND.
15995 // This is not valid if the input was originally double precision. It is
15996 // also not profitable to do unless this is an extending load in which
15997 // case doing this combine will allow us to combine consecutive loads.
15998 if (Is32Bit && !isFPExtLoad(NextOp.getOperand(0).getOperand(0)))
15999 return SDValue();
16000 if (N->getOperand(i) != FirstInput)
16001 IsSplat = false;
16002 }
16003
16004 // If this is a splat, we leave it as-is since there will be only a single
16005 // fp-to-int conversion followed by a splat of the integer. This is better
16006 // for 32-bit and smaller ints and neutral for 64-bit ints.
16007 if (IsSplat)
16008 return SDValue();
16009
16010 // Now that we know we have the right type of node, get its operands
16011 for (int i = 0, e = N->getNumOperands(); i < e; ++i) {
16012 SDValue In = N->getOperand(i).getOperand(0);
16013 if (Is32Bit) {
16014 // For 32-bit values, we need to add an FP_ROUND node (if we made it
16015 // here, we know that all inputs are extending loads so this is safe).
16016 if (In.isUndef())
16017 Ops.push_back(DAG.getUNDEF(SrcVT));
16018 else {
16019 SDValue Trunc =
16020 DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, In.getOperand(0),
16021 DAG.getIntPtrConstant(1, dl, /*isTarget=*/true));
16022 Ops.push_back(Trunc);
16023 }
16024 } else
16025 Ops.push_back(In.isUndef() ? DAG.getUNDEF(SrcVT) : In.getOperand(0));
16026 }
16027
16028 unsigned Opcode;
16029 if (FirstConversion == PPCISD::FCTIDZ ||
16030 FirstConversion == PPCISD::FCTIWZ)
16031 Opcode = ISD::FP_TO_SINT;
16032 else
16033 Opcode = ISD::FP_TO_UINT;
16034
16035 EVT NewVT = TargetVT == MVT::v2i64 ? MVT::v2f64 : MVT::v4f32;
16036 SDValue BV = DAG.getBuildVector(NewVT, dl, Ops);
16037 return DAG.getNode(Opcode, dl, TargetVT, BV);
16038 }
16039 return SDValue();
16040}
16041
16042// LXVKQ instruction load VSX vector with a special quadword value
16043// based on an immediate value. This helper method returns the details of the
16044// match as a tuple of {LXVKQ unsigned IMM Value, right_shift_amount}
16045// to help generate the LXVKQ instruction and the subsequent shift instruction
16046// required to match the original build vector pattern.
16047
16048// LXVKQPattern: {LXVKQ unsigned IMM Value, right_shift_amount}
16049using LXVKQPattern = std::tuple<uint32_t, uint8_t>;
16050
16051static std::optional<LXVKQPattern> getPatternInfo(const APInt &FullVal) {
16052
16053 // LXVKQ instruction loads the Quadword value:
16054 // 0x8000_0000_0000_0000_0000_0000_0000_0000 when imm = 0b10000
16055 static const APInt BasePattern = APInt(128, 0x8000000000000000ULL) << 64;
16056 static const uint32_t Uim = 16;
16057
16058 // Check for direct LXVKQ match (no shift needed)
16059 if (FullVal == BasePattern)
16060 return std::make_tuple(Uim, uint8_t{0});
16061
16062 // Check if FullValue is 1 (the result of the base pattern >> 127)
16063 if (FullVal == APInt(128, 1))
16064 return std::make_tuple(Uim, uint8_t{127});
16065
16066 return std::nullopt;
16067}
16068
16069/// Combine vector loads to a single load (using lxvkq) or splat with shift of a
16070/// constant (xxspltib + vsrq) by recognising patterns in the Build Vector.
16071/// LXVKQ instruction load VSX vector with a special quadword value based on an
16072/// immediate value. if UIM=0b10000 then LXVKQ loads VSR[32×TX+T] with value
16073/// 0x8000_0000_0000_0000_0000_0000_0000_0000.
16074/// This can be used to inline the build vector constants that have the
16075/// following patterns:
16076///
16077/// 0x8000_0000_0000_0000_0000_0000_0000_0000 (MSB set pattern)
16078/// 0x0000_0000_0000_0000_0000_0000_0000_0001 (LSB set pattern)
16079/// MSB pattern can directly loaded using LXVKQ while LSB is loaded using a
16080/// combination of splatting and right shift instructions.
16081
16082SDValue PPCTargetLowering::combineBVLoadsSpecialValue(SDValue Op,
16083 SelectionDAG &DAG) const {
16084
16085 assert((Op.getNode() && Op.getOpcode() == ISD::BUILD_VECTOR) &&
16086 "Expected a BuildVectorSDNode in combineBVLoadsSpecialValue");
16087
16088 // This transformation is only supported if we are loading either a byte,
16089 // halfword, word, or doubleword.
16090 EVT VT = Op.getValueType();
16091 if (!(VT == MVT::v8i16 || VT == MVT::v16i8 || VT == MVT::v4i32 ||
16092 VT == MVT::v2i64))
16093 return SDValue();
16094
16095 LLVM_DEBUG(llvm::dbgs() << "\ncombineBVLoadsSpecialValue: Build vector ("
16096 << VT.getEVTString() << "): ";
16097 Op->dump());
16098
16099 unsigned NumElems = VT.getVectorNumElements();
16100 unsigned ElemBits = VT.getScalarSizeInBits();
16101
16102 bool IsLittleEndian = DAG.getDataLayout().isLittleEndian();
16103
16104 // Check for Non-constant operand in the build vector.
16105 for (const SDValue &Operand : Op.getNode()->op_values()) {
16106 if (!isa<ConstantSDNode>(Operand))
16107 return SDValue();
16108 }
16109
16110 // Assemble build vector operands as a 128-bit register value
16111 // We need to reconstruct what the 128-bit register pattern would be
16112 // that produces this vector when interpreted with the current endianness
16113 APInt FullVal = APInt::getZero(128);
16114
16115 for (unsigned Index = 0; Index < NumElems; ++Index) {
16116 auto *C = cast<ConstantSDNode>(Op.getOperand(Index));
16117
16118 // Get element value as raw bits (zero-extended)
16119 uint64_t ElemValue = C->getZExtValue();
16120
16121 // Mask to element size to ensure we only get the relevant bits
16122 if (ElemBits < 64)
16123 ElemValue &= ((1ULL << ElemBits) - 1);
16124
16125 // Calculate bit position for this element in the 128-bit register
16126 unsigned BitPos =
16127 (IsLittleEndian) ? (Index * ElemBits) : (128 - (Index + 1) * ElemBits);
16128
16129 // Create APInt for the element value and shift it to correct position
16130 APInt ElemAPInt(128, ElemValue);
16131 ElemAPInt <<= BitPos;
16132
16133 // Place the element value at the correct bit position
16134 FullVal |= ElemAPInt;
16135 }
16136
16137 if (FullVal.isZero() || FullVal.isAllOnes())
16138 return SDValue();
16139
16140 if (auto UIMOpt = getPatternInfo(FullVal)) {
16141 const auto &[Uim, ShiftAmount] = *UIMOpt;
16142 SDLoc Dl(Op);
16143
16144 // Generate LXVKQ instruction if the shift amount is zero.
16145 if (ShiftAmount == 0) {
16146 SDValue UimVal = DAG.getTargetConstant(Uim, Dl, MVT::i32);
16147 SDValue LxvkqInstr =
16148 SDValue(DAG.getMachineNode(PPC::LXVKQ, Dl, VT, UimVal), 0);
16150 << "combineBVLoadsSpecialValue: Instruction Emitted ";
16151 LxvkqInstr.dump());
16152 return LxvkqInstr;
16153 }
16154
16155 assert(ShiftAmount == 127 && "Unexpected lxvkq shift amount value");
16156
16157 // The right shifted pattern can be constructed using a combination of
16158 // XXSPLTIB and VSRQ instruction. VSRQ uses the shift amount from the lower
16159 // 7 bits of byte 15. This can be specified using XXSPLTIB with immediate
16160 // value 255.
16161 SDValue ShiftAmountVec =
16162 SDValue(DAG.getMachineNode(PPC::XXSPLTIB, Dl, MVT::v4i32,
16163 DAG.getTargetConstant(255, Dl, MVT::i32)),
16164 0);
16165 // Generate appropriate right shift instruction
16166 SDValue ShiftVec = SDValue(
16167 DAG.getMachineNode(PPC::VSRQ, Dl, VT, ShiftAmountVec, ShiftAmountVec),
16168 0);
16170 << "\n combineBVLoadsSpecialValue: Instruction Emitted ";
16171 ShiftVec.dump());
16172 return ShiftVec;
16173 }
16174 // No patterns matched for build vectors.
16175 return SDValue();
16176}
16177
16178/// Reduce the number of loads when building a vector.
16179///
16180/// Building a vector out of multiple loads can be converted to a load
16181/// of the vector type if the loads are consecutive. If the loads are
16182/// consecutive but in descending order, a shuffle is added at the end
16183/// to reorder the vector.
16185 assert(N->getOpcode() == ISD::BUILD_VECTOR &&
16186 "Should be called with a BUILD_VECTOR node");
16187
16188 SDLoc dl(N);
16189
16190 // Return early for non byte-sized type, as they can't be consecutive.
16191 if (!N->getValueType(0).getVectorElementType().isByteSized())
16192 return SDValue();
16193
16194 bool InputsAreConsecutiveLoads = true;
16195 bool InputsAreReverseConsecutive = true;
16196 unsigned ElemSize = N->getValueType(0).getScalarType().getStoreSize();
16197 SDValue FirstInput = N->getOperand(0);
16198 bool IsRoundOfExtLoad = false;
16199 LoadSDNode *FirstLoad = nullptr;
16200
16201 if (FirstInput.getOpcode() == ISD::FP_ROUND &&
16202 FirstInput.getOperand(0).getOpcode() == ISD::LOAD) {
16203 FirstLoad = cast<LoadSDNode>(FirstInput.getOperand(0));
16204 IsRoundOfExtLoad = FirstLoad->getExtensionType() == ISD::EXTLOAD;
16205 }
16206 // Not a build vector of (possibly fp_rounded) loads.
16207 if ((!IsRoundOfExtLoad && FirstInput.getOpcode() != ISD::LOAD) ||
16208 N->getNumOperands() == 1)
16209 return SDValue();
16210
16211 if (!IsRoundOfExtLoad)
16212 FirstLoad = cast<LoadSDNode>(FirstInput);
16213
16215 InputLoads.push_back(FirstLoad);
16216 for (int i = 1, e = N->getNumOperands(); i < e; ++i) {
16217 // If any inputs are fp_round(extload), they all must be.
16218 if (IsRoundOfExtLoad && N->getOperand(i).getOpcode() != ISD::FP_ROUND)
16219 return SDValue();
16220
16221 SDValue NextInput = IsRoundOfExtLoad ? N->getOperand(i).getOperand(0) :
16222 N->getOperand(i);
16223 if (NextInput.getOpcode() != ISD::LOAD)
16224 return SDValue();
16225
16226 SDValue PreviousInput =
16227 IsRoundOfExtLoad ? N->getOperand(i-1).getOperand(0) : N->getOperand(i-1);
16228 LoadSDNode *LD1 = cast<LoadSDNode>(PreviousInput);
16229 LoadSDNode *LD2 = cast<LoadSDNode>(NextInput);
16230
16231 // If any inputs are fp_round(extload), they all must be.
16232 if (IsRoundOfExtLoad && LD2->getExtensionType() != ISD::EXTLOAD)
16233 return SDValue();
16234
16235 // We only care about regular loads. The PPC-specific load intrinsics
16236 // will not lead to a merge opportunity.
16237 if (!DAG.areNonVolatileConsecutiveLoads(LD2, LD1, ElemSize, 1))
16238 InputsAreConsecutiveLoads = false;
16239 if (!DAG.areNonVolatileConsecutiveLoads(LD1, LD2, ElemSize, 1))
16240 InputsAreReverseConsecutive = false;
16241
16242 // Exit early if the loads are neither consecutive nor reverse consecutive.
16243 if (!InputsAreConsecutiveLoads && !InputsAreReverseConsecutive)
16244 return SDValue();
16245 InputLoads.push_back(LD2);
16246 }
16247
16248 assert(!(InputsAreConsecutiveLoads && InputsAreReverseConsecutive) &&
16249 "The loads cannot be both consecutive and reverse consecutive.");
16250
16251 SDValue WideLoad;
16252 SDValue ReturnSDVal;
16253 if (InputsAreConsecutiveLoads) {
16254 assert(FirstLoad && "Input needs to be a LoadSDNode.");
16255 WideLoad = DAG.getLoad(N->getValueType(0), dl, FirstLoad->getChain(),
16256 FirstLoad->getBasePtr(), FirstLoad->getPointerInfo(),
16257 FirstLoad->getAlign());
16258 ReturnSDVal = WideLoad;
16259 } else if (InputsAreReverseConsecutive) {
16260 LoadSDNode *LastLoad = InputLoads.back();
16261 assert(LastLoad && "Input needs to be a LoadSDNode.");
16262 WideLoad = DAG.getLoad(N->getValueType(0), dl, LastLoad->getChain(),
16263 LastLoad->getBasePtr(), LastLoad->getPointerInfo(),
16264 LastLoad->getAlign());
16266 for (int i = N->getNumOperands() - 1; i >= 0; i--)
16267 Ops.push_back(i);
16268
16269 ReturnSDVal = DAG.getVectorShuffle(N->getValueType(0), dl, WideLoad,
16270 DAG.getUNDEF(N->getValueType(0)), Ops);
16271 } else
16272 return SDValue();
16273
16274 for (auto *LD : InputLoads)
16275 DAG.makeEquivalentMemoryOrdering(LD, WideLoad);
16276 return ReturnSDVal;
16277}
16278
16279// This function adds the required vector_shuffle needed to get
16280// the elements of the vector extract in the correct position
16281// as specified by the CorrectElems encoding.
16283 SDValue Input, uint64_t Elems,
16284 uint64_t CorrectElems) {
16285 SDLoc dl(N);
16286
16287 unsigned NumElems = Input.getValueType().getVectorNumElements();
16288 SmallVector<int, 16> ShuffleMask(NumElems, -1);
16289
16290 // Knowing the element indices being extracted from the original
16291 // vector and the order in which they're being inserted, just put
16292 // them at element indices required for the instruction.
16293 for (unsigned i = 0; i < N->getNumOperands(); i++) {
16294 if (DAG.getDataLayout().isLittleEndian())
16295 ShuffleMask[CorrectElems & 0xF] = Elems & 0xF;
16296 else
16297 ShuffleMask[(CorrectElems & 0xF0) >> 4] = (Elems & 0xF0) >> 4;
16298 CorrectElems = CorrectElems >> 8;
16299 Elems = Elems >> 8;
16300 }
16301
16302 SDValue Shuffle =
16303 DAG.getVectorShuffle(Input.getValueType(), dl, Input,
16304 DAG.getUNDEF(Input.getValueType()), ShuffleMask);
16305
16306 EVT VT = N->getValueType(0);
16307 SDValue Conv = DAG.getBitcast(VT, Shuffle);
16308
16309 EVT ExtVT = EVT::getVectorVT(*DAG.getContext(),
16310 Input.getValueType().getVectorElementType(),
16312 return DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, VT, Conv,
16313 DAG.getValueType(ExtVT));
16314}
16315
16316// Look for build vector patterns where input operands come from sign
16317// extended vector_extract elements of specific indices. If the correct indices
16318// aren't used, add a vector shuffle to fix up the indices and create
16319// SIGN_EXTEND_INREG node which selects the vector sign extend instructions
16320// during instruction selection.
16322 // This array encodes the indices that the vector sign extend instructions
16323 // extract from when extending from one type to another for both BE and LE.
16324 // The right nibble of each byte corresponds to the LE incides.
16325 // and the left nibble of each byte corresponds to the BE incides.
16326 // For example: 0x3074B8FC byte->word
16327 // For LE: the allowed indices are: 0x0,0x4,0x8,0xC
16328 // For BE: the allowed indices are: 0x3,0x7,0xB,0xF
16329 // For example: 0x000070F8 byte->double word
16330 // For LE: the allowed indices are: 0x0,0x8
16331 // For BE: the allowed indices are: 0x7,0xF
16332 uint64_t TargetElems[] = {
16333 0x3074B8FC, // b->w
16334 0x000070F8, // b->d
16335 0x10325476, // h->w
16336 0x00003074, // h->d
16337 0x00001032, // w->d
16338 };
16339
16340 uint64_t Elems = 0;
16341 int Index;
16342 SDValue Input;
16343
16344 auto isSExtOfVecExtract = [&](SDValue Op) -> bool {
16345 if (!Op)
16346 return false;
16347 if (Op.getOpcode() != ISD::SIGN_EXTEND &&
16348 Op.getOpcode() != ISD::SIGN_EXTEND_INREG)
16349 return false;
16350
16351 // A SIGN_EXTEND_INREG might be fed by an ANY_EXTEND to produce a value
16352 // of the right width.
16353 SDValue Extract = Op.getOperand(0);
16354 if (Extract.getOpcode() == ISD::ANY_EXTEND)
16355 Extract = Extract.getOperand(0);
16356 if (Extract.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
16357 return false;
16358
16360 if (!ExtOp)
16361 return false;
16362
16363 Index = ExtOp->getZExtValue();
16364 if (Input && Input != Extract.getOperand(0))
16365 return false;
16366
16367 if (!Input)
16368 Input = Extract.getOperand(0);
16369
16370 Elems = Elems << 8;
16371 Index = DAG.getDataLayout().isLittleEndian() ? Index : Index << 4;
16372 Elems |= Index;
16373
16374 return true;
16375 };
16376
16377 // If the build vector operands aren't sign extended vector extracts,
16378 // of the same input vector, then return.
16379 for (unsigned i = 0; i < N->getNumOperands(); i++) {
16380 if (!isSExtOfVecExtract(N->getOperand(i))) {
16381 return SDValue();
16382 }
16383 }
16384
16385 // If the vector extract indices are not correct, add the appropriate
16386 // vector_shuffle.
16387 int TgtElemArrayIdx;
16388 int InputSize = Input.getValueType().getScalarSizeInBits();
16389 int OutputSize = N->getValueType(0).getScalarSizeInBits();
16390 if (InputSize + OutputSize == 40)
16391 TgtElemArrayIdx = 0;
16392 else if (InputSize + OutputSize == 72)
16393 TgtElemArrayIdx = 1;
16394 else if (InputSize + OutputSize == 48)
16395 TgtElemArrayIdx = 2;
16396 else if (InputSize + OutputSize == 80)
16397 TgtElemArrayIdx = 3;
16398 else if (InputSize + OutputSize == 96)
16399 TgtElemArrayIdx = 4;
16400 else
16401 return SDValue();
16402
16403 uint64_t CorrectElems = TargetElems[TgtElemArrayIdx];
16404 CorrectElems = DAG.getDataLayout().isLittleEndian()
16405 ? CorrectElems & 0x0F0F0F0F0F0F0F0F
16406 : CorrectElems & 0xF0F0F0F0F0F0F0F0;
16407 if (Elems != CorrectElems) {
16408 return addShuffleForVecExtend(N, DAG, Input, Elems, CorrectElems);
16409 }
16410
16411 // Regular lowering will catch cases where a shuffle is not needed.
16412 return SDValue();
16413}
16414
16415// Look for the pattern of a load from a narrow width to i128, feeding
16416// into a BUILD_VECTOR of v1i128. Replace this sequence with a PPCISD node
16417// (LXVRZX). This node represents a zero extending load that will be matched
16418// to the Load VSX Vector Rightmost instructions.
16420 SDLoc DL(N);
16421
16422 // This combine is only eligible for a BUILD_VECTOR of v1i128.
16423 if (N->getValueType(0) != MVT::v1i128)
16424 return SDValue();
16425
16426 SDValue Operand = N->getOperand(0);
16427 // Proceed with the transformation if the operand to the BUILD_VECTOR
16428 // is a load instruction.
16429 if (Operand.getOpcode() != ISD::LOAD)
16430 return SDValue();
16431
16432 auto *LD = cast<LoadSDNode>(Operand);
16433 EVT MemoryType = LD->getMemoryVT();
16434
16435 // This transformation is only valid if the we are loading either a byte,
16436 // halfword, word, or doubleword.
16437 bool ValidLDType = MemoryType == MVT::i8 || MemoryType == MVT::i16 ||
16438 MemoryType == MVT::i32 || MemoryType == MVT::i64;
16439
16440 // Ensure that the load from the narrow width is being zero extended to i128.
16441 if (!ValidLDType ||
16442 (LD->getExtensionType() != ISD::ZEXTLOAD &&
16443 LD->getExtensionType() != ISD::EXTLOAD))
16444 return SDValue();
16445
16446 SDValue LoadOps[] = {
16447 LD->getChain(), LD->getBasePtr(),
16448 DAG.getIntPtrConstant(MemoryType.getScalarSizeInBits(), DL)};
16449
16450 return DAG.getMemIntrinsicNode(PPCISD::LXVRZX, DL,
16451 DAG.getVTList(MVT::v1i128, MVT::Other),
16452 LoadOps, MemoryType, LD->getMemOperand());
16453}
16454
16455SDValue PPCTargetLowering::DAGCombineBuildVector(SDNode *N,
16456 DAGCombinerInfo &DCI) const {
16457 assert(N->getOpcode() == ISD::BUILD_VECTOR &&
16458 "Should be called with a BUILD_VECTOR node");
16459
16460 SelectionDAG &DAG = DCI.DAG;
16461 SDLoc dl(N);
16462
16463 if (!Subtarget.hasVSX())
16464 return SDValue();
16465
16466 // The target independent DAG combiner will leave a build_vector of
16467 // float-to-int conversions intact. We can generate MUCH better code for
16468 // a float-to-int conversion of a vector of floats.
16469 SDValue FirstInput = N->getOperand(0);
16470 if (FirstInput.getOpcode() == PPCISD::MFVSR) {
16471 SDValue Reduced = combineElementTruncationToVectorTruncation(N, DCI);
16472 if (Reduced)
16473 return Reduced;
16474 }
16475
16476 // If we're building a vector out of consecutive loads, just load that
16477 // vector type.
16478 SDValue Reduced = combineBVOfConsecutiveLoads(N, DAG);
16479 if (Reduced)
16480 return Reduced;
16481
16482 // If we're building a vector out of extended elements from another vector
16483 // we have P9 vector integer extend instructions. The code assumes legal
16484 // input types (i.e. it can't handle things like v4i16) so do not run before
16485 // legalization.
16486 if (Subtarget.hasP9Altivec() && !DCI.isBeforeLegalize()) {
16487 Reduced = combineBVOfVecSExt(N, DAG);
16488 if (Reduced)
16489 return Reduced;
16490 }
16491
16492 // On Power10, the Load VSX Vector Rightmost instructions can be utilized
16493 // if this is a BUILD_VECTOR of v1i128, and if the operand to the BUILD_VECTOR
16494 // is a load from <valid narrow width> to i128.
16495 if (Subtarget.isISA3_1()) {
16496 SDValue BVOfZLoad = combineBVZEXTLOAD(N, DAG);
16497 if (BVOfZLoad)
16498 return BVOfZLoad;
16499 }
16500
16501 if (N->getValueType(0) != MVT::v2f64)
16502 return SDValue();
16503
16504 // Looking for:
16505 // (build_vector ([su]int_to_fp (extractelt 0)), [su]int_to_fp (extractelt 1))
16506 if (FirstInput.getOpcode() != ISD::SINT_TO_FP &&
16507 FirstInput.getOpcode() != ISD::UINT_TO_FP)
16508 return SDValue();
16509 if (N->getOperand(1).getOpcode() != ISD::SINT_TO_FP &&
16510 N->getOperand(1).getOpcode() != ISD::UINT_TO_FP)
16511 return SDValue();
16512 if (FirstInput.getOpcode() != N->getOperand(1).getOpcode())
16513 return SDValue();
16514
16515 SDValue Ext1 = FirstInput.getOperand(0);
16516 SDValue Ext2 = N->getOperand(1).getOperand(0);
16517 if(Ext1.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
16519 return SDValue();
16520
16521 ConstantSDNode *Ext1Op = dyn_cast<ConstantSDNode>(Ext1.getOperand(1));
16522 ConstantSDNode *Ext2Op = dyn_cast<ConstantSDNode>(Ext2.getOperand(1));
16523 if (!Ext1Op || !Ext2Op)
16524 return SDValue();
16525 if (Ext1.getOperand(0).getValueType() != MVT::v4i32 ||
16526 Ext1.getOperand(0) != Ext2.getOperand(0))
16527 return SDValue();
16528
16529 int FirstElem = Ext1Op->getZExtValue();
16530 int SecondElem = Ext2Op->getZExtValue();
16531 int SubvecIdx;
16532 if (FirstElem == 0 && SecondElem == 1)
16533 SubvecIdx = Subtarget.isLittleEndian() ? 1 : 0;
16534 else if (FirstElem == 2 && SecondElem == 3)
16535 SubvecIdx = Subtarget.isLittleEndian() ? 0 : 1;
16536 else
16537 return SDValue();
16538
16539 SDValue SrcVec = Ext1.getOperand(0);
16540 auto NodeType = (N->getOperand(1).getOpcode() == ISD::SINT_TO_FP) ?
16541 PPCISD::SINT_VEC_TO_FP : PPCISD::UINT_VEC_TO_FP;
16542 return DAG.getNode(NodeType, dl, MVT::v2f64,
16543 SrcVec, DAG.getIntPtrConstant(SubvecIdx, dl));
16544}
16545
// Combine (sint_to_fp x) / (uint_to_fp x) to avoid the store/load sequence
// the default lowering would otherwise require. Two patterns are handled:
//   1. On Power9 (P9Vector + P9Altivec), an i8/i16 load feeding the
//      conversion is replaced with an LXSIZX load directly into a VSR
//      (plus a VEXTS sign-extension for the signed case).
//   2. A float -> int -> float round trip is collapsed into FCTID[U]Z
//      followed by FCFID[U][S], with no memory traffic.
// Returns the replacement value, or SDValue() if no combine applies.
SDValue PPCTargetLowering::combineFPToIntToFP(SDNode *N,
                                              DAGCombinerInfo &DCI) const {
  assert((N->getOpcode() == ISD::SINT_TO_FP ||
          N->getOpcode() == ISD::UINT_TO_FP) &&
         "Need an int -> FP conversion node here");

  // These combines rely on hardware 64-bit conversion support.
  if (useSoftFloat() || !Subtarget.has64BitSupport())
    return SDValue();

  SelectionDAG &DAG = DCI.DAG;
  SDLoc dl(N);
  SDValue Op(N, 0);

  // Don't handle ppc_fp128 here or conversions that are out-of-range capable
  // from the hardware.
  if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
    return SDValue();
  if (!Op.getOperand(0).getValueType().isSimple())
    return SDValue();
  // Only scalar integer sources wider than i1 and no wider than i64.
  if (Op.getOperand(0).getValueType().getSimpleVT() <= MVT(MVT::i1) ||
      Op.getOperand(0).getValueType().getSimpleVT() > MVT(MVT::i64))
    return SDValue();

  // Pattern 1: a sub-word (i8/i16) load feeding the conversion.
  SDValue FirstOperand(Op.getOperand(0));
  bool SubWordLoad = FirstOperand.getOpcode() == ISD::LOAD &&
    (FirstOperand.getValueType() == MVT::i8 ||
     FirstOperand.getValueType() == MVT::i16);
  if (Subtarget.hasP9Vector() && Subtarget.hasP9Altivec() && SubWordLoad) {
    bool Signed = N->getOpcode() == ISD::SINT_TO_FP;
    bool DstDouble = Op.getValueType() == MVT::f64;
    unsigned ConvOp = Signed ?
      (DstDouble ? PPCISD::FCFID : PPCISD::FCFIDS) :
      (DstDouble ? PPCISD::FCFIDU : PPCISD::FCFIDUS);
    // LXSIZX takes the number of bytes to load as an extra operand.
    SDValue WidthConst =
      DAG.getIntPtrConstant(FirstOperand.getValueType() == MVT::i8 ? 1 : 2,
                            dl, false);
    LoadSDNode *LDN = cast<LoadSDNode>(FirstOperand.getNode());
    SDValue Ops[] = { LDN->getChain(), LDN->getBasePtr(), WidthConst };
    SDValue Ld = DAG.getMemIntrinsicNode(PPCISD::LXSIZX, dl,
                                         DAG.getVTList(MVT::f64, MVT::Other),
                                         Ops, MVT::i8, LDN->getMemOperand());
    // Preserve the memory ordering of the original load.
    DAG.makeEquivalentMemoryOrdering(LDN, Ld);

    // For signed conversion, we need to sign-extend the value in the VSR
    if (Signed) {
      SDValue ExtOps[] = { Ld, WidthConst };
      SDValue Ext = DAG.getNode(PPCISD::VEXTS, dl, MVT::f64, ExtOps);
      return DAG.getNode(ConvOp, dl, DstDouble ? MVT::f64 : MVT::f32, Ext);
    } else
      return DAG.getNode(ConvOp, dl, DstDouble ? MVT::f64 : MVT::f32, Ld);
  }


  // For i32 intermediate values, unfortunately, the conversion functions
  // leave the upper 32 bits of the value are undefined. Within the set of
  // scalar instructions, we have no method for zero- or sign-extending the
  // value. Thus, we cannot handle i32 intermediate values here.
  if (Op.getOperand(0).getValueType() == MVT::i32)
    return SDValue();

  assert((Op.getOpcode() == ISD::SINT_TO_FP || Subtarget.hasFPCVT()) &&
         "UINT_TO_FP is supported only with FPCVT");

  // If we have FCFIDS, then use it when converting to single-precision.
  // Otherwise, convert to double-precision and then round.
  unsigned FCFOp = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
                       ? (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDUS
                                                            : PPCISD::FCFIDS)
                       : (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDU
                                                            : PPCISD::FCFID);
  MVT FCFTy = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
                  ? MVT::f32
                  : MVT::f64;

  // Pattern 2: if we're converting from a float, to an int, and back to a
  // float again, then we don't need the store/load pair at all.
  // (The unsigned variant additionally needs FPCVT.)
  if ((Op.getOperand(0).getOpcode() == ISD::FP_TO_UINT &&
       Subtarget.hasFPCVT()) ||
      (Op.getOperand(0).getOpcode() == ISD::FP_TO_SINT)) {
    SDValue Src = Op.getOperand(0).getOperand(0);
    if (Src.getValueType() == MVT::f32) {
      Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);
      DCI.AddToWorklist(Src.getNode());
    } else if (Src.getValueType() != MVT::f64) {
      // Make sure that we don't pick up a ppc_fp128 source value.
      return SDValue();
    }

    unsigned FCTOp =
      Op.getOperand(0).getOpcode() == ISD::FP_TO_SINT ? PPCISD::FCTIDZ :
                                                        PPCISD::FCTIDUZ;

    SDValue Tmp = DAG.getNode(FCTOp, dl, MVT::f64, Src);
    SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Tmp);

    // Without FCFIDS the conversion was done in double precision; round the
    // result down to single precision.
    if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) {
      FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP,
                       DAG.getIntPtrConstant(0, dl, /*isTarget=*/true));
      DCI.AddToWorklist(FP.getNode());
    }

    return FP;
  }

  return SDValue();
}
16652
// expandVSXLoadForLE - Convert VSX loads (which may be intrinsics for
// builtins) into loads with swaps.
// Emits an LXVD2X followed by an XXSWAPD so the in-register element order is
// correct on little endian, bitcasting back to the original type when it is
// not v2f64.
// NOTE(review): the extraction dropped the line carrying this function's
// name/first parameter, the INTRINSIC_W_CHAIN case label, and the local
// declarations of LD/Intrin below — confirm against the full source.
                                          DAGCombinerInfo &DCI) const {
  // Delay VSX load for LE combine until after LegalizeOps to prioritize other
  // load combines.
  if (DCI.isBeforeLegalizeOps())
    return SDValue();

  SelectionDAG &DAG = DCI.DAG;
  SDLoc dl(N);
  SDValue Chain;
  SDValue Base;
  MachineMemOperand *MMO;

  switch (N->getOpcode()) {
  default:
    llvm_unreachable("Unexpected opcode for little endian VSX load");
  case ISD::LOAD: {
    Chain = LD->getChain();
    Base = LD->getBasePtr();
    MMO = LD->getMemOperand();
    // If the MMO suggests this isn't a load of a full vector, leave
    // things alone. For a built-in, we have to make the change for
    // correctness, so if there is a size problem that will be a bug.
    if (!MMO->getSize().hasValue() || MMO->getSize().getValue() < 16)
      return SDValue();
    break;
  }
    Chain = Intrin->getChain();
    // Similarly to the store case below, Intrin->getBasePtr() doesn't get
    // us what we want. Get operand 2 instead.
    Base = Intrin->getOperand(2);
    MMO = Intrin->getMemOperand();
    break;
  }
  }

  MVT VecTy = N->getValueType(0).getSimpleVT();

  SDValue LoadOps[] = { Chain, Base };
  SDValue Load = DAG.getMemIntrinsicNode(PPCISD::LXVD2X, dl,
                                         DAG.getVTList(MVT::v2f64, MVT::Other),
                                         LoadOps, MVT::v2f64, MMO);

  DCI.AddToWorklist(Load.getNode());
  Chain = Load.getValue(1);
  // Swap the two doublewords so the value reads in the expected order.
  SDValue Swap = DAG.getNode(
      PPCISD::XXSWAPD, dl, DAG.getVTList(MVT::v2f64, MVT::Other), Chain, Load);
  DCI.AddToWorklist(Swap.getNode());

  // Add a bitcast if the resulting load type doesn't match v2f64.
  if (VecTy != MVT::v2f64) {
    SDValue N = DAG.getNode(ISD::BITCAST, dl, VecTy, Swap);
    DCI.AddToWorklist(N.getNode());
    // Package {bitcast value, swap's chain} to match Load's shape.
    return DAG.getNode(ISD::MERGE_VALUES, dl, DAG.getVTList(VecTy, MVT::Other),
                       N, Swap.getValue(1));
  }

  return Swap;
}
16718
// expandVSXStoreForLE - Convert VSX stores (which may be intrinsics for
// builtins) into stores with swaps.
// Mirrors expandVSXLoadForLE: the source vector is XXSWAPD'd and then stored
// with STXVD2X so memory ends up in the correct little-endian element order.
// NOTE(review): the extraction dropped the line carrying this function's
// name/first parameter as well as the local declarations of ST/Intrin in the
// switch below — confirm against the full source.
                                           DAGCombinerInfo &DCI) const {
  // Delay VSX store for LE combine until after LegalizeOps to prioritize other
  // store combines.
  if (DCI.isBeforeLegalizeOps())
    return SDValue();

  SelectionDAG &DAG = DCI.DAG;
  SDLoc dl(N);
  SDValue Chain;
  SDValue Base;
  unsigned SrcOpnd;
  MachineMemOperand *MMO;

  switch (N->getOpcode()) {
  default:
    llvm_unreachable("Unexpected opcode for little endian VSX store");
  case ISD::STORE: {
    Chain = ST->getChain();
    Base = ST->getBasePtr();
    MMO = ST->getMemOperand();
    // For a plain store, the value being stored is operand 1.
    SrcOpnd = 1;
    // If the MMO suggests this isn't a store of a full vector, leave
    // things alone. For a built-in, we have to make the change for
    // correctness, so if there is a size problem that will be a bug.
    if (!MMO->getSize().hasValue() || MMO->getSize().getValue() < 16)
      return SDValue();
    break;
  }
  case ISD::INTRINSIC_VOID: {
    Chain = Intrin->getChain();
    // Intrin->getBasePtr() oddly does not get what we want.
    Base = Intrin->getOperand(3);
    MMO = Intrin->getMemOperand();
    // For the intrinsic form, the stored value is operand 2.
    SrcOpnd = 2;
    break;
  }
  }

  SDValue Src = N->getOperand(SrcOpnd);
  MVT VecTy = Src.getValueType().getSimpleVT();

  // All stores are done as v2f64 and possible bit cast.
  if (VecTy != MVT::v2f64) {
    Src = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, Src);
    DCI.AddToWorklist(Src.getNode());
  }

  // Swap the doublewords prior to the store so memory order is correct.
  SDValue Swap = DAG.getNode(PPCISD::XXSWAPD, dl,
                             DAG.getVTList(MVT::v2f64, MVT::Other), Chain, Src);
  DCI.AddToWorklist(Swap.getNode());
  Chain = Swap.getValue(1);
  SDValue StoreOps[] = { Chain, Swap, Base };
  SDValue Store = DAG.getMemIntrinsicNode(PPCISD::STXVD2X, dl,
                                          DAG.getVTList(MVT::Other),
                                          StoreOps, VecTy, MMO);
  DCI.AddToWorklist(Store.getNode());
  return Store;
}
16782
// Handle DAG combine for STORE (FP_TO_INT F).
// Replaces the FP->int conversion plus store with a single
// PPCISD::ST_VSR_SCAL_INT memory node that stores the converted value
// directly from a VSR. Returns the new store, or SDValue() when the
// subtarget/type restrictions are not met.
SDValue PPCTargetLowering::combineStoreFPToInt(SDNode *N,
                                               DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  SDLoc dl(N);
  unsigned Opcode = N->getOperand(1).getOpcode();
  // Opcode is only consumed by the assert below; silence the unused-variable
  // warning in release builds where the assert compiles away.
  (void)Opcode;
  bool Strict = N->getOperand(1)->isStrictFPOpcode();

  assert((Opcode == ISD::FP_TO_SINT || Opcode == ISD::FP_TO_UINT ||
          Opcode == ISD::STRICT_FP_TO_SINT || Opcode == ISD::STRICT_FP_TO_UINT)
         && "Not a FP_TO_INT Instruction!");

  // For strict nodes, operand 0 is the chain, so the FP source is operand 1.
  SDValue Val = N->getOperand(1).getOperand(Strict ? 1 : 0);
  EVT Op1VT = N->getOperand(1).getValueType();
  EVT ResVT = Val.getValueType();

  if (!Subtarget.hasVSX() || !Subtarget.hasFPCVT() || !isTypeLegal(ResVT))
    return SDValue();

  // Only perform combine for conversion to i64/i32 or power9 i16/i8.
  bool ValidTypeForStoreFltAsInt =
        (Op1VT == MVT::i32 || (Op1VT == MVT::i64 && Subtarget.isPPC64()) ||
         (Subtarget.hasP9Vector() && (Op1VT == MVT::i16 || Op1VT == MVT::i8)));

  // TODO: Lower conversion from f128 on all VSX targets
  if (ResVT == MVT::ppcf128 || (ResVT == MVT::f128 && !Subtarget.hasP9Vector()))
    return SDValue();

  if ((Op1VT != MVT::i64 && !Subtarget.hasP8Vector()) ||
      cast<StoreSDNode>(N)->isTruncatingStore() || !ValidTypeForStoreFltAsInt)
    return SDValue();

  Val = convertFPToInt(N->getOperand(1), DAG, Subtarget);

  // Set number of bytes being converted.
  unsigned ByteSize = Op1VT.getScalarSizeInBits() / 8;
  // Operands: chain, converted value, base pointer, byte count, original VT.
  SDValue Ops[] = {N->getOperand(0), Val, N->getOperand(2),
                   DAG.getIntPtrConstant(ByteSize, dl, false),
                   DAG.getValueType(Op1VT)};

  Val = DAG.getMemIntrinsicNode(PPCISD::ST_VSR_SCAL_INT, dl,
          DAG.getVTList(MVT::Other), Ops,
          cast<StoreSDNode>(N)->getMemoryVT(),
          cast<StoreSDNode>(N)->getMemOperand());

  return Val;
}
16831
16832static bool isAlternatingShuffMask(const ArrayRef<int> &Mask, int NumElts) {
16833 // Check that the source of the element keeps flipping
16834 // (i.e. Mask[i] < NumElts -> Mask[i+i] >= NumElts).
16835 bool PrevElemFromFirstVec = Mask[0] < NumElts;
16836 for (int i = 1, e = Mask.size(); i < e; i++) {
16837 if (PrevElemFromFirstVec && Mask[i] < NumElts)
16838 return false;
16839 if (!PrevElemFromFirstVec && Mask[i] >= NumElts)
16840 return false;
16841 PrevElemFromFirstVec = !PrevElemFromFirstVec;
16842 }
16843 return true;
16844}
16845
16846static bool isSplatBV(SDValue Op) {
16847 if (Op.getOpcode() != ISD::BUILD_VECTOR)
16848 return false;
16849 SDValue FirstOp;
16850
16851 // Find first non-undef input.
16852 for (int i = 0, e = Op.getNumOperands(); i < e; i++) {
16853 FirstOp = Op.getOperand(i);
16854 if (!FirstOp.isUndef())
16855 break;
16856 }
16857
16858 // All inputs are undef or the same as the first non-undef input.
16859 for (int i = 1, e = Op.getNumOperands(); i < e; i++)
16860 if (Op.getOperand(i) != FirstOp && !Op.getOperand(i).isUndef())
16861 return false;
16862 return true;
16863}
16864
// Return Op if it is a SCALAR_TO_VECTOR node, or the operand of a single
// BITCAST whose input is a SCALAR_TO_VECTOR; otherwise return an empty
// SDValue. (Callers below name this helper isScalarToVec.)
// NOTE(review): the line with this function's signature was dropped by the
// extraction — confirm against the full source.
  if (Op.getOpcode() == ISD::SCALAR_TO_VECTOR)
    return Op;
  if (Op.getOpcode() != ISD::BITCAST)
    return SDValue();
  // Look through exactly one bitcast.
  Op = Op.getOperand(0);
  if (Op.getOpcode() == ISD::SCALAR_TO_VECTOR)
    return Op;
  return SDValue();
}
16875
// Fix up the shuffle mask to account for the fact that the result of
// scalar_to_vector is not in lane zero. This just takes all values in
// the ranges specified by the min/max indices and adds the number of
// elements required to ensure each element comes from the respective
// position in the valid lane.
// On little endian, that's just the corresponding element in the other
// half of the vector. On big endian, it is in the same half but right
// justified rather than left justified in that half.
// NOTE(review): the line introducing this function's name was dropped by the
// extraction; the parameter list below is its continuation — confirm against
// the full source.
    SmallVectorImpl<int> &ShuffV, int LHSFirstElt, int LHSLastElt,
    int RHSFirstElt, int RHSLastElt, int HalfVec, unsigned LHSNumValidElts,
    unsigned RHSNumValidElts, const PPCSubtarget &Subtarget) {
  // Per-input adjustment that moves an index into the lane the value
  // actually occupies (endian-dependent, see comment above).
  int LHSEltFixup =
      Subtarget.isLittleEndian() ? HalfVec : HalfVec - LHSNumValidElts;
  int RHSEltFixup =
      Subtarget.isLittleEndian() ? HalfVec : HalfVec - RHSNumValidElts;
  for (int I = 0, E = ShuffV.size(); I < E; ++I) {
    int Idx = ShuffV[I];
    // Only entries within the LHS/RHS ranges are adjusted; everything else
    // (including undef entries, which are negative) is left untouched.
    if (Idx >= LHSFirstElt && Idx <= LHSLastElt)
      ShuffV[I] += LHSEltFixup;
    else if (Idx >= RHSFirstElt && Idx <= RHSLastElt)
      ShuffV[I] += RHSEltFixup;
  }
}
16900
// Replace a SCALAR_TO_VECTOR with a SCALAR_TO_VECTOR_PERMUTED except if
// the original is:
// (<n x Ty> (scalar_to_vector (Ty (extract_elt <n x Ty> %a, C))))
// In such a case, just change the shuffle mask to extract the element
// from the permuted index.
// NOTE(review): the line carrying this function's name and leading
// parameters was dropped by the extraction; call sites below refer to it as
// getSToVPermuted — confirm against the full source.
                               const PPCSubtarget &Subtarget) {
  SDLoc dl(OrigSToV);
  EVT VT = OrigSToV.getValueType();
  assert(OrigSToV.getOpcode() == ISD::SCALAR_TO_VECTOR &&
         "Expecting a SCALAR_TO_VECTOR here");
  SDValue Input = OrigSToV.getOperand(0);

  if (Input.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
    ConstantSDNode *Idx = dyn_cast<ConstantSDNode>(Input.getOperand(1));
    SDValue OrigVector = Input.getOperand(0);

    // Can't handle non-const element indices or different vector types
    // for the input to the extract and the output of the scalar_to_vector.
    if (Idx && VT == OrigVector.getValueType()) {
      unsigned NumElts = VT.getVectorNumElements();
      assert(
          NumElts > 1 &&
          "Cannot produce a permuted scalar_to_vector for one element vector");
      // Build a mask that places the extracted element in the lane a
      // permuted scalar_to_vector would put it in (endian-dependent).
      SmallVector<int, 16> NewMask(NumElts, -1);
      unsigned ResultInElt = NumElts / 2;
      ResultInElt -= Subtarget.isLittleEndian() ? 0 : 1;
      NewMask[ResultInElt] = Idx->getZExtValue();
      return DAG.getVectorShuffle(VT, dl, OrigVector, OrigVector, NewMask);
    }
  }
  return DAG.getNode(PPCISD::SCALAR_TO_VECTOR_PERMUTED, dl, VT,
                     OrigSToV.getOperand(0));
}
16935
// Return true if every defined index in the shuffle mask selects an element
// that is actually defined by its source input: indices into the first input
// must not exceed LHSLastElementDefined, indices into the second must not
// exceed HalfVec + RHSLastElementDefined. A last-element value of -1 means
// all elements of that input are undefined, and the corresponding check is
// skipped. (Callers below refer to this helper as isShuffleMaskInRange.)
// NOTE(review): the line carrying this function's name and first parameter
// (the shuffle mask) was dropped by the extraction — confirm against the
// full source.
                                 int HalfVec, int LHSLastElementDefined,
                                 int RHSLastElementDefined) {
  for (int Index : ShuffV) {
    if (Index < 0) // Skip explicitly undefined mask indices.
      continue;
    // Handle first input vector of the vector_shuffle.
    if ((LHSLastElementDefined >= 0) && (Index < HalfVec) &&
        (Index > LHSLastElementDefined))
      return false;
    // Handle second input vector of the vector_shuffle.
    if ((RHSLastElementDefined >= 0) &&
        (Index > HalfVec + RHSLastElementDefined))
      return false;
  }
  return true;
}
16953
// Produce the permuted scalar_to_vector replacement for one shuffle operand
// and compute, via the output parameters, the number of valid elements and
// the last shuffle-mask index that holds defined data from this input.
// Returns the permuted (and, if necessary, bitcast) operand.
// NOTE(review): the line carrying this function's name and first parameter
// was dropped by the extraction — confirm against the full source.
    int ScalarSize, uint64_t ShuffleEltWidth, unsigned &NumValidElts,
    int FirstElt, int &LastElt, SDValue VecShuffOperand, SDValue SToVNode,
    SelectionDAG &DAG, const PPCSubtarget &Subtarget) {
  EVT VecShuffOperandType = VecShuffOperand.getValueType();
  // Set up the values for the shuffle vector fixup.
  NumValidElts = ScalarSize / VecShuffOperandType.getScalarSizeInBits();
  // The last element depends on if the input comes from the LHS or RHS.
  //
  // For example:
  // (shuff (s_to_v i32), (bitcast (s_to_v i64), v4i32), ...)
  //
  // For the LHS: The last element that comes from the LHS is actually 0, not 3
  // because elements 1 and higher of a scalar_to_vector are undefined.
  // For the RHS: The last element that comes from the RHS is actually 5, not 7
  // because elements 1 and higher of a scalar_to_vector are undefined.
  // It is also not 4 because the original scalar_to_vector is wider and
  // actually contains two i32 elements.
  LastElt = (uint64_t)ScalarSize > ShuffleEltWidth
                ? ScalarSize / ShuffleEltWidth - 1 + FirstElt
                : FirstElt;
  SDValue SToVPermuted = getSToVPermuted(SToVNode, DAG, Subtarget);
  if (SToVPermuted.getValueType() != VecShuffOperandType)
    SToVPermuted = DAG.getBitcast(VecShuffOperandType, SToVPermuted);
  return SToVPermuted;
}
16980
// On little endian subtargets, combine shuffles such as:
// vector_shuffle<16,1,17,3,18,5,19,7,20,9,21,11,22,13,23,15>, <zero>, %b
// into:
// vector_shuffle<16,0,17,1,18,2,19,3,20,4,21,5,22,6,23,7>, <zero>, %b
// because the latter can be matched to a single instruction merge.
// Furthermore, SCALAR_TO_VECTOR on little endian always involves a permute
// to put the value into element zero. Adjust the shuffle mask so that the
// vector can remain in permuted form (to prevent a swap prior to a shuffle).
// On big endian targets, this is still useful for SCALAR_TO_VECTOR
// nodes with elements smaller than doubleword because all the ways
// of getting scalar data into a vector register put the value in the
// rightmost element of the left half of the vector.
SDValue PPCTargetLowering::combineVectorShuffle(ShuffleVectorSDNode *SVN,
                                                SelectionDAG &DAG) const {
  SDValue LHS = SVN->getOperand(0);
  SDValue RHS = SVN->getOperand(1);
  auto Mask = SVN->getMask();
  int NumElts = LHS.getValueType().getVectorNumElements();
  // Res tracks the best replacement found so far; it starts as the original
  // shuffle so early exits leave the node unchanged.
  SDValue Res(SVN, 0);
  SDLoc dl(SVN);
  bool IsLittleEndian = Subtarget.isLittleEndian();

  // On big endian targets this is only useful for subtargets with direct moves.
  // On little endian targets it would be useful for all subtargets with VSX.
  // However adding special handling for LE subtargets without direct moves
  // would be wasted effort since the minimum arch for LE is ISA 2.07 (Power8)
  // which includes direct moves.
  if (!Subtarget.hasDirectMove())
    return Res;

  // If this is not a shuffle of a shuffle and the first element comes from
  // the second vector, canonicalize to the commuted form. This will make it
  // more likely to match one of the single instruction patterns.
  if (Mask[0] >= NumElts && LHS.getOpcode() != ISD::VECTOR_SHUFFLE &&
      RHS.getOpcode() != ISD::VECTOR_SHUFFLE) {
    std::swap(LHS, RHS);
    Res = DAG.getCommutedVectorShuffle(*SVN);

    // Commuting may have folded the shuffle away entirely; if so, stop here.
    if (!isa<ShuffleVectorSDNode>(Res))
      return Res;

    Mask = cast<ShuffleVectorSDNode>(Res)->getMask();
  }

  // Adjust the shuffle mask if either input vector comes from a
  // SCALAR_TO_VECTOR and keep the respective input vector in permuted
  // form (to prevent the need for a swap).
  SmallVector<int, 16> ShuffV(Mask);
  SDValue SToVLHS = isScalarToVec(LHS);
  SDValue SToVRHS = isScalarToVec(RHS);
  if (SToVLHS || SToVRHS) {
    EVT VT = SVN->getValueType(0);
    uint64_t ShuffleEltWidth = VT.getVectorElementType().getSizeInBits();
    int ShuffleNumElts = ShuffV.size();
    int HalfVec = ShuffleNumElts / 2;
    // The width of the "valid lane" (i.e. the lane that contains the value that
    // is vectorized) needs to be expressed in terms of the number of elements
    // of the shuffle. It is thereby the ratio of the values before and after
    // any bitcast, which will be set later on if the LHS or RHS are
    // SCALAR_TO_VECTOR nodes.
    unsigned LHSNumValidElts = HalfVec;
    unsigned RHSNumValidElts = HalfVec;

    // Initially assume that neither input is permuted. These will be adjusted
    // accordingly if either input is. Note, that -1 means that all elements
    // are undefined.
    int LHSFirstElt = 0;
    int RHSFirstElt = ShuffleNumElts;
    int LHSLastElt = -1;
    int RHSLastElt = -1;

    // Get the permuted scalar to vector nodes for the source(s) that come from
    // ISD::SCALAR_TO_VECTOR.
    // On big endian systems, this only makes sense for element sizes smaller
    // than 64 bits since for 64-bit elements, all instructions already put
    // the value into element zero. Since scalar size of LHS and RHS may differ
    // after isScalarToVec, this should be checked using their own sizes.
    int LHSScalarSize = 0;
    int RHSScalarSize = 0;
    if (SToVLHS) {
      LHSScalarSize = SToVLHS.getValueType().getScalarSizeInBits();
      if (!IsLittleEndian && LHSScalarSize >= 64)
        return Res;
    }
    if (SToVRHS) {
      RHSScalarSize = SToVRHS.getValueType().getScalarSizeInBits();
      if (!IsLittleEndian && RHSScalarSize >= 64)
        return Res;
    }
    // NOTE(review): the extraction dropped the heads of the two calls below
    // (the callee name and the assignment of its result to LHS/RHS) —
    // confirm against the full source.
    if (LHSScalarSize != 0)
          LHSScalarSize, ShuffleEltWidth, LHSNumValidElts, LHSFirstElt,
          LHSLastElt, LHS, SToVLHS, DAG, Subtarget);
    if (RHSScalarSize != 0)
          RHSScalarSize, ShuffleEltWidth, RHSNumValidElts, RHSFirstElt,
          RHSLastElt, RHS, SToVRHS, DAG, Subtarget);

    // Bail if the mask references elements the permuted inputs don't define.
    if (!isShuffleMaskInRange(ShuffV, HalfVec, LHSLastElt, RHSLastElt))
      return Res;

    // Fix up the shuffle mask to reflect where the desired element actually is.
    // The minimum and maximum indices that correspond to element zero for both
    // the LHS and RHS are computed and will control which shuffle mask entries
    // are to be changed. For example, if the RHS is permuted, any shuffle mask
    // entries in the range [RHSFirstElt,RHSLastElt] will be adjusted.
    // NOTE(review): the head of this call was also dropped by the extraction.
        ShuffV, LHSFirstElt, LHSLastElt, RHSFirstElt, RHSLastElt, HalfVec,
        LHSNumValidElts, RHSNumValidElts, Subtarget);
    Res = DAG.getVectorShuffle(SVN->getValueType(0), dl, LHS, RHS, ShuffV);

    // We may have simplified away the shuffle. We won't be able to do anything
    // further with it here.
    if (!isa<ShuffleVectorSDNode>(Res))
      return Res;
    Mask = cast<ShuffleVectorSDNode>(Res)->getMask();
  }

  SDValue TheSplat = IsLittleEndian ? RHS : LHS;
  // The common case after we commuted the shuffle is that the RHS is a splat
  // and we have elements coming in from the splat at indices that are not
  // conducive to using a merge.
  // Example:
  // vector_shuffle<0,17,1,19,2,21,3,23,4,25,5,27,6,29,7,31> t1, <zero>
  if (!isSplatBV(TheSplat))
    return Res;

  // We are looking for a mask such that all even elements are from
  // one vector and all odd elements from the other.
  if (!isAlternatingShuffMask(Mask, NumElts))
    return Res;

  // Adjust the mask so we are pulling in the same index from the splat
  // as the index from the interesting vector in consecutive elements.
  if (IsLittleEndian) {
    // Example (even elements from first vector):
    // vector_shuffle<0,16,1,17,2,18,3,19,4,20,5,21,6,22,7,23> t1, <zero>
    if (Mask[0] < NumElts)
      for (int i = 1, e = Mask.size(); i < e; i += 2) {
        if (ShuffV[i] < 0)
          continue;
        // If element from non-splat is undef, pick first element from splat.
        ShuffV[i] = (ShuffV[i - 1] >= 0 ? ShuffV[i - 1] : 0) + NumElts;
      }
    // Example (odd elements from first vector):
    // vector_shuffle<16,0,17,1,18,2,19,3,20,4,21,5,22,6,23,7> t1, <zero>
    else
      for (int i = 0, e = Mask.size(); i < e; i += 2) {
        if (ShuffV[i] < 0)
          continue;
        // If element from non-splat is undef, pick first element from splat.
        ShuffV[i] = (ShuffV[i + 1] >= 0 ? ShuffV[i + 1] : 0) + NumElts;
      }
  } else {
    // Example (even elements from first vector):
    // vector_shuffle<0,16,1,17,2,18,3,19,4,20,5,21,6,22,7,23> <zero>, t1
    if (Mask[0] < NumElts)
      for (int i = 0, e = Mask.size(); i < e; i += 2) {
        if (ShuffV[i] < 0)
          continue;
        // If element from non-splat is undef, pick first element from splat.
        ShuffV[i] = ShuffV[i + 1] >= 0 ? ShuffV[i + 1] - NumElts : 0;
      }
    // Example (odd elements from first vector):
    // vector_shuffle<16,0,17,1,18,2,19,3,20,4,21,5,22,6,23,7> <zero>, t1
    else
      for (int i = 1, e = Mask.size(); i < e; i += 2) {
        if (ShuffV[i] < 0)
          continue;
        // If element from non-splat is undef, pick first element from splat.
        ShuffV[i] = ShuffV[i - 1] >= 0 ? ShuffV[i - 1] - NumElts : 0;
      }
  }

  // If the RHS has undefs, we need to remove them since we may have created
  // a shuffle that adds those instead of the splat value.
  SDValue SplatVal =
      cast<BuildVectorSDNode>(TheSplat.getNode())->getSplatValue();
  TheSplat = DAG.getSplatBuildVector(TheSplat.getValueType(), dl, SplatVal);

  if (IsLittleEndian)
    RHS = TheSplat;
  else
    LHS = TheSplat;
  return DAG.getVectorShuffle(SVN->getValueType(0), dl, LHS, RHS, ShuffV);
}
17167
// Fold an element-reversing VECTOR_SHUFFLE into the adjacent load or store
// by emitting PPCISD::LOAD_VEC_BE / STORE_VEC_BE, which perform the element
// reversal as part of the memory access. Little endian + VSX + Power9 only.
// Returns the replacement node, or SDValue() when the pattern or the
// profitability checks fail.
SDValue PPCTargetLowering::combineVReverseMemOP(ShuffleVectorSDNode *SVN,
                                                LSBaseSDNode *LSBase,
                                                DAGCombinerInfo &DCI) const {
  assert((ISD::isNormalLoad(LSBase) || ISD::isNormalStore(LSBase)) &&
         "Not a reverse memop pattern!");

  // Returns true iff the mask is exactly <N-1, N-2, ..., 1, 0>.
  auto IsElementReverse = [](const ShuffleVectorSDNode *SVN) -> bool {
    auto Mask = SVN->getMask();
    int i = 0;
    auto I = Mask.rbegin();
    auto E = Mask.rend();

    for (; I != E; ++I) {
      if (*I != i)
        return false;
      i++;
    }
    return true;
  };

  SelectionDAG &DAG = DCI.DAG;
  EVT VT = SVN->getValueType(0);

  if (!isTypeLegal(VT) || !Subtarget.isLittleEndian() || !Subtarget.hasVSX())
    return SDValue();

  // Before P9, we have PPCVSXSwapRemoval pass to hack the element order.
  // See comment in PPCVSXSwapRemoval.cpp.
  // It is conflict with PPCVSXSwapRemoval opt. So we don't do it.
  if (!Subtarget.hasP9Vector())
    return SDValue();

  if(!IsElementReverse(SVN))
    return SDValue();

  if (LSBase->getOpcode() == ISD::LOAD) {
    // If the load return value 0 has more than one user except the
    // shufflevector instruction, it is not profitable to replace the
    // shufflevector with a reverse load.
    for (SDUse &Use : LSBase->uses())
      if (Use.getResNo() == 0 &&
          Use.getUser()->getOpcode() != ISD::VECTOR_SHUFFLE)
        return SDValue();

    SDLoc dl(LSBase);
    SDValue LoadOps[] = {LSBase->getChain(), LSBase->getBasePtr()};
    return DAG.getMemIntrinsicNode(
        PPCISD::LOAD_VEC_BE, dl, DAG.getVTList(VT, MVT::Other), LoadOps,
        LSBase->getMemoryVT(), LSBase->getMemOperand());
  }

  if (LSBase->getOpcode() == ISD::STORE) {
    // If there are other uses of the shuffle, the swap cannot be avoided.
    // Forcing the use of an X-Form (since swapped stores only have
    // X-Forms) without removing the swap is unprofitable.
    if (!SVN->hasOneUse())
      return SDValue();

    SDLoc dl(LSBase);
    // Store the shuffle's *input* directly; the reversal happens in the
    // STORE_VEC_BE node itself.
    SDValue StoreOps[] = {LSBase->getChain(), SVN->getOperand(0),
                          LSBase->getBasePtr()};
    return DAG.getMemIntrinsicNode(
        PPCISD::STORE_VEC_BE, dl, DAG.getVTList(MVT::Other), StoreOps,
        LSBase->getMemoryVT(), LSBase->getMemOperand());
  }

  llvm_unreachable("Expected a load or store node here");
}
17236
17237static bool isStoreConditional(SDValue Intrin, unsigned &StoreWidth) {
17238 unsigned IntrinsicID = Intrin.getConstantOperandVal(1);
17239 if (IntrinsicID == Intrinsic::ppc_stdcx)
17240 StoreWidth = 8;
17241 else if (IntrinsicID == Intrinsic::ppc_stwcx)
17242 StoreWidth = 4;
17243 else if (IntrinsicID == Intrinsic::ppc_sthcx)
17244 StoreWidth = 2;
17245 else if (IntrinsicID == Intrinsic::ppc_stbcx)
17246 StoreWidth = 1;
17247 else
17248 return false;
17249 return true;
17250}
17251
// NOTE(review): the extraction dropped this function's signature line —
// confirm against the full source. Per the body, it combines PPCISD::ADDC
// nodes whose carry output (result value 1) has users.
  if (N->getOpcode() == PPCISD::ADDC && N->hasAnyUseOfValue(1)) {
    // (ADDC (ADDE 0, 0, C), -1) -> C
    // (ADDE 0, 0, C) yields 0 + 0 + C, i.e. the carry bit C as a value;
    // adding -1 then carries out exactly when C was set, so users of the
    // carry result can be rewired to C directly.
    SDValue LHS = N->getOperand(0);
    SDValue RHS = N->getOperand(1);
    if (LHS->getOpcode() == PPCISD::ADDE &&
        isNullConstant(LHS->getOperand(0)) &&
        isNullConstant(LHS->getOperand(1)) && isAllOnesConstant(RHS)) {
      // Keep the sum (value 0) unchanged; replace the carry with C.
      return DCI.CombineTo(N, SDValue(N, 0), LHS->getOperand(2));
    }
  }
  return SDValue();
}
17266
// Optimize zero-extension of setcc when the compared value is known to be 0
// or 1.
//
// Pattern: zext(setcc(Value, 0, seteq/setne)) where Value is 0 or 1
// -> zext(xor(Value, 1)) for seteq
// -> zext(Value) for setne
//
// This optimization avoids the i32 -> i1 -> i32/i64 conversion sequence
// by keeping the value in its original i32 type throughout.
//
// Example:
// Before: zext(setcc(test_data_class(...), 0, seteq))
// // test_data_class returns 0 or 1 in i32
// // setcc converts i32 -> i1
// // zext converts i1 -> i64
// After: zext(xor(test_data_class(...), 1))
// // Stays in i32, then extends to i64
//
// This is beneficial because:
// 1. Eliminates the setcc instruction
// 2. Avoids i32 -> i1 truncation
// 3. Keeps computation in native integer width

// NOTE(review): the extraction dropped this function's signature line, and
// the guard condition governing the first early return below — confirm
// against the full source.
  // Check if this is a zero_extend
  if (N->getOpcode() != ISD::ZERO_EXTEND)
    return SDValue();

  SDValue Src = N->getOperand(0);

  // Check if the source is a setcc
  if (Src.getOpcode() != ISD::SETCC)
    return SDValue();

  SDValue LHS = Src.getOperand(0);
  SDValue RHS = Src.getOperand(1);
  ISD::CondCode CC = cast<CondCodeSDNode>(Src.getOperand(2))->get();

    return SDValue();

  // Whichever side is not the zero constant is the value being compared.
  SDValue NonNullConstant = isNullConstant(RHS) ? LHS : RHS;

  // Conservatively recognize only values proven to produce 0 or 1; currently
  // that is just the ppc_test_data_class intrinsic.
  auto isZeroOrOne = [=](SDValue &V) {
    if (V.getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
        V.getConstantOperandVal(0) == Intrinsic::ppc_test_data_class)
      return true;
    return false;
  };

  if (!isZeroOrOne(NonNullConstant))
    return SDValue();

  // Check for pattern: zext(setcc (Value), 0, seteq)) or
  // zext(setcc (Value), 0, setne))
  if (CC == ISD::SETEQ || CC == ISD::SETNE) {
    // Replace with: zext(xor(Value, 1)) for seteq
    // or: zext(Value) for setne
    // This keeps the value in i32 instead of converting to i1
    SDLoc DL(N);
    EVT VType = N->getValueType(0);
    SDValue NewNonNullConstant = DAG.getZExtOrTrunc(NonNullConstant, DL, VType);

    if (CC == ISD::SETNE)
      return NewNonNullConstant;

    SDValue One = DAG.getConstant(1, DL, VType);
    return DAG.getNode(ISD::XOR, DL, VType, NewNonNullConstant, One);
  }

  return SDValue();
}
17339
// Combine XOR patterns with SELECT_CC_I4/I8, for Example:
// 1. XOR(SELECT_CC_I4(cond, 1, 0, cc), 1) -> SELECT_CC_I4(cond, 0, 1, cc)
// 2. XOR(ZEXT(SELECT_CC_I4(cond, 1, 0, cc)), 1) -> SELECT_CC_I4/I8(cond, 0,
// 1, cc))
// 3. XOR(ANYEXT(SELECT_CC_I4(cond, 1, 0, cc)), 1) -> SELECT_CC_I4/I8(cond,
// 0, 1, cc))
// 4. etc
// NOTE(review): the extraction dropped this function's signature line and,
// below, the initial declaration of XorConst (presumably a dyn_cast of RHS,
// given the LHS retry that follows) — confirm against the full source.
  assert(N->getOpcode() == ISD::XOR && "Expected XOR node");

  // Only i32/i64 results map onto SELECT_CC_I4/SELECT_CC_I8 respectively.
  EVT XorVT = N->getValueType(0);
  if ((XorVT != MVT::i32 && XorVT != MVT::i64))
    return SDValue();

  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);

  // Check for XOR with constant 1
  if (!XorConst || !XorConst->isOne()) {
    XorConst = dyn_cast<ConstantSDNode>(LHS);
    if (!XorConst || !XorConst->isOne())
      return SDValue();
    // Swap so LHS is the SELECT_CC_I4 (or extension) and RHS is the constant
    std::swap(LHS, RHS);
  }

  // Check if LHS has only one use
  if (!LHS.hasOneUse())
    return SDValue();

  // Handle extensions: ZEXT, ANYEXT
  SDValue SelectNode = LHS;

  if (LHS.getOpcode() == ISD::ZERO_EXTEND ||
      LHS.getOpcode() == ISD::ANY_EXTEND) {
    SelectNode = LHS.getOperand(0);

    // Check if the extension input has only one use
    if (!SelectNode.hasOneUse())
      return SDValue();
  }

  // Check if SelectNode is a MachineSDNode with SELECT_CC_I4/I8 opcode
  if (!SelectNode.isMachineOpcode())
    return SDValue();

  unsigned MachineOpc = SelectNode.getMachineOpcode();

  // Handle both SELECT_CC_I4 and SELECT_CC_I8
  if (MachineOpc != PPC::SELECT_CC_I4 && MachineOpc != PPC::SELECT_CC_I8)
    return SDValue();

  // SELECT_CC_I4 operands: (cond, true_val, false_val, bropc)
  if (SelectNode.getNumOperands() != 4)
    return SDValue();

  ConstantSDNode *ConstOp1 = dyn_cast<ConstantSDNode>(SelectNode.getOperand(1));
  ConstantSDNode *ConstOp2 = dyn_cast<ConstantSDNode>(SelectNode.getOperand(2));

  if (!ConstOp1 || !ConstOp2)
    return SDValue();

  // Only optimize if operands are {0, 1} or {1, 0}
  if (!((ConstOp1->isOne() && ConstOp2->isZero()) ||
        (ConstOp1->isZero() && ConstOp2->isOne())))
    return SDValue();

  // Pattern matched! Create new SELECT_CC with swapped 0/1 operands to
  // eliminate XOR. If original was SELECT_CC(cond, 1, 0, pred), create
  // SELECT_CC(cond, 0, 1, pred). If original was SELECT_CC(cond, 0, 1, pred),
  // create SELECT_CC(cond, 1, 0, pred).
  SDLoc DL(N);
  // Choose the opcode matching the XOR's result width.
  MachineOpc = (XorVT == MVT::i32) ? PPC::SELECT_CC_I4 : PPC::SELECT_CC_I8;

  bool ConstOp1IsOne = ConstOp1->isOne();
  return SDValue(
      DAG.getMachineNode(MachineOpc, DL, XorVT,
                         {SelectNode.getOperand(0),
                          DAG.getConstant(ConstOp1IsOne ? 0 : 1, DL, XorVT),
                          DAG.getConstant(ConstOp1IsOne ? 1 : 0, DL, XorVT),
                          SelectNode.getOperand(3)}),
      0);
}
17424
// PerformDAGCombine - PPC target-specific DAG combines, dispatched on the
// opcode of N. Returning SDValue() means "no combine performed"; returning
// SDValue(N, 0) after a DCI.CombineTo signals the combiner not to recheck N.
// NOTE(review): this doxygen rendering drops a number of source lines
// (signature line 17425 and several interior lines flagged below) — verify
// against the repository source before editing this function.
17426 DAGCombinerInfo &DCI) const {
17427 SelectionDAG &DAG = DCI.DAG;
17428 SDLoc dl(N);
17429 switch (N->getOpcode()) {
17430 default: break;
17431 case ISD::ADD:
17432 return combineADD(N, DCI);
17433 case ISD::AND: {
17434 // We don't want (and (zext (shift...)), C) if C fits in the width of the
17435 // original input as that will prevent us from selecting optimal rotates.
17436 // This only matters if the input to the extend is i32 widened to i64.
17437 SDValue Op1 = N->getOperand(0);
17438 SDValue Op2 = N->getOperand(1);
17439 if ((Op1.getOpcode() != ISD::ZERO_EXTEND &&
17440 Op1.getOpcode() != ISD::ANY_EXTEND) ||
17441 !isa<ConstantSDNode>(Op2) || N->getValueType(0) != MVT::i64 ||
17442 Op1.getOperand(0).getValueType() != MVT::i32)
17443 break;
17444 SDValue NarrowOp = Op1.getOperand(0);
17445 if (NarrowOp.getOpcode() != ISD::SHL && NarrowOp.getOpcode() != ISD::SRL &&
17446 NarrowOp.getOpcode() != ISD::ROTL && NarrowOp.getOpcode() != ISD::ROTR)
17447 break;
17448
17449 uint64_t Imm = Op2->getAsZExtVal();
17450 // Make sure that the constant is narrow enough to fit in the narrow type.
17451 if (!isUInt<32>(Imm))
17452 break;
// Rebuild the AND at i32 below the extension so rotate selection sees it.
17453 SDValue ConstOp = DAG.getConstant(Imm, dl, MVT::i32);
17454 SDValue NarrowAnd = DAG.getNode(ISD::AND, dl, MVT::i32, NarrowOp, ConstOp);
17455 return DAG.getZExtOrTrunc(NarrowAnd, dl, N->getValueType(0));
17456 }
17457 case ISD::XOR: {
17458 // Optimize XOR(ISEL(1,0,CR), 1) -> ISEL(0,1,CR)
17459 if (SDValue V = combineXorSelectCC(N, DAG))
17460 return V;
17461 break;
17462 }
17463 case ISD::SHL:
17464 return combineSHL(N, DCI);
17465 case ISD::SRA:
17466 return combineSRA(N, DCI);
17467 case ISD::SRL:
17468 return combineSRL(N, DCI);
17469 case ISD::MUL:
17470 return combineMUL(N, DCI);
17471 case ISD::FMA:
17472 case PPCISD::FNMSUB:
17473 return combineFMALike(N, DCI);
17474 case PPCISD::SHL:
17475 if (isNullConstant(N->getOperand(0))) // 0 << V -> 0.
17476 return N->getOperand(0);
17477 break;
17478 case PPCISD::SRL:
17479 if (isNullConstant(N->getOperand(0))) // 0 >>u V -> 0.
17480 return N->getOperand(0);
17481 break;
17482 case PPCISD::SRA:
17483 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) {
17484 if (C->isZero() || // 0 >>s V -> 0.
17485 C->isAllOnes()) // -1 >>s V -> -1.
17486 return N->getOperand(0);
17487 }
17488 break;
17489 case ISD::ZERO_EXTEND:
17490 if (SDValue RetV = combineZextSetccWithZero(N, DCI.DAG))
17491 return RetV;
17492 [[fallthrough]];
17493 case ISD::SIGN_EXTEND:
17494 case ISD::ANY_EXTEND:
17495 return DAGCombineExtBoolTrunc(N, DCI);
17496 case ISD::TRUNCATE:
17497 return combineTRUNCATE(N, DCI);
17498 case ISD::SETCC:
17499 if (SDValue CSCC = combineSetCC(N, DCI))
17500 return CSCC;
17501 [[fallthrough]];
17502 case ISD::SELECT_CC:
17503 return DAGCombineTruncBoolExt(N, DCI);
17504 case ISD::SINT_TO_FP:
17505 case ISD::UINT_TO_FP:
17506 return combineFPToIntToFP(N, DCI);
// NOTE(review): source line 17507 is missing from this rendering — it is
// presumably the `case ISD::VECTOR_SHUFFLE:` label that the code below
// (casting N to ShuffleVectorSDNode) belongs to. Confirm against the
// repository source.
17508 if (ISD::isNormalLoad(N->getOperand(0).getNode())) {
17509 LSBaseSDNode* LSBase = cast<LSBaseSDNode>(N->getOperand(0));
17510 return combineVReverseMemOP(cast<ShuffleVectorSDNode>(N), LSBase, DCI);
17511 }
17512 return combineVectorShuffle(cast<ShuffleVectorSDNode>(N), DCI.DAG);
17513 case ISD::STORE: {
17514
17515 EVT Op1VT = N->getOperand(1).getValueType();
17516 unsigned Opcode = N->getOperand(1).getOpcode();
17517
// STORE(FP_TO_xINT ...) may be combinable to a direct FP-to-int store.
17518 if (Opcode == ISD::FP_TO_SINT || Opcode == ISD::FP_TO_UINT ||
17519 Opcode == ISD::STRICT_FP_TO_SINT || Opcode == ISD::STRICT_FP_TO_UINT) {
17520 SDValue Val = combineStoreFPToInt(N, DCI);
17521 if (Val)
17522 return Val;
17523 }
17524
// STORE of a byte-reversing shuffle may fold into a reversing store.
17525 if (Opcode == ISD::VECTOR_SHUFFLE && ISD::isNormalStore(N)) {
17526 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N->getOperand(1));
17527 SDValue Val= combineVReverseMemOP(SVN, cast<LSBaseSDNode>(N), DCI);
17528 if (Val)
17529 return Val;
17530 }
17531
17532 // Turn STORE (BSWAP) -> sthbrx/stwbrx.
17533 if (cast<StoreSDNode>(N)->isUnindexed() && Opcode == ISD::BSWAP &&
17534 N->getOperand(1).getNode()->hasOneUse() &&
17535 (Op1VT == MVT::i32 || Op1VT == MVT::i16 ||
17536 (Subtarget.hasLDBRX() && Subtarget.isPPC64() && Op1VT == MVT::i64))) {
17537
17538 // STBRX can only handle simple types and it makes no sense to store less
17539 // two bytes in byte-reversed order.
17540 EVT mVT = cast<StoreSDNode>(N)->getMemoryVT();
17541 if (mVT.isExtended() || mVT.getSizeInBits() < 16)
17542 break;
17543
17544 SDValue BSwapOp = N->getOperand(1).getOperand(0);
17545 // Do an any-extend to 32-bits if this is a half-word input.
17546 if (BSwapOp.getValueType() == MVT::i16)
17547 BSwapOp = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, BSwapOp);
17548
17549 // If the type of BSWAP operand is wider than stored memory width
17550 // it need to be shifted to the right side before STBRX.
17551 if (Op1VT.bitsGT(mVT)) {
17552 int Shift = Op1VT.getSizeInBits() - mVT.getSizeInBits();
17553 BSwapOp = DAG.getNode(ISD::SRL, dl, Op1VT, BSwapOp,
17554 DAG.getConstant(Shift, dl, MVT::i32));
17555 // Need to truncate if this is a bswap of i64 stored as i32/i16.
17556 if (Op1VT == MVT::i64)
17557 BSwapOp = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, BSwapOp);
17558 }
17559
17560 SDValue Ops[] = {
17561 N->getOperand(0), BSwapOp, N->getOperand(2), DAG.getValueType(mVT)
17562 };
17563 return
17564 DAG.getMemIntrinsicNode(PPCISD::STBRX, dl, DAG.getVTList(MVT::Other),
17565 Ops, cast<StoreSDNode>(N)->getMemoryVT(),
17566 cast<StoreSDNode>(N)->getMemOperand());
17567 }
17568
17569 // STORE Constant:i32<0> -> STORE<trunc to i32> Constant:i64<0>
17570 // So it can increase the chance of CSE constant construction.
17571 if (Subtarget.isPPC64() && !DCI.isBeforeLegalize() &&
17572 isa<ConstantSDNode>(N->getOperand(1)) && Op1VT == MVT::i32) {
17573 // Need to sign-extended to 64-bits to handle negative values.
17574 EVT MemVT = cast<StoreSDNode>(N)->getMemoryVT();
17575 uint64_t Val64 = SignExtend64(N->getConstantOperandVal(1),
17576 MemVT.getSizeInBits());
17577 SDValue Const64 = DAG.getConstant(Val64, dl, MVT::i64);
17578
17579 auto *ST = cast<StoreSDNode>(N);
17580 SDValue NewST = DAG.getStore(ST->getChain(), dl, Const64,
17581 ST->getBasePtr(), ST->getOffset(), MemVT,
17582 ST->getMemOperand(), ST->getAddressingMode(),
17583 /*IsTruncating=*/true);
17584 // Note we use CombineTo here to prevent DAGCombiner from visiting the
17585 // new store which will change the constant by removing non-demanded bits.
17586 return ST->isUnindexed()
17587 ? DCI.CombineTo(N, NewST, /*AddTo=*/false)
17588 : DCI.CombineTo(N, NewST, NewST.getValue(1), /*AddTo=*/false);
17589 }
17590
17591 // For little endian, VSX stores require generating xxswapd/lxvd2x.
17592 // Not needed on ISA 3.0 based CPUs since we have a non-permuting store.
17593 if (Op1VT.isSimple()) {
17594 MVT StoreVT = Op1VT.getSimpleVT();
17595 if (Subtarget.needsSwapsForVSXMemOps() &&
17596 (StoreVT == MVT::v2f64 || StoreVT == MVT::v2i64 ||
17597 StoreVT == MVT::v4f32 || StoreVT == MVT::v4i32))
17598 return expandVSXStoreForLE(N, DCI);
17599 }
17600 break;
17601 }
17602 case ISD::LOAD: {
// NOTE(review): source line 17603 is missing from this rendering — it
// presumably declares LD, e.g. `LoadSDNode *LD = cast<LoadSDNode>(N);`
// (LD is used throughout the case body). Confirm against the repository
// source.
17604 EVT VT = LD->getValueType(0);
17605
17606 // For little endian, VSX loads require generating lxvd2x/xxswapd.
17607 // Not needed on ISA 3.0 based CPUs since we have a non-permuting load.
17608 if (VT.isSimple()) {
17609 MVT LoadVT = VT.getSimpleVT();
17610 if (Subtarget.needsSwapsForVSXMemOps() &&
17611 (LoadVT == MVT::v2f64 || LoadVT == MVT::v2i64 ||
17612 LoadVT == MVT::v4f32 || LoadVT == MVT::v4i32))
17613 return expandVSXLoadForLE(N, DCI);
17614 }
17615
17616 // We sometimes end up with a 64-bit integer load, from which we extract
17617 // two single-precision floating-point numbers. This happens with
17618 // std::complex<float>, and other similar structures, because of the way we
17619 // canonicalize structure copies. However, if we lack direct moves,
17620 // then the final bitcasts from the extracted integer values to the
17621 // floating-point numbers turn into store/load pairs. Even with direct moves,
17622 // just loading the two floating-point numbers is likely better.
17623 auto ReplaceTwoFloatLoad = [&]() {
17624 if (VT != MVT::i64)
17625 return false;
17626
17627 if (LD->getExtensionType() != ISD::NON_EXTLOAD ||
17628 LD->isVolatile())
17629 return false;
17630
17631 // We're looking for a sequence like this:
17632 // t13: i64,ch = load<LD8[%ref.tmp]> t0, t6, undef:i64
17633 // t16: i64 = srl t13, Constant:i32<32>
17634 // t17: i32 = truncate t16
17635 // t18: f32 = bitcast t17
17636 // t19: i32 = truncate t13
17637 // t20: f32 = bitcast t19
17638
17639 if (!LD->hasNUsesOfValue(2, 0))
17640 return false;
17641
// Walk the use list to find the two users of the load's value result
// (result 0), skipping uses of the chain result.
17642 auto UI = LD->user_begin();
17643 while (UI.getUse().getResNo() != 0) ++UI;
17644 SDNode *Trunc = *UI++;
17645 while (UI.getUse().getResNo() != 0) ++UI;
17646 SDNode *RightShift = *UI;
17647 if (Trunc->getOpcode() != ISD::TRUNCATE)
17648 std::swap(Trunc, RightShift);
17649
17650 if (Trunc->getOpcode() != ISD::TRUNCATE ||
17651 Trunc->getValueType(0) != MVT::i32 ||
17652 !Trunc->hasOneUse())
17653 return false;
17654 if (RightShift->getOpcode() != ISD::SRL ||
17655 !isa<ConstantSDNode>(RightShift->getOperand(1)) ||
17656 RightShift->getConstantOperandVal(1) != 32 ||
17657 !RightShift->hasOneUse())
17658 return false;
17659
17660 SDNode *Trunc2 = *RightShift->user_begin();
17661 if (Trunc2->getOpcode() != ISD::TRUNCATE ||
17662 Trunc2->getValueType(0) != MVT::i32 ||
17663 !Trunc2->hasOneUse())
17664 return false;
17665
17666 SDNode *Bitcast = *Trunc->user_begin();
17667 SDNode *Bitcast2 = *Trunc2->user_begin();
17668
17669 if (Bitcast->getOpcode() != ISD::BITCAST ||
17670 Bitcast->getValueType(0) != MVT::f32)
17671 return false;
17672 if (Bitcast2->getOpcode() != ISD::BITCAST ||
17673 Bitcast2->getValueType(0) != MVT::f32)
17674 return false;
17675
17676 if (Subtarget.isLittleEndian())
17677 std::swap(Bitcast, Bitcast2);
17678
17679 // Bitcast has the second float (in memory-layout order) and Bitcast2
17680 // has the first one.
17681
17682 SDValue BasePtr = LD->getBasePtr();
17683 if (LD->isIndexed()) {
17684 assert(LD->getAddressingMode() == ISD::PRE_INC &&
17685 "Non-pre-inc AM on PPC?");
17686 BasePtr =
17687 DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
17688 LD->getOffset());
17689 }
17690
// Drop the volatile flag (already checked above that LD is not volatile)
// when building the two replacement f32 loads.
17691 auto MMOFlags =
17692 LD->getMemOperand()->getFlags() & ~MachineMemOperand::MOVolatile;
17693 SDValue FloatLoad = DAG.getLoad(MVT::f32, dl, LD->getChain(), BasePtr,
17694 LD->getPointerInfo(), LD->getAlign(),
17695 MMOFlags, LD->getAAInfo());
17696 SDValue AddPtr =
17697 DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(),
17698 BasePtr, DAG.getIntPtrConstant(4, dl));
17699 SDValue FloatLoad2 = DAG.getLoad(
17700 MVT::f32, dl, SDValue(FloatLoad.getNode(), 1), AddPtr,
17701 LD->getPointerInfo().getWithOffset(4),
17702 commonAlignment(LD->getAlign(), 4), MMOFlags, LD->getAAInfo());
17703
17704 if (LD->isIndexed()) {
17705 // Note that DAGCombine should re-form any pre-increment load(s) from
17706 // what is produced here if that makes sense.
17707 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), BasePtr);
17708 }
17709
17710 DCI.CombineTo(Bitcast2, FloatLoad);
17711 DCI.CombineTo(Bitcast, FloatLoad2);
17712
17713 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, LD->isIndexed() ? 2 : 1),
17714 SDValue(FloatLoad2.getNode(), 1));
17715 return true;
17716 };
17717
17718 if (ReplaceTwoFloatLoad())
17719 return SDValue(N, 0);
17720
17721 EVT MemVT = LD->getMemoryVT();
17722 Type *Ty = MemVT.getTypeForEVT(*DAG.getContext());
17723 Align ABIAlignment = DAG.getDataLayout().getABITypeAlign(Ty);
17724 if (LD->isUnindexed() && VT.isVector() &&
17725 ((Subtarget.hasAltivec() && ISD::isNON_EXTLoad(N) &&
17726 // P8 and later hardware should just use LOAD.
17727 !Subtarget.hasP8Vector() &&
17728 (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32 ||
17729 VT == MVT::v4f32))) &&
17730 LD->getAlign() < ABIAlignment) {
17731 // This is a type-legal unaligned Altivec load.
17732 SDValue Chain = LD->getChain();
17733 SDValue Ptr = LD->getBasePtr();
17734 bool isLittleEndian = Subtarget.isLittleEndian();
17735
17736 // This implements the loading of unaligned vectors as described in
17737 // the venerable Apple Velocity Engine overview. Specifically:
17738 // https://developer.apple.com/hardwaredrivers/ve/alignment.html
17739 // https://developer.apple.com/hardwaredrivers/ve/code_optimization.html
17740 //
17741 // The general idea is to expand a sequence of one or more unaligned
17742 // loads into an alignment-based permutation-control instruction (lvsl
17743 // or lvsr), a series of regular vector loads (which always truncate
17744 // their input address to an aligned address), and a series of
17745 // permutations. The results of these permutations are the requested
17746 // loaded values. The trick is that the last "extra" load is not taken
17747 // from the address you might suspect (sizeof(vector) bytes after the
17748 // last requested load), but rather sizeof(vector) - 1 bytes after the
17749 // last requested vector. The point of this is to avoid a page fault if
17750 // the base address happened to be aligned. This works because if the
17751 // base address is aligned, then adding less than a full vector length
17752 // will cause the last vector in the sequence to be (re)loaded.
17753 // Otherwise, the next vector will be fetched as you might suspect was
17754 // necessary.
17755
17756 // We might be able to reuse the permutation generation from
17757 // a different base address offset from this one by an aligned amount.
17758 // The INTRINSIC_WO_CHAIN DAG combine will attempt to perform this
17759 // optimization later.
17760 Intrinsic::ID Intr, IntrLD, IntrPerm;
17761 MVT PermCntlTy, PermTy, LDTy;
17762 Intr = isLittleEndian ? Intrinsic::ppc_altivec_lvsr
17763 : Intrinsic::ppc_altivec_lvsl;
17764 IntrLD = Intrinsic::ppc_altivec_lvx;
17765 IntrPerm = Intrinsic::ppc_altivec_vperm;
17766 PermCntlTy = MVT::v16i8;
17767 PermTy = MVT::v4i32;
17768 LDTy = MVT::v4i32;
17769
17770 SDValue PermCntl = BuildIntrinsicOp(Intr, Ptr, DAG, dl, PermCntlTy);
17771
17772 // Create the new MMO for the new base load. It is like the original MMO,
17773 // but represents an area in memory almost twice the vector size centered
17774 // on the original address. If the address is unaligned, we might start
17775 // reading up to (sizeof(vector)-1) bytes below the address of the
17776 // original unaligned load.
// NOTE(review): source line 17777 is missing from this rendering — it
// presumably declares MF, e.g. `MachineFunction &MF = DAG.getMachineFunction();`
// (MF is used immediately below). Confirm against the repository source.
17778 MachineMemOperand *BaseMMO =
17779 MF.getMachineMemOperand(LD->getMemOperand(),
17780 -(int64_t)MemVT.getStoreSize()+1,
17781 2*MemVT.getStoreSize()-1);
17782
17783 // Create the new base load.
17784 SDValue LDXIntID =
17785 DAG.getTargetConstant(IntrLD, dl, getPointerTy(MF.getDataLayout()));
17786 SDValue BaseLoadOps[] = { Chain, LDXIntID, Ptr };
17787 SDValue BaseLoad =
// NOTE(review): source line 17788 is missing from this rendering — it is
// presumably the head of the call producing BaseLoad, e.g.
// `DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl,`. Confirm against the
// repository source.
17789 DAG.getVTList(PermTy, MVT::Other),
17790 BaseLoadOps, LDTy, BaseMMO);
17791
17792 // Note that the value of IncOffset (which is provided to the next
17793 // load's pointer info offset value, and thus used to calculate the
17794 // alignment), and the value of IncValue (which is actually used to
17795 // increment the pointer value) are different! This is because we
17796 // require the next load to appear to be aligned, even though it
17797 // is actually offset from the base pointer by a lesser amount.
17798 int IncOffset = VT.getSizeInBits() / 8;
17799 int IncValue = IncOffset;
17800
17801 // Walk (both up and down) the chain looking for another load at the real
17802 // (aligned) offset (the alignment of the other load does not matter in
17803 // this case). If found, then do not use the offset reduction trick, as
17804 // that will prevent the loads from being later combined (as they would
17805 // otherwise be duplicates).
17806 if (!findConsecutiveLoad(LD, DAG))
17807 --IncValue;
17808
// NOTE(review): source line 17809 is missing from this rendering — it
// presumably declares Increment, e.g. `SDValue Increment =`. Confirm against
// the repository source.
17810 DAG.getConstant(IncValue, dl, getPointerTy(MF.getDataLayout()));
17811 Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment);
17812
17813 MachineMemOperand *ExtraMMO =
17814 MF.getMachineMemOperand(LD->getMemOperand(),
17815 1, 2*MemVT.getStoreSize()-1);
17816 SDValue ExtraLoadOps[] = { Chain, LDXIntID, Ptr };
17817 SDValue ExtraLoad =
// NOTE(review): source line 17818 is missing from this rendering — like line
// 17788 above, it is presumably the head of the getMemIntrinsicNode call.
17819 DAG.getVTList(PermTy, MVT::Other),
17820 ExtraLoadOps, LDTy, ExtraMMO);
17821
17822 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
17823 BaseLoad.getValue(1), ExtraLoad.getValue(1));
17824
17825 // Because vperm has a big-endian bias, we must reverse the order
17826 // of the input vectors and complement the permute control vector
17827 // when generating little endian code. We have already handled the
17828 // latter by using lvsr instead of lvsl, so just reverse BaseLoad
17829 // and ExtraLoad here.
17830 SDValue Perm;
17831 if (isLittleEndian)
17832 Perm = BuildIntrinsicOp(IntrPerm,
17833 ExtraLoad, BaseLoad, PermCntl, DAG, dl);
17834 else
17835 Perm = BuildIntrinsicOp(IntrPerm,
17836 BaseLoad, ExtraLoad, PermCntl, DAG, dl);
17837
17838 if (VT != PermTy)
17839 Perm = Subtarget.hasAltivec()
17840 ? DAG.getNode(ISD::BITCAST, dl, VT, Perm)
17841 : DAG.getNode(ISD::FP_ROUND, dl, VT, Perm,
17842 DAG.getTargetConstant(1, dl, MVT::i64));
17843 // second argument is 1 because this rounding
17844 // is always exact.
17845
17846 // The output of the permutation is our loaded result, the TokenFactor is
17847 // our new chain.
17848 DCI.CombineTo(N, Perm, TF);
17849 return SDValue(N, 0);
17850 }
17851 }
17852 break;
// NOTE(review): source line 17853 is missing from this rendering — it is
// presumably the `case ISD::INTRINSIC_WO_CHAIN: {` label opening the block
// below (which reads the intrinsic ID from operand 0). Confirm against the
// repository source.
17854 bool isLittleEndian = Subtarget.isLittleEndian();
17855 unsigned IID = N->getConstantOperandVal(0);
17856 Intrinsic::ID Intr = (isLittleEndian ? Intrinsic::ppc_altivec_lvsr
17857 : Intrinsic::ppc_altivec_lvsl);
17858 if (IID == Intr && N->getOperand(1)->getOpcode() == ISD::ADD) {
17859 SDValue Add = N->getOperand(1);
17860
17861 int Bits = 4 /* 16 byte alignment */;
17862
// If the added offset is a multiple of 16, the lvsl/lvsr result is the same
// as one computed from the base pointer alone.
17863 if (DAG.MaskedValueIsZero(Add->getOperand(1),
17864 APInt::getAllOnes(Bits /* alignment */)
17865 .zext(Add.getScalarValueSizeInBits()))) {
17866 SDNode *BasePtr = Add->getOperand(0).getNode();
17867 for (SDNode *U : BasePtr->users()) {
17868 if (U->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
17869 U->getConstantOperandVal(0) == IID) {
17870 // We've found another LVSL/LVSR, and this address is an aligned
17871 // multiple of that one. The results will be the same, so use the
17872 // one we've just found instead.
17873
17874 return SDValue(U, 0);
17875 }
17876 }
17877 }
17878
// Also reuse an lvsl/lvsr computed on BasePtr+C' when C and C' differ by a
// multiple of 16.
17879 if (isa<ConstantSDNode>(Add->getOperand(1))) {
17880 SDNode *BasePtr = Add->getOperand(0).getNode();
17881 for (SDNode *U : BasePtr->users()) {
17882 if (U->getOpcode() == ISD::ADD &&
17883 isa<ConstantSDNode>(U->getOperand(1)) &&
17884 (Add->getConstantOperandVal(1) - U->getConstantOperandVal(1)) %
17885 (1ULL << Bits) ==
17886 0) {
17887 SDNode *OtherAdd = U;
17888 for (SDNode *V : OtherAdd->users()) {
17889 if (V->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
17890 V->getConstantOperandVal(0) == IID) {
17891 return SDValue(V, 0);
17892 }
17893 }
17894 }
17895 }
17896 }
17897 }
17898
17899 // Combine vmaxsw/h/b(a, a's negation) to abs(a)
17900 // Expose the vabsduw/h/b opportunity for down stream
17901 if (!DCI.isAfterLegalizeDAG() && Subtarget.hasP9Altivec() &&
17902 (IID == Intrinsic::ppc_altivec_vmaxsw ||
17903 IID == Intrinsic::ppc_altivec_vmaxsh ||
17904 IID == Intrinsic::ppc_altivec_vmaxsb)) {
17905 SDValue V1 = N->getOperand(1);
17906 SDValue V2 = N->getOperand(2);
17907 if ((V1.getSimpleValueType() == MVT::v4i32 ||
17908 V1.getSimpleValueType() == MVT::v8i16 ||
17909 V1.getSimpleValueType() == MVT::v16i8) &&
// NOTE(review): source line 17910 is missing from this rendering —
// presumably the final conjunct of this condition (likely requiring V1 and
// V2 to have the same value type). Lines 17913 and 17919 below are likewise
// missing — presumably the all-zeros checks on the SUB's first operand for
// the (0-a, a) and (a, 0-a) patterns. Confirm against the repository source.
17911 // (0-a, a)
17912 if (V1.getOpcode() == ISD::SUB &&
17914 V1.getOperand(1) == V2) {
17915 return DAG.getNode(ISD::ABS, dl, V2.getValueType(), V2);
17916 }
17917 // (a, 0-a)
17918 if (V2.getOpcode() == ISD::SUB &&
17920 V2.getOperand(1) == V1) {
17921 return DAG.getNode(ISD::ABS, dl, V1.getValueType(), V1);
17922 }
17923 // (x-y, y-x)
17924 if (V1.getOpcode() == ISD::SUB && V2.getOpcode() == ISD::SUB &&
17925 V1.getOperand(0) == V2.getOperand(1) &&
17926 V1.getOperand(1) == V2.getOperand(0)) {
17927 return DAG.getNode(ISD::ABS, dl, V1.getValueType(), V1);
17928 }
17929 }
17930 }
17931 }
17932
17933 break;
// NOTE(review): source line 17934 is missing from this rendering — it is
// presumably the `case ISD::INTRINSIC_W_CHAIN:` label for the switch below
// (which dispatches on the intrinsic ID in operand 1). Confirm against the
// repository source.
17935 switch (N->getConstantOperandVal(1)) {
17936 default:
17937 break;
17938 case Intrinsic::ppc_altivec_vsum4sbs:
17939 case Intrinsic::ppc_altivec_vsum4shs:
17940 case Intrinsic::ppc_altivec_vsum4ubs: {
17941 // These sum-across intrinsics only have a chain due to the side effect
17942 // that they may set the SAT bit. If we know the SAT bit will not be set
17943 // for some inputs, we can replace any uses of their chain with the
17944 // input chain.
17945 if (BuildVectorSDNode *BVN =
17946 dyn_cast<BuildVectorSDNode>(N->getOperand(3))) {
17947 APInt APSplatBits, APSplatUndef;
17948 unsigned SplatBitSize;
17949 bool HasAnyUndefs;
17950 bool BVNIsConstantSplat = BVN->isConstantSplat(
17951 APSplatBits, APSplatUndef, SplatBitSize, HasAnyUndefs, 0,
17952 !Subtarget.isLittleEndian());
17953 // If the constant splat vector is 0, the SAT bit will not be set.
17954 if (BVNIsConstantSplat && APSplatBits == 0)
17955 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), N->getOperand(0));
17956 }
17957 return SDValue();
17958 }
17959 case Intrinsic::ppc_vsx_lxvw4x:
17960 case Intrinsic::ppc_vsx_lxvd2x:
17961 // For little endian, VSX loads require generating lxvd2x/xxswapd.
17962 // Not needed on ISA 3.0 based CPUs since we have a non-permuting load.
17963 if (Subtarget.needsSwapsForVSXMemOps())
17964 return expandVSXLoadForLE(N, DCI);
17965 break;
17966 }
17967 break;
// NOTE(review): source line 17968 is missing from this rendering — it is
// presumably the `case ISD::INTRINSIC_VOID:` label for the VSX store
// expansion below. Confirm against the repository source.
17969 // For little endian, VSX stores require generating xxswapd/stxvd2x.
17970 // Not needed on ISA 3.0 based CPUs since we have a non-permuting store.
17971 if (Subtarget.needsSwapsForVSXMemOps()) {
17972 switch (N->getConstantOperandVal(1)) {
17973 default:
17974 break;
17975 case Intrinsic::ppc_vsx_stxvw4x:
17976 case Intrinsic::ppc_vsx_stxvd2x:
17977 return expandVSXStoreForLE(N, DCI);
17978 }
17979 }
17980 break;
17981 case ISD::BSWAP: {
17982 // Turn BSWAP (LOAD) -> lhbrx/lwbrx.
17983 // For subtargets without LDBRX, we can still do better than the default
17984 // expansion even for 64-bit BSWAP (LOAD).
17985 bool Is64BitBswapOn64BitTgt =
17986 Subtarget.isPPC64() && N->getValueType(0) == MVT::i64;
17987 bool IsSingleUseNormalLd = ISD::isNormalLoad(N->getOperand(0).getNode()) &&
17988 N->getOperand(0).hasOneUse();
17989 if (IsSingleUseNormalLd &&
17990 (N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i16 ||
17991 (Subtarget.hasLDBRX() && Is64BitBswapOn64BitTgt))) {
17992 SDValue Load = N->getOperand(0);
17993 LoadSDNode *LD = cast<LoadSDNode>(Load);
17994 // Create the byte-swapping load.
17995 SDValue Ops[] = {
17996 LD->getChain(), // Chain
17997 LD->getBasePtr(), // Ptr
17998 DAG.getValueType(N->getValueType(0)) // VT
17999 };
18000 SDValue BSLoad =
18001 DAG.getMemIntrinsicNode(PPCISD::LBRX, dl,
18002 DAG.getVTList(N->getValueType(0) == MVT::i64 ?
18003 MVT::i64 : MVT::i32, MVT::Other),
18004 Ops, LD->getMemoryVT(), LD->getMemOperand());
18005
18006 // If this is an i16 load, insert the truncate.
18007 SDValue ResVal = BSLoad;
18008 if (N->getValueType(0) == MVT::i16)
18009 ResVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i16, BSLoad);
18010
18011 // First, combine the bswap away. This makes the value produced by the
18012 // load dead.
18013 DCI.CombineTo(N, ResVal);
18014
18015 // Next, combine the load away, we give it a bogus result value but a real
18016 // chain result. The result value is dead because the bswap is dead.
18017 DCI.CombineTo(Load.getNode(), ResVal, BSLoad.getValue(1));
18018
18019 // Return N so it doesn't get rechecked!
18020 return SDValue(N, 0);
18021 }
18022 // Convert this to two 32-bit bswap loads and a BUILD_PAIR. Do this only
18023 // before legalization so that the BUILD_PAIR is handled correctly.
18024 if (!DCI.isBeforeLegalize() || !Is64BitBswapOn64BitTgt ||
18025 !IsSingleUseNormalLd)
18026 return SDValue();
18027 LoadSDNode *LD = cast<LoadSDNode>(N->getOperand(0));
18028
18029 // Can't split volatile or atomic loads.
18030 if (!LD->isSimple())
18031 return SDValue();
18032 SDValue BasePtr = LD->getBasePtr();
18033 SDValue Lo = DAG.getLoad(MVT::i32, dl, LD->getChain(), BasePtr,
18034 LD->getPointerInfo(), LD->getAlign());
18035 Lo = DAG.getNode(ISD::BSWAP, dl, MVT::i32, Lo);
18036 BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
18037 DAG.getIntPtrConstant(4, dl));
// NOTE(review): source line 18038 is missing from this rendering — it is
// presumably the head of the NewMMO declaration, e.g.
// `MachineMemOperand *NewMMO = DAG.getMachineFunction().getMachineMemOperand(`.
// Confirm against the repository source.
18039 LD->getMemOperand(), 4, 4);
18040 SDValue Hi = DAG.getLoad(MVT::i32, dl, LD->getChain(), BasePtr, NewMMO);
18041 Hi = DAG.getNode(ISD::BSWAP, dl, MVT::i32, Hi);
18042 SDValue Res;
18043 if (Subtarget.isLittleEndian())
18044 Res = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Hi, Lo);
18045 else
18046 Res = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
18047 SDValue TF =
18048 DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
18049 Hi.getOperand(0).getValue(1), Lo.getOperand(0).getValue(1));
18050 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), TF);
18051 return Res;
18052 }
18053 case PPCISD::VCMP:
18054 // If a VCMP_rec node already exists with exactly the same operands as this
18055 // node, use its result instead of this node (VCMP_rec computes both a CR6
18056 // and a normal output).
18057 //
18058 if (!N->getOperand(0).hasOneUse() &&
18059 !N->getOperand(1).hasOneUse() &&
18060 !N->getOperand(2).hasOneUse()) {
18061
18062 // Scan all of the users of the LHS, looking for VCMP_rec's that match.
18063 SDNode *VCMPrecNode = nullptr;
18064
18065 SDNode *LHSN = N->getOperand(0).getNode();
18066 for (SDNode *User : LHSN->users())
18067 if (User->getOpcode() == PPCISD::VCMP_rec &&
18068 User->getOperand(1) == N->getOperand(1) &&
18069 User->getOperand(2) == N->getOperand(2) &&
18070 User->getOperand(0) == N->getOperand(0)) {
18071 VCMPrecNode = User;
18072 break;
18073 }
18074
18075 // If there is no VCMP_rec node, or if the flag value has a single use,
18076 // don't transform this.
18077 if (!VCMPrecNode || VCMPrecNode->hasNUsesOfValue(0, 1))
18078 break;
18079
18080 // Look at the (necessarily single) use of the flag value. If it has a
18081 // chain, this transformation is more complex. Note that multiple things
18082 // could use the value result, which we should ignore.
18083 SDNode *FlagUser = nullptr;
18084 for (SDNode::use_iterator UI = VCMPrecNode->use_begin();
18085 FlagUser == nullptr; ++UI) {
18086 assert(UI != VCMPrecNode->use_end() && "Didn't find user!");
18087 SDNode *User = UI->getUser();
18088 for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) {
18089 if (User->getOperand(i) == SDValue(VCMPrecNode, 1)) {
18090 FlagUser = User;
18091 break;
18092 }
18093 }
18094 }
18095
18096 // If the user is a MFOCRF instruction, we know this is safe.
18097 // Otherwise we give up for right now.
18098 if (FlagUser->getOpcode() == PPCISD::MFOCRF)
18099 return SDValue(VCMPrecNode, 0);
18100 }
18101 break;
18102 case ISD::BR_CC: {
18103 // If this is a branch on an altivec predicate comparison, lower this so
18104 // that we don't have to do a MFOCRF: instead, branch directly on CR6. This
18105 // lowering is done pre-legalize, because the legalizer lowers the predicate
18106 // compare down to code that is difficult to reassemble.
18107 // This code also handles branches that depend on the result of a store
18108 // conditional.
18109 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
18110 SDValue LHS = N->getOperand(2), RHS = N->getOperand(3);
18111
18112 int CompareOpc;
18113 bool isDot;
18114
18115 if (!isa<ConstantSDNode>(RHS) || (CC != ISD::SETEQ && CC != ISD::SETNE))
18116 break;
18117
18118 // Since we are doing this pre-legalize, the RHS can be a constant of
18119 // arbitrary bitwidth which may cause issues when trying to get the value
18120 // from the underlying APInt.
18121 auto RHSAPInt = RHS->getAsAPIntVal();
18122 if (!RHSAPInt.isIntN(64))
18123 break;
18124
18125 unsigned Val = RHSAPInt.getZExtValue();
18126 auto isImpossibleCompare = [&]() {
18127 // If this is a comparison against something other than 0/1, then we know
18128 // that the condition is never/always true.
18129 if (Val != 0 && Val != 1) {
18130 if (CC == ISD::SETEQ) // Cond never true, remove branch.
18131 return N->getOperand(0);
18132 // Always !=, turn it into an unconditional branch.
18133 return DAG.getNode(ISD::BR, dl, MVT::Other,
18134 N->getOperand(0), N->getOperand(4));
18135 }
18136 return SDValue();
18137 };
18138 // Combine branches fed by store conditional instructions (st[bhwd]cx).
18139 unsigned StoreWidth = 0;
18140 if (LHS.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
18141 isStoreConditional(LHS, StoreWidth)) {
18142 if (SDValue Impossible = isImpossibleCompare())
18143 return Impossible;
18144 PPC::Predicate CompOpc;
18145 // eq 0 => ne
18146 // ne 0 => eq
18147 // eq 1 => eq
18148 // ne 1 => ne
18149 if (Val == 0)
18150 CompOpc = CC == ISD::SETEQ ? PPC::PRED_NE : PPC::PRED_EQ;
18151 else
18152 CompOpc = CC == ISD::SETEQ ? PPC::PRED_EQ : PPC::PRED_NE;
18153
18154 SDValue Ops[] = {LHS.getOperand(0), LHS.getOperand(2), LHS.getOperand(3),
18155 DAG.getConstant(StoreWidth, dl, MVT::i32)};
18156 auto *MemNode = cast<MemSDNode>(LHS);
18157 SDValue ConstSt = DAG.getMemIntrinsicNode(
18158 PPCISD::STORE_COND, dl,
18159 DAG.getVTList(MVT::i32, MVT::Other, MVT::Glue), Ops,
18160 MemNode->getMemoryVT(), MemNode->getMemOperand());
18161
18162 SDValue InChain;
18163 // Unchain the branch from the original store conditional.
18164 if (N->getOperand(0) == LHS.getValue(1))
18165 InChain = LHS.getOperand(0);
18166 else if (N->getOperand(0).getOpcode() == ISD::TokenFactor) {
18167 SmallVector<SDValue, 4> InChains;
18168 SDValue InTF = N->getOperand(0);
18169 for (int i = 0, e = InTF.getNumOperands(); i < e; i++)
18170 if (InTF.getOperand(i) != LHS.getValue(1))
18171 InChains.push_back(InTF.getOperand(i));
18172 InChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, InChains);
18173 }
18174
18175 return DAG.getNode(PPCISD::COND_BRANCH, dl, MVT::Other, InChain,
18176 DAG.getConstant(CompOpc, dl, MVT::i32),
18177 DAG.getRegister(PPC::CR0, MVT::i32), N->getOperand(4),
18178 ConstSt.getValue(2));
18179 }
18180
18181 if (LHS.getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
18182 getVectorCompareInfo(LHS, CompareOpc, isDot, Subtarget)) {
18183 assert(isDot && "Can't compare against a vector result!");
18184
18185 if (SDValue Impossible = isImpossibleCompare())
18186 return Impossible;
18187
18188 bool BranchOnWhenPredTrue = (CC == ISD::SETEQ) ^ (Val == 0);
18189 // Create the PPCISD altivec 'dot' comparison node.
18190 SDValue Ops[] = {
18191 LHS.getOperand(2), // LHS of compare
18192 LHS.getOperand(3), // RHS of compare
18193 DAG.getConstant(CompareOpc, dl, MVT::i32)
18194 };
18195 EVT VTs[] = { LHS.getOperand(2).getValueType(), MVT::Glue };
18196 SDValue CompNode = DAG.getNode(PPCISD::VCMP_rec, dl, VTs, Ops);
18197
18198 // Unpack the result based on how the target uses it.
18199 PPC::Predicate CompOpc;
18200 switch (LHS.getConstantOperandVal(1)) {
18201 default: // Can't happen, don't crash on invalid number though.
18202 case 0: // Branch on the value of the EQ bit of CR6.
18203 CompOpc = BranchOnWhenPredTrue ? PPC::PRED_EQ : PPC::PRED_NE;
18204 break;
18205 case 1: // Branch on the inverted value of the EQ bit of CR6.
18206 CompOpc = BranchOnWhenPredTrue ? PPC::PRED_NE : PPC::PRED_EQ;
18207 break;
18208 case 2: // Branch on the value of the LT bit of CR6.
18209 CompOpc = BranchOnWhenPredTrue ? PPC::PRED_LT : PPC::PRED_GE;
18210 break;
18211 case 3: // Branch on the inverted value of the LT bit of CR6.
18212 CompOpc = BranchOnWhenPredTrue ? PPC::PRED_GE : PPC::PRED_LT;
18213 break;
18214 }
18215
18216 return DAG.getNode(PPCISD::COND_BRANCH, dl, MVT::Other, N->getOperand(0),
18217 DAG.getConstant(CompOpc, dl, MVT::i32),
18218 DAG.getRegister(PPC::CR6, MVT::i32),
18219 N->getOperand(4), CompNode.getValue(1));
18220 }
18221 break;
18222 }
18223 case ISD::BUILD_VECTOR:
18224 return DAGCombineBuildVector(N, DCI);
18225 case PPCISD::ADDC:
18226 return DAGCombineAddc(N, DCI);
18227 }
18228
18229 return SDValue();
18230}
18231
// Lower `sdiv X, C` where C is a (possibly negated) power of two into the
// PPC-specific sra+addze sequence (PPCISD::SRA_ADDZE), negating the quotient
// for negative divisors.  All nodes created here are reported via `Created`.
// NOTE(review): the leading declaration line (the SDNode *N and
// const APInt &Divisor parameters) appears to have been dropped in this
// rendering of the file -- confirm against the original source.
SDValue
                                 SelectionDAG &DAG,
                                 SmallVectorImpl<SDNode *> &Created) const {
  // fold (sdiv X, pow2)
  EVT VT = N->getValueType(0);
  // i64 division is only handled on 64-bit subtargets.
  if (VT == MVT::i64 && !Subtarget.isPPC64())
    return SDValue();
  // Only i32/i64 with a +/- power-of-two divisor is handled here.
  if ((VT != MVT::i32 && VT != MVT::i64) ||
      !(Divisor.isPowerOf2() || Divisor.isNegatedPowerOf2()))
    return SDValue();

  SDLoc DL(N);
  SDValue N0 = N->getOperand(0);

  bool IsNegPow2 = Divisor.isNegatedPowerOf2();
  // Shift amount = log2 of the divisor's magnitude.
  unsigned Lg2 = (IsNegPow2 ? -Divisor : Divisor).countr_zero();
  SDValue ShiftAmt = DAG.getConstant(Lg2, DL, VT);

  // sra+addze performs the round-toward-zero adjustment needed for signed
  // division by a power of two.
  SDValue Op = DAG.getNode(PPCISD::SRA_ADDZE, DL, VT, N0, ShiftAmt);
  Created.push_back(Op.getNode());

  // For a negated power of two, negate the quotient: 0 - Op.
  if (IsNegPow2) {
    Op = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Op);
    Created.push_back(Op.getNode());
  }

  return Op;
}
18261
18262//===----------------------------------------------------------------------===//
18263// Inline Assembly Support
18264//===----------------------------------------------------------------------===//
18265
// Report target-specific known-zero bits for PPC nodes: byte-reversed halfword
// loads clear the top 16 bits, ADDE of two zeros produces only 0/1, and the
// AltiVec predicate-compare intrinsics return a single 0/1 bit.
// NOTE(review): the line carrying the function name and `Op` parameter, and
// the `case ISD::INTRINSIC_WO_CHAIN:` / `case ISD::INTRINSIC_W_CHAIN:` labels
// below, appear to have been dropped in this rendering -- confirm against the
// original source.
                                       KnownBits &Known,
                                       const APInt &DemandedElts,
                                       const SelectionDAG &DAG,
                                       unsigned Depth) const {
  Known.resetAll();
  switch (Op.getOpcode()) {
  default: break;
  case PPCISD::LBRX: {
    // lhbrx is known to have the top bits cleared out.
    if (cast<VTSDNode>(Op.getOperand(2))->getVT() == MVT::i16)
      Known.Zero = 0xFFFF0000;
    break;
  }
  case PPCISD::ADDE: {
    if (Op.getResNo() == 0) {
      // (0|1), _ = ADDE 0, 0, CARRY
      // Zero plus zero plus a carry bit can only be 0 or 1.
      SDValue LHS = Op.getOperand(0);
      SDValue RHS = Op.getOperand(1);
      if (isNullConstant(LHS) && isNullConstant(RHS))
        Known.Zero = ~1ULL;
    }
    break;
  }
  // NOTE(review): `case ISD::INTRINSIC_WO_CHAIN: {` appears to be missing
  // here in this rendering.
    switch (Op.getConstantOperandVal(0)) {
    default: break;
    case Intrinsic::ppc_altivec_vcmpbfp_p:
    case Intrinsic::ppc_altivec_vcmpeqfp_p:
    case Intrinsic::ppc_altivec_vcmpequb_p:
    case Intrinsic::ppc_altivec_vcmpequh_p:
    case Intrinsic::ppc_altivec_vcmpequw_p:
    case Intrinsic::ppc_altivec_vcmpequd_p:
    case Intrinsic::ppc_altivec_vcmpequq_p:
    case Intrinsic::ppc_altivec_vcmpgefp_p:
    case Intrinsic::ppc_altivec_vcmpgtfp_p:
    case Intrinsic::ppc_altivec_vcmpgtsb_p:
    case Intrinsic::ppc_altivec_vcmpgtsh_p:
    case Intrinsic::ppc_altivec_vcmpgtsw_p:
    case Intrinsic::ppc_altivec_vcmpgtsd_p:
    case Intrinsic::ppc_altivec_vcmpgtsq_p:
    case Intrinsic::ppc_altivec_vcmpgtub_p:
    case Intrinsic::ppc_altivec_vcmpgtuh_p:
    case Intrinsic::ppc_altivec_vcmpgtuw_p:
    case Intrinsic::ppc_altivec_vcmpgtud_p:
    case Intrinsic::ppc_altivec_vcmpgtuq_p:
      Known.Zero = ~1U;  // All bits but the low one are known to be zero.
      break;
    }
    break;
  }
  // NOTE(review): `case ISD::INTRINSIC_W_CHAIN: {` appears to be missing
  // here in this rendering.
    switch (Op.getConstantOperandVal(1)) {
    default:
      break;
    case Intrinsic::ppc_load2r:
      // Top bits are cleared for load2r (which is the same as lhbrx).
      Known.Zero = 0xFFFF0000;
      break;
    }
    break;
  }
  }
}
18330
// Preferred loop alignment: on the listed cores, request 32-byte alignment for
// innermost nested loops and for small loops (17-32 bytes) so the whole loop
// fits in a single instruction-cache line / fetch group.
// NOTE(review): the declaration line (getPrefLoopAlignment(MachineLoop *ML)),
// the option guard opening the first inner scope, and the final fallback
// `return TargetLowering::getPrefLoopAlignment(ML);` appear to have been
// dropped in this rendering -- confirm against the original source.
  switch (Subtarget.getCPUDirective()) {
  default: break;
  case PPC::DIR_970:
  case PPC::DIR_PWR4:
  case PPC::DIR_PWR5:
  case PPC::DIR_PWR5X:
  case PPC::DIR_PWR6:
  case PPC::DIR_PWR6X:
  case PPC::DIR_PWR7:
  case PPC::DIR_PWR8:
  case PPC::DIR_PWR9:
  case PPC::DIR_PWR10:
  case PPC::DIR_PWR11:
  case PPC::DIR_PWR_FUTURE: {
    // No loop information: nothing target-specific to say.
    if (!ML)
      break;

    // If the nested loop is an innermost loop, prefer to a 32-byte alignment,
    // so that we can decrease cache misses and branch-prediction misses.
    // Actual alignment of the loop will depend on the hotness check and other
    // logic in alignBlocks.
    if (ML->getLoopDepth() > 1 && ML->getSubLoops().empty())
      return Align(32);
    }

    const PPCInstrInfo *TII = Subtarget.getInstrInfo();

    // For small loops (between 5 and 8 instructions), align to a 32-byte
    // boundary so that the entire loop fits in one instruction-cache line.
    uint64_t LoopSize = 0;
    for (auto I = ML->block_begin(), IE = ML->block_end(); I != IE; ++I)
      for (const MachineInstr &J : **I) {
        LoopSize += TII->getInstSizeInBytes(J);
        // Past 32 bytes the answer can't change; stop summing early.
        if (LoopSize > 32)
          break;
      }

    if (LoopSize > 16 && LoopSize <= 32)
      return Align(32);

    break;
  }
  }

}
18379
18380/// getConstraintType - Given a constraint, return the type of
18381/// constraint it is for this target.
// Classify each PPC inline-asm constraint string into register-class/memory
// categories; anything unrecognized defers to the generic implementation.
// NOTE(review): the declaration line
// (PPCTargetLowering::getConstraintType(StringRef Constraint) const) appears
// to have been dropped in this rendering -- confirm against the original
// source.
  if (Constraint.size() == 1) {
    switch (Constraint[0]) {
    default: break;
    case 'b': // base register
    case 'r': // general-purpose register
    case 'f': // floating-point register
    case 'd': // FP register (64-bit)
    case 'v': // AltiVec vector register
    case 'y': // condition register
      return C_RegisterClass;
    case 'Z':
      // FIXME: While Z does indicate a memory constraint, it specifically
      // indicates an r+r address (used in conjunction with the 'y' modifier
      // in the replacement string). Currently, we're forcing the base
      // register to be r0 in the asm printer (which is interpreted as zero)
      // and forming the complete address in the second register. This is
      // suboptimal.
      return C_Memory;
    }
  } else if (Constraint == "wc") { // individual CR bits.
    return C_RegisterClass;
  } else if (Constraint == "wa" || Constraint == "wd" ||
             Constraint == "wf" || Constraint == "ws" ||
             Constraint == "wi" || Constraint == "ww") {
    return C_RegisterClass; // VSX registers.
  }
  return TargetLowering::getConstraintType(Constraint);
}
18412
18413/// Examine constraint type and operand type and determine a weight value.
18414/// This object must already have been set up with the operand type
18415/// and the current alternative constraint selected.
// Score how well the operand's IR type fits a PPC inline-asm constraint;
// CW_Register is returned for type/constraint pairs that map directly onto a
// register class.
// NOTE(review): the declaration lines, the `weight` initialization, and the
// fallback assignment under `default:` appear to have been dropped in this
// rendering -- confirm against the original source.
    AsmOperandInfo &info, const char *constraint) const {
  Value *CallOperandVal = info.CallOperandVal;
  // If we don't have a value, we can't do a match,
  // but allow it at the lowest weight.
  if (!CallOperandVal)
    return CW_Default;
  Type *type = CallOperandVal->getType();

  // Look at the constraint type.
  if (StringRef(constraint) == "wc" && type->isIntegerTy(1))
    return CW_Register; // an individual CR bit.
  else if ((StringRef(constraint) == "wa" ||
            StringRef(constraint) == "wd" ||
            StringRef(constraint) == "wf") &&
           type->isVectorTy())
    return CW_Register;
  else if (StringRef(constraint) == "wi" && type->isIntegerTy(64))
    return CW_Register; // just hold 64-bit integers data.
  else if (StringRef(constraint) == "ws" && type->isDoubleTy())
    return CW_Register;
  else if (StringRef(constraint) == "ww" && type->isFloatTy())
    return CW_Register;

  switch (*constraint) {
  default:
    // NOTE(review): the generic-fallback weight computation appears to have
    // been dropped here in this rendering.
    break;
  case 'b': // base register: any integer fits
    if (type->isIntegerTy())
      weight = CW_Register;
    break;
  case 'f': // single-precision FP register
    if (type->isFloatTy())
      weight = CW_Register;
    break;
  case 'd': // double-precision FP register
    if (type->isDoubleTy())
      weight = CW_Register;
    break;
  case 'v': // vector register
    if (type->isVectorTy())
      weight = CW_Register;
    break;
  case 'y': // condition register: always a register match
    weight = CW_Register;
    break;
  case 'Z': // memory (r+r address)
    weight = CW_Memory;
    break;
  }
  return weight;
}
18471
// Map an inline-asm constraint (single letters, VSX "w*" strings, "lr", or an
// explicit "{...}" physical-register name) to a concrete register / register
// class, with PPC-specific fixups for VSX names, FP registers, 64-bit GPR
// upgrades, and the "cc" alias.
// NOTE(review): the second declaration line (taking the TargetRegisterInfo*)
// and the right-hand side of the base-class delegation assigning `R` appear
// to have been dropped in this rendering -- confirm against the original
// source.
std::pair<unsigned, const TargetRegisterClass *>
                                                StringRef Constraint,
                                                MVT VT) const {
  if (Constraint.size() == 1) {
    // GCC RS6000 Constraint Letters
    switch (Constraint[0]) {
    case 'b': // R1-R31
      if (VT == MVT::i64 && Subtarget.isPPC64())
        return std::make_pair(0U, &PPC::G8RC_NOX0RegClass);
      return std::make_pair(0U, &PPC::GPRC_NOR0RegClass);
    case 'r': // R0-R31
      if (VT == MVT::i64 && Subtarget.isPPC64())
        return std::make_pair(0U, &PPC::G8RCRegClass);
      return std::make_pair(0U, &PPC::GPRCRegClass);
    // 'd' and 'f' constraints are both defined to be "the floating point
    // registers", where one is for 32-bit and the other for 64-bit. We don't
    // really care overly much here so just give them all the same reg classes.
    case 'd':
    case 'f':
      if (Subtarget.hasSPE()) {
        // SPE keeps FP values in GPRs (f32) or SPE pairs (f64).
        if (VT == MVT::f32 || VT == MVT::i32)
          return std::make_pair(0U, &PPC::GPRCRegClass);
        if (VT == MVT::f64 || VT == MVT::i64)
          return std::make_pair(0U, &PPC::SPERCRegClass);
      } else {
        if (VT == MVT::f32 || VT == MVT::i32)
          return std::make_pair(0U, &PPC::F4RCRegClass);
        if (VT == MVT::f64 || VT == MVT::i64)
          return std::make_pair(0U, &PPC::F8RCRegClass);
      }
      break;
    case 'v':
      if (Subtarget.hasAltivec() && VT.isVector())
        return std::make_pair(0U, &PPC::VRRCRegClass);
      else if (Subtarget.hasVSX())
        // Scalars in Altivec registers only make sense with VSX.
        return std::make_pair(0U, &PPC::VFRCRegClass);
      break;
    case 'y':   // crrc
      return std::make_pair(0U, &PPC::CRRCRegClass);
    }
  } else if (Constraint == "wc" && Subtarget.useCRBits()) {
    // An individual CR bit.
    return std::make_pair(0U, &PPC::CRBITRCRegClass);
  } else if ((Constraint == "wa" || Constraint == "wd" ||
             Constraint == "wf" || Constraint == "wi") &&
             Subtarget.hasVSX()) {
    // A VSX register for either a scalar (FP) or vector. There is no
    // support for single precision scalars on subtargets prior to Power8.
    if (VT.isVector())
      return std::make_pair(0U, &PPC::VSRCRegClass);
    if (VT == MVT::f32 && Subtarget.hasP8Vector())
      return std::make_pair(0U, &PPC::VSSRCRegClass);
    return std::make_pair(0U, &PPC::VSFRCRegClass);
  } else if ((Constraint == "ws" || Constraint == "ww") && Subtarget.hasVSX()) {
    if (VT == MVT::f32 && Subtarget.hasP8Vector())
      return std::make_pair(0U, &PPC::VSSRCRegClass);
    else
      return std::make_pair(0U, &PPC::VSFRCRegClass);
  } else if (Constraint == "lr") {
    // The link register, 64- or 32-bit flavor.
    if (VT == MVT::i64)
      return std::make_pair(0U, &PPC::LR8RCRegClass);
    else
      return std::make_pair(0U, &PPC::LRRCRegClass);
  }

  // Handle special cases of physical registers that are not properly handled
  // by the base class.
  if (Constraint[0] == '{' && Constraint[Constraint.size() - 1] == '}') {
    // If we name a VSX register, we can't defer to the base class because it
    // will not recognize the correct register (their names will be VSL{0-31}
    // and V{0-31} so they won't match). So we match them here.
    if (Constraint.size() > 3 && Constraint[1] == 'v' && Constraint[2] == 's') {
      int VSNum = atoi(Constraint.data() + 3);
      assert(VSNum >= 0 && VSNum <= 63 &&
             "Attempted to access a vsr out of range");
      if (VSNum < 32)
        return std::make_pair(PPC::VSL0 + VSNum, &PPC::VSRCRegClass);
      return std::make_pair(PPC::V0 + VSNum - 32, &PPC::VSRCRegClass);
    }

    // For float registers, we can't defer to the base class as it will match
    // the SPILLTOVSRRC class.
    if (Constraint.size() > 3 && Constraint[1] == 'f') {
      int RegNum = atoi(Constraint.data() + 2);
      if (RegNum > 31 || RegNum < 0)
        report_fatal_error("Invalid floating point register number");
      if (VT == MVT::f32 || VT == MVT::i32)
        return Subtarget.hasSPE()
                   ? std::make_pair(PPC::R0 + RegNum, &PPC::GPRCRegClass)
                   : std::make_pair(PPC::F0 + RegNum, &PPC::F4RCRegClass);
      if (VT == MVT::f64 || VT == MVT::i64)
        return Subtarget.hasSPE()
                   ? std::make_pair(PPC::S0 + RegNum, &PPC::SPERCRegClass)
                   : std::make_pair(PPC::F0 + RegNum, &PPC::F8RCRegClass);
    }
  }

  // NOTE(review): the base-class call initializing R appears to have been
  // dropped in this rendering.
  std::pair<unsigned, const TargetRegisterClass *> R =

  // r[0-9]+ are used, on PPC64, to refer to the corresponding 64-bit registers
  // (which we call X[0-9]+). If a 64-bit value has been requested, and a
  // 32-bit GPR has been selected, then 'upgrade' it to the 64-bit parent
  // register.
  // FIXME: If TargetLowering::getRegForInlineAsmConstraint could somehow use
  // the AsmName field from *RegisterInfo.td, then this would not be necessary.
  if (R.first && VT == MVT::i64 && Subtarget.isPPC64() &&
      PPC::GPRCRegClass.contains(R.first))
    return std::make_pair(TRI->getMatchingSuperReg(R.first,
                          PPC::sub_32, &PPC::G8RCRegClass),
                          &PPC::G8RCRegClass);

  // GCC accepts 'cc' as an alias for 'cr0', and we need to do the same.
  if (!R.second && StringRef("{cc}").equals_insensitive(Constraint)) {
    R.first = PPC::CR0;
    R.second = &PPC::CRRCRegClass;
  }
  // FIXME: This warning should ideally be emitted in the front end.
  const auto &TM = getTargetMachine();
  if (Subtarget.isAIXABI() && !TM.getAIXExtendedAltivecABI()) {
    if (((R.first >= PPC::V20 && R.first <= PPC::V31) ||
         (R.first >= PPC::VF20 && R.first <= PPC::VF31)) &&
        (R.second == &PPC::VSRCRegClass || R.second == &PPC::VSFRCRegClass))
      errs() << "warning: vector registers 20 to 32 are reserved in the "
                "default AIX AltiVec ABI and cannot be used\n";
  }

  return R;
}
18603
18604/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
18605/// vector. If it is invalid, don't add anything to Ops.
// Lower an immediate operand for one of the single-letter PPC inline-asm
// immediate constraints (I/J/K/L/M/N/O/P); the matched target constant is
// appended to Ops, otherwise the generic handler is used.
// NOTE(review): the first declaration line (taking SDValue Op), the
// ConstantSDNode dyn_cast initializing CST, the guard conditions for the 'J'
// and 'L' cases, and the trailing base-class delegation appear to have been
// dropped in this rendering -- confirm against the original source.
                                                     StringRef Constraint,
                                                     std::vector<SDValue> &Ops,
                                                     SelectionDAG &DAG) const {
  SDValue Result;

  // Only support length 1 constraints.
  if (Constraint.size() > 1)
    return;

  char Letter = Constraint[0];
  switch (Letter) {
  default: break;
  case 'I':
  case 'J':
  case 'K':
  case 'L':
  case 'M':
  case 'N':
  case 'O':
  case 'P': {
    if (!CST) return; // Must be an immediate to match.
    SDLoc dl(Op);
    int64_t Value = CST->getSExtValue();
    EVT TCVT = MVT::i64; // All constants taken to be 64 bits so that negative
                         // numbers are printed as such.
    switch (Letter) {
    default: llvm_unreachable("Unknown constraint letter!");
    case 'I':  // "I" is a signed 16-bit constant.
      if (isInt<16>(Value))
        Result = DAG.getTargetConstant(Value, dl, TCVT);
      break;
    case 'J':  // "J" is a constant with only the high-order 16 bits nonzero.
      // NOTE(review): the shifted-unsigned-16-bit guard appears dropped here.
      Result = DAG.getTargetConstant(Value, dl, TCVT);
      break;
    case 'L':  // "L" is a signed 16-bit constant shifted left 16 bits.
      // NOTE(review): the shifted-signed-16-bit guard appears dropped here.
      Result = DAG.getTargetConstant(Value, dl, TCVT);
      break;
    case 'K':  // "K" is a constant with only the low-order 16 bits nonzero.
      if (isUInt<16>(Value))
        Result = DAG.getTargetConstant(Value, dl, TCVT);
      break;
    case 'M':  // "M" is a constant that is greater than 31.
      if (Value > 31)
        Result = DAG.getTargetConstant(Value, dl, TCVT);
      break;
    case 'N':  // "N" is a positive constant that is an exact power of two.
      if (Value > 0 && isPowerOf2_64(Value))
        Result = DAG.getTargetConstant(Value, dl, TCVT);
      break;
    case 'O':  // "O" is the constant zero.
      if (Value == 0)
        Result = DAG.getTargetConstant(Value, dl, TCVT);
      break;
    case 'P':  // "P" is a constant whose negation is a signed 16-bit constant.
      if (isInt<16>(-Value))
        Result = DAG.getTargetConstant(Value, dl, TCVT);
      break;
    }
    break;
  }
  }

  if (Result.getNode()) {
    Ops.push_back(Result);
    return;
  }

  // Handle standard constraint letters.
}
18680
// For the PPC trap intrinsics (tw/tdw/trap/trapd), forward any !annotation
// metadata attached to the call as an extra MDNode operand so it survives
// into the SelectionDAG.
// NOTE(review): the leading declaration lines (the CallInst and the operand
// vector parameters) appear to have been dropped in this rendering --
// confirm against the original source.
                                                    SelectionDAG &DAG) const {
  // Need at least the intrinsic-ID operand beyond the chain.
  if (I.getNumOperands() <= 1)
    return;
  if (!isa<ConstantSDNode>(Ops[1].getNode()))
    return;
  auto IntrinsicID = Ops[1].getNode()->getAsZExtVal();
  // Only the trap-family intrinsics carry the annotation through.
  if (IntrinsicID != Intrinsic::ppc_tdw && IntrinsicID != Intrinsic::ppc_tw &&
      IntrinsicID != Intrinsic::ppc_trapd && IntrinsicID != Intrinsic::ppc_trap)
    return;

  if (MDNode *MDN = I.getMetadata(LLVMContext::MD_annotation))
    Ops.push_back(DAG.getMDNode(MDN));
}
18696
18697// isLegalAddressingMode - Return true if the addressing mode represented
18698// by AM is legal for this target, for a load/store of the specified type.
18700 const AddrMode &AM, Type *Ty,
18701 unsigned AS,
18702 Instruction *I) const {
18703 // Vector type r+i form is supported since power9 as DQ form. We don't check
18704 // the offset matching DQ form requirement(off % 16 == 0), because on PowerPC,
18705 // imm form is preferred and the offset can be adjusted to use imm form later
18706 // in pass PPCLoopInstrFormPrep. Also in LSR, for one LSRUse, it uses min and
18707 // max offset to check legal addressing mode, we should be a little aggressive
18708 // to contain other offsets for that LSRUse.
18709 if (Ty->isVectorTy() && AM.BaseOffs != 0 && !Subtarget.hasP9Vector())
18710 return false;
18711
18712 // PPC allows a sign-extended 16-bit immediate field.
18713 if (AM.BaseOffs <= -(1LL << 16) || AM.BaseOffs >= (1LL << 16)-1)
18714 return false;
18715
18716 // No global is ever allowed as a base.
18717 if (AM.BaseGV)
18718 return false;
18719
18720 // PPC only support r+r,
18721 switch (AM.Scale) {
18722 case 0: // "r+i" or just "i", depending on HasBaseReg.
18723 break;
18724 case 1:
18725 if (AM.HasBaseReg && AM.BaseOffs) // "r+r+i" is not allowed.
18726 return false;
18727 // Otherwise we have r+r or r+i.
18728 break;
18729 case 2:
18730 if (AM.HasBaseReg || AM.BaseOffs) // 2*r+r or 2*r+i is not allowed.
18731 return false;
18732 // Allow 2*r as r+r.
18733 break;
18734 default:
18735 // No other scales are supported.
18736 return false;
18737 }
18738
18739 return true;
18740}
18741
// Lower @llvm.returnaddress: for depth 0 load the saved LR slot of this
// frame; for depth > 0 walk to the caller's frame and load LR at the ABI's
// return-save offset.
// NOTE(review): several statements appear to have been dropped in this
// rendering (the MachineFunction lookup feeding MF, the frame-address source
// of the first load, and the MachinePointerInfo arguments of the loads) --
// confirm against the original source.
SDValue PPCTargetLowering::LowerRETURNADDR(SDValue Op,
                                           SelectionDAG &DAG) const {
  MachineFrameInfo &MFI = MF.getFrameInfo();
  MFI.setReturnAddressIsTaken(true);

  SDLoc dl(Op);
  unsigned Depth = Op.getConstantOperandVal(0);

  // Make sure the function does not optimize away the store of the RA to
  // the stack.
  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
  FuncInfo->setLRStoreRequired();
  auto PtrVT = getPointerTy(MF.getDataLayout());

  if (Depth > 0) {
    // The link register (return address) is saved in the caller's frame
    // not the callee's stack frame. So we must get the caller's frame
    // address and load the return address at the LR offset from there.
    SDValue FrameAddr =
        DAG.getLoad(Op.getValueType(), dl, DAG.getEntryNode(),
    SDValue Offset =
        DAG.getConstant(Subtarget.getFrameLowering()->getReturnSaveOffset(), dl,
                        Subtarget.getScalarIntVT());
    return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(),
                       DAG.getNode(ISD::ADD, dl, PtrVT, FrameAddr, Offset),
  }

  // Just load the return address off the stack.
  SDValue RetAddrFI = getReturnAddrFrameIndex(DAG);
  return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), RetAddrFI,
}
18777
18778SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op,
18779 SelectionDAG &DAG) const {
18780 SDLoc dl(Op);
18781 unsigned Depth = Op.getConstantOperandVal(0);
18782
18783 MachineFunction &MF = DAG.getMachineFunction();
18784 MachineFrameInfo &MFI = MF.getFrameInfo();
18785 MFI.setFrameAddressIsTaken(true);
18786
18787 EVT PtrVT = getPointerTy(MF.getDataLayout());
18788 bool isPPC64 = PtrVT == MVT::i64;
18789
18790 // Naked functions never have a frame pointer, and so we use r1. For all
18791 // other functions, this decision must be delayed until during PEI.
18792 unsigned FrameReg;
18793 if (MF.getFunction().hasFnAttribute(Attribute::Naked))
18794 FrameReg = isPPC64 ? PPC::X1 : PPC::R1;
18795 else
18796 FrameReg = isPPC64 ? PPC::FP8 : PPC::FP;
18797
18798 SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg,
18799 PtrVT);
18800 while (Depth--)
18801 FrameAddr = DAG.getLoad(Op.getValueType(), dl, DAG.getEntryNode(),
18802 FrameAddr, MachinePointerInfo());
18803 return FrameAddr;
18804}
18805
18806#define GET_REGISTER_MATCHER
18807#include "PPCGenAsmMatcher.inc"
18808
// Resolve a named register (for read_register / global register variables) to
// a physical PPC register, rejecting reserved ones and promoting rNN to the
// 64-bit xNN form when a 64-bit value was requested.
// NOTE(review): the leading declaration line (RegName and LLT parameters)
// and the statement that matches RegName into `Reg` appear to have been
// dropped in this rendering -- confirm against the original source.
                                               const MachineFunction &MF) const {
  bool IsPPC64 = Subtarget.isPPC64();

  // Only 32-bit values (and 64-bit values on PPC64) are supported.
  bool Is64Bit = IsPPC64 && VT == LLT::scalar(64);
  if (!Is64Bit && VT != LLT::scalar(32))
    report_fatal_error("Invalid register global variable type");

  if (!Reg)
    return Reg;

  // FIXME: Unable to generate code for `-O2` but okay for `-O0`.
  // Need followup investigation as to why.
  if ((IsPPC64 && Reg == PPC::R2) || Reg == PPC::R0)
    report_fatal_error(Twine("Trying to reserve an invalid register \"" +
                             StringRef(RegName) + "\"."));

  // Convert GPR to GP8R register for 64bit.
  if (Is64Bit && StringRef(RegName).starts_with_insensitive("r"))
    Reg = Reg.id() - PPC::R0 + PPC::X0;

  return Reg;
}
18833
// Report whether a symbol operand must be reached indirectly through the
// GOT/TOC (32-bit ELF, AIX, small/large code models, jump tables, block
// addresses, and GVs the subtarget marks indirect).
// NOTE(review): the declaration line, the code-model lookup initializing
// CModel, and the `if` guards in front of the last two returns appear to
// have been dropped in this rendering -- confirm against the original
// source.
  // 32-bit SVR4 ABI access everything as got-indirect.
  if (Subtarget.is32BitELFABI())
    return true;

  // AIX accesses everything indirectly through the TOC, which is similar to
  // the GOT.
  if (Subtarget.isAIXABI())
    return true;

  // If it is small or large code model, module locals are accessed
  // indirectly by loading their address from .toc/.got.
  if (CModel == CodeModel::Small || CModel == CodeModel::Large)
    return true;

  // JumpTable and BlockAddress are accessed as got-indirect.
    return true;

    return Subtarget.isGVIndirectSymbol(G->getGlobal());

  return false;
}
18859
// Whether a constant offset can be folded into a global address node; PPC
// does not support this.
// NOTE(review): the line naming the function (isOffsetFoldingLegal, taking a
// const GlobalAddressSDNode *) appears to have been dropped in this
// rendering -- confirm against the original source.
bool
  // The PowerPC target isn't yet aware of offsets.
  return false;
}
18865
// Describe the memory behavior of PPC memory-touching intrinsics (i128
// atomics, AltiVec/VSX loads and stores, and the st[bhwd]cx. family) so
// MachineMemOperands can be attached to them.
// NOTE(review): the leading declaration lines and every `Info.flags = ...`
// assignment (immediately before each `Infos.push_back(Info);`) appear to
// have been dropped in this rendering -- confirm against the original
// source.
                                           MachineFunction &MF, unsigned Intrinsic) const {
  IntrinsicInfo Info;
  switch (Intrinsic) {
  // 16-byte atomics: read-modify-write on the i128 pointed to by arg 0.
  case Intrinsic::ppc_atomicrmw_xchg_i128:
  case Intrinsic::ppc_atomicrmw_add_i128:
  case Intrinsic::ppc_atomicrmw_sub_i128:
  case Intrinsic::ppc_atomicrmw_nand_i128:
  case Intrinsic::ppc_atomicrmw_and_i128:
  case Intrinsic::ppc_atomicrmw_or_i128:
  case Intrinsic::ppc_atomicrmw_xor_i128:
  case Intrinsic::ppc_cmpxchg_i128:
    Info.opc = ISD::INTRINSIC_W_CHAIN;
    Info.memVT = MVT::i128;
    Info.ptrVal = I.getArgOperand(0);
    Info.offset = 0;
    Info.align = Align(16);
    Infos.push_back(Info);
    return;
  case Intrinsic::ppc_atomic_load_i128:
    Info.opc = ISD::INTRINSIC_W_CHAIN;
    Info.memVT = MVT::i128;
    Info.ptrVal = I.getArgOperand(0);
    Info.offset = 0;
    Info.align = Align(16);
    Infos.push_back(Info);
    return;
  case Intrinsic::ppc_atomic_store_i128:
    Info.opc = ISD::INTRINSIC_VOID;
    Info.memVT = MVT::i128;
    // For the store, the pointer is argument 2 (value halves come first).
    Info.ptrVal = I.getArgOperand(2);
    Info.offset = 0;
    Info.align = Align(16);
    Infos.push_back(Info);
    return;
  case Intrinsic::ppc_altivec_lvx:
  case Intrinsic::ppc_altivec_lvxl:
  case Intrinsic::ppc_altivec_lvebx:
  case Intrinsic::ppc_altivec_lvehx:
  case Intrinsic::ppc_altivec_lvewx:
  case Intrinsic::ppc_vsx_lxvd2x:
  case Intrinsic::ppc_vsx_lxvw4x:
  case Intrinsic::ppc_vsx_lxvd2x_be:
  case Intrinsic::ppc_vsx_lxvw4x_be:
  case Intrinsic::ppc_vsx_lxvl:
  case Intrinsic::ppc_vsx_lxvll: {
    // Pick the element/vector type implied by the specific load intrinsic.
    EVT VT;
    switch (Intrinsic) {
    case Intrinsic::ppc_altivec_lvebx:
      VT = MVT::i8;
      break;
    case Intrinsic::ppc_altivec_lvehx:
      VT = MVT::i16;
      break;
    case Intrinsic::ppc_altivec_lvewx:
      VT = MVT::i32;
      break;
    case Intrinsic::ppc_vsx_lxvd2x:
    case Intrinsic::ppc_vsx_lxvd2x_be:
      VT = MVT::v2f64;
      break;
    default:
      VT = MVT::v4i32;
      break;
    }

    Info.opc = ISD::INTRINSIC_W_CHAIN;
    Info.memVT = VT;
    Info.ptrVal = I.getArgOperand(0);
    // The pointer is implicitly 16-byte aligned by the hardware, so the
    // conservative accessed range is [ptr - (size-1), ptr + (size-1)].
    Info.offset = -VT.getStoreSize()+1;
    Info.size = 2*VT.getStoreSize()-1;
    Info.align = Align(1);
    Info.flags = MachineMemOperand::MOLoad;
    Infos.push_back(Info);
    return;
  }
  case Intrinsic::ppc_altivec_stvx:
  case Intrinsic::ppc_altivec_stvxl:
  case Intrinsic::ppc_altivec_stvebx:
  case Intrinsic::ppc_altivec_stvehx:
  case Intrinsic::ppc_altivec_stvewx:
  case Intrinsic::ppc_vsx_stxvd2x:
  case Intrinsic::ppc_vsx_stxvw4x:
  case Intrinsic::ppc_vsx_stxvd2x_be:
  case Intrinsic::ppc_vsx_stxvw4x_be:
  case Intrinsic::ppc_vsx_stxvl:
  case Intrinsic::ppc_vsx_stxvll: {
    // Same type selection as the load cases above, for the store variants.
    EVT VT;
    switch (Intrinsic) {
    case Intrinsic::ppc_altivec_stvebx:
      VT = MVT::i8;
      break;
    case Intrinsic::ppc_altivec_stvehx:
      VT = MVT::i16;
      break;
    case Intrinsic::ppc_altivec_stvewx:
      VT = MVT::i32;
      break;
    case Intrinsic::ppc_vsx_stxvd2x:
    case Intrinsic::ppc_vsx_stxvd2x_be:
      VT = MVT::v2f64;
      break;
    default:
      VT = MVT::v4i32;
      break;
    }

    Info.opc = ISD::INTRINSIC_VOID;
    Info.memVT = VT;
    // Stores take (value, ptr): the pointer is argument 1.
    Info.ptrVal = I.getArgOperand(1);
    Info.offset = -VT.getStoreSize()+1;
    Info.size = 2*VT.getStoreSize()-1;
    Info.align = Align(1);
    Info.flags = MachineMemOperand::MOStore;
    Infos.push_back(Info);
    return;
  }
  case Intrinsic::ppc_stdcx:
  case Intrinsic::ppc_stwcx:
  case Intrinsic::ppc_sthcx:
  case Intrinsic::ppc_stbcx: {
    // Store-conditional family: width and natural alignment per variant.
    EVT VT;
    auto Alignment = Align(8);
    switch (Intrinsic) {
    case Intrinsic::ppc_stdcx:
      VT = MVT::i64;
      break;
    case Intrinsic::ppc_stwcx:
      VT = MVT::i32;
      Alignment = Align(4);
      break;
    case Intrinsic::ppc_sthcx:
      VT = MVT::i16;
      Alignment = Align(2);
      break;
    case Intrinsic::ppc_stbcx:
      VT = MVT::i8;
      Alignment = Align(1);
      break;
    }
    Info.opc = ISD::INTRINSIC_W_CHAIN;
    Info.memVT = VT;
    Info.ptrVal = I.getArgOperand(0);
    Info.offset = 0;
    Info.align = Alignment;
    Infos.push_back(Info);
    return;
  }
  default:
    break;
  }
}
19024
19025/// It returns EVT::Other if the type should be determined using generic
19026/// target-independent logic.
// Choose the widest profitable type for lowering memcpy/memmove/memset:
// vector types when Altivec/VSX applies, otherwise the GPR width.
// NOTE(review): the leading declaration line (function name) appears to have
// been dropped in this rendering -- confirm against the original source.
    LLVMContext &Context, const MemOp &Op,
    const AttributeList &FuncAttributes) const {
  if (getTargetMachine().getOptLevel() != CodeGenOptLevel::None) {
    // We should use Altivec/VSX loads and stores when available. For unaligned
    // addresses, unaligned VSX loads are only fast starting with the P8.
    if (Subtarget.hasAltivec() && Op.size() >= 16) {
      if (Op.isMemset() && Subtarget.hasVSX()) {
        uint64_t TailSize = Op.size() % 16;
        // For memset lowering, EXTRACT_VECTOR_ELT tries to return constant
        // element if vector element type matches tail store. For tail size
        // 3/4, the tail store is i32, v4i32 cannot be used, need a legal one.
        if (TailSize > 2 && TailSize <= 4) {
          return MVT::v8i16;
        }
        return MVT::v4i32;
      }
      if (Op.isAligned(Align(16)) || Subtarget.hasP8Vector())
        return MVT::v4i32;
    }
  }

  // Otherwise fall back to GPR-width integer copies.
  if (Subtarget.isPPC64()) {
    return MVT::i64;
  }

  return MVT::i32;
}
19055
19056/// Returns true if it is beneficial to convert a load of a constant
19057/// to just the constant itself.
// Any integer constant of 64 bits or fewer is cheaper to materialize with
// immediates than to load from the constant pool.
// NOTE(review): the leading declaration line (function name and the
// const APInt &Imm parameter) appears to have been dropped in this
// rendering -- confirm against the original source.
                                                     Type *Ty) const {
  assert(Ty->isIntegerTy());

  unsigned BitSize = Ty->getPrimitiveSizeInBits();
  return !(BitSize == 0 || BitSize > 64);
}
19065
// Truncating i64 to i32 is free on PPC (just use the low word); no other IR
// type truncation is considered free.
// NOTE(review): the declaration line (isTruncateFree(Type *Ty1, Type *Ty2))
// appears to have been dropped in this rendering -- confirm against the
// original source.
  if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
    return false;
  unsigned NumBits1 = Ty1->getPrimitiveSizeInBits();
  unsigned NumBits2 = Ty2->getPrimitiveSizeInBits();
  return NumBits1 == 64 && NumBits2 == 32;
}
19073
// EVT overload of the above: only the i64 -> i32 truncation is free.
// NOTE(review): the declaration line (isTruncateFree(EVT VT1, EVT VT2))
// appears to have been dropped in this rendering -- confirm against the
// original source.
  if (!VT1.isInteger() || !VT2.isInteger())
    return false;
  unsigned NumBits1 = VT1.getSizeInBits();
  unsigned NumBits2 = VT2.getSizeInBits();
  return NumBits1 == 64 && NumBits2 == 32;
}
19081
// NOTE(review): the declaration line (isZExtFree(SDValue Val, EVT VT2))
// appears to have been dropped in this rendering -- confirm against the
// original source.
  // Generally speaking, zexts are not free, but they are free when they can be
  // folded with other operations.
  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Val)) {
    EVT MemVT = LD->getMemoryVT();
    // Narrow loads (and i32 loads on PPC64) zero-extend for free via
    // lbz/lhz/lwz, as long as the load isn't already sign-extending.
    if ((MemVT == MVT::i1 || MemVT == MVT::i8 || MemVT == MVT::i16 ||
         (Subtarget.isPPC64() && MemVT == MVT::i32)) &&
        (LD->getExtensionType() == ISD::NON_EXTLOAD ||
         LD->getExtensionType() == ISD::ZEXTLOAD))
      return true;
  }

  // FIXME: Add other cases...
  //  - 32-bit shifts with a zext to i64
  //  - zext after ctlz, bswap, etc.
  //  - zext after and by a constant mask

  return TargetLowering::isZExtFree(Val, VT2);
}
19101
19102bool PPCTargetLowering::isFPExtFree(EVT DestVT, EVT SrcVT) const {
19103 assert(DestVT.isFloatingPoint() && SrcVT.isFloatingPoint() &&
19104 "invalid fpext types");
19105 // Extending to float128 is not free.
19106 if (DestVT == MVT::f128)
19107 return false;
19108 return true;
19109}
19110
  // Compare immediates: anything encodable as a signed (cmpi) or unsigned
  // (cmpli) 16-bit immediate is legal.
  // NOTE(review): the declaration line (isLegalICmpImmediate(int64_t Imm))
  // appears to have been dropped in this rendering -- confirm against the
  // original source.
  return isInt<16>(Imm) || isUInt<16>(Imm);
}
19114
  // Add immediates: the same signed/unsigned 16-bit range (addi/addis forms).
  // NOTE(review): the declaration line (isLegalAddImmediate(int64_t Imm))
  // appears to have been dropped in this rendering -- confirm against the
  // original source.
  return isInt<16>(Imm) || isUInt<16>(Imm);
}
19118
// Report whether unaligned accesses of type VT are permitted (and mark them
// fast): simple scalar types always, and the four 16-byte VSX vector types
// when VSX is available.
// NOTE(review): the leading declaration lines and the condition guarding the
// first `return false` appear to have been dropped in this rendering --
// confirm against the original source.
                                                  unsigned *Fast) const {
    return false;

  // PowerPC supports unaligned memory access for simple non-vector types.
  // Although accessing unaligned addresses is not as efficient as accessing
  // aligned addresses, it is generally more efficient than manual expansion,
  // and generally only traps for software emulation when crossing page
  // boundaries.

  if (!VT.isSimple())
    return false;

  // Unaligned scalar FP needs explicit subtarget support.
  if (VT.isFloatingPoint() && !VT.isVector() &&
      !Subtarget.allowsUnalignedFPAccess())
    return false;

  if (VT.getSimpleVT().isVector()) {
    if (Subtarget.hasVSX()) {
      // Only the natively-supported 16-byte VSX types.
      if (VT != MVT::v2f64 && VT != MVT::v2i64 &&
          VT != MVT::v4f32 && VT != MVT::v4i32)
        return false;
    } else {
      return false;
    }
  }

  // ppcf128 (double-double) accesses are not supported unaligned.
  if (VT == MVT::ppcf128)
    return false;

  if (Fast)
    *Fast = 1;

  return true;
}
19156
19158 SDValue C) const {
19159 // Check integral scalar types.
19160 if (!VT.isScalarInteger())
19161 return false;
19162 if (auto *ConstNode = dyn_cast<ConstantSDNode>(C.getNode())) {
19163 if (!ConstNode->getAPIntValue().isSignedIntN(64))
19164 return false;
19165 // This transformation will generate >= 2 operations. But the following
19166 // cases will generate <= 2 instructions during ISEL. So exclude them.
19167 // 1. If the constant multiplier fits 16 bits, it can be handled by one
19168 // HW instruction, ie. MULLI
19169 // 2. If the multiplier after shifted fits 16 bits, an extra shift
19170 // instruction is needed than case 1, ie. MULLI and RLDICR
19171 int64_t Imm = ConstNode->getSExtValue();
19172 unsigned Shift = llvm::countr_zero<uint64_t>(Imm);
19173 Imm >>= Shift;
19174 if (isInt<16>(Imm))
19175 return false;
19176 uint64_t UImm = static_cast<uint64_t>(Imm);
19177 if (isPowerOf2_64(UImm + 1) || isPowerOf2_64(UImm - 1) ||
19178 isPowerOf2_64(1 - UImm) || isPowerOf2_64(-1 - UImm))
19179 return true;
19180 }
19181 return false;
19182}
19183
19189
19191 Type *Ty) const {
19192 if (Subtarget.hasSPE() || Subtarget.useSoftFloat())
19193 return false;
19194 switch (Ty->getScalarType()->getTypeID()) {
19195 case Type::FloatTyID:
19196 case Type::DoubleTyID:
19197 return true;
19198 case Type::FP128TyID:
19199 return Subtarget.hasP9Vector();
19200 default:
19201 return false;
19202 }
19203}
19204
19205// FIXME: add more patterns which are not profitable to hoist.
19207 if (!I->hasOneUse())
19208 return true;
19209
19210 Instruction *User = I->user_back();
19211 assert(User && "A single use instruction with no uses.");
19212
19213 switch (I->getOpcode()) {
19214 case Instruction::FMul: {
19215 // Don't break FMA, PowerPC prefers FMA.
19216 if (User->getOpcode() != Instruction::FSub &&
19217 User->getOpcode() != Instruction::FAdd)
19218 return true;
19219
19221 const Function *F = I->getFunction();
19222 const DataLayout &DL = F->getDataLayout();
19223 Type *Ty = User->getOperand(0)->getType();
19224 bool AllowContract = I->getFastMathFlags().allowContract() &&
19225 User->getFastMathFlags().allowContract();
19226
19227 return !(isFMAFasterThanFMulAndFAdd(*F, Ty) &&
19229 (AllowContract || Options.AllowFPOpFusion == FPOpFusion::Fast));
19230 }
19231 case Instruction::Load: {
19232 // Don't break "store (load float*)" pattern, this pattern will be combined
19233 // to "store (load int32)" in later InstCombine pass. See function
19234 // combineLoadToOperationType. On PowerPC, loading a float point takes more
19235 // cycles than loading a 32 bit integer.
19236 LoadInst *LI = cast<LoadInst>(I);
19237 // For the loads that combineLoadToOperationType does nothing, like
19238 // ordered load, it should be profitable to hoist them.
19239 // For swifterror load, it can only be used for pointer to pointer type, so
19240 // later type check should get rid of this case.
19241 if (!LI->isUnordered())
19242 return true;
19243
19244 if (User->getOpcode() != Instruction::Store)
19245 return true;
19246
19247 if (I->getType()->getTypeID() != Type::FloatTyID)
19248 return true;
19249
19250 return false;
19251 }
19252 default:
19253 return true;
19254 }
19255 return true;
19256}
19257
19258const MCPhysReg *
19260 // LR is a callee-save register, but we must treat it as clobbered by any call
19261 // site. Hence we include LR in the scratch registers, which are in turn added
19262 // as implicit-defs for stackmaps and patchpoints. The same reasoning applies
19263 // to CTR, which is used by any indirect call.
19264 static const MCPhysReg ScratchRegs[] = {
19265 PPC::X12, PPC::LR8, PPC::CTR8, 0
19266 };
19267
19268 return ScratchRegs;
19269}
19270
19272 const Constant *PersonalityFn) const {
19273 return Subtarget.isPPC64() ? PPC::X3 : PPC::R3;
19274}
19275
19277 const Constant *PersonalityFn) const {
19278 return Subtarget.isPPC64() ? PPC::X4 : PPC::R4;
19279}
19280
19281bool
19283 EVT VT , unsigned DefinedValues) const {
19284 if (VT == MVT::v2i64)
19285 return Subtarget.hasDirectMove(); // Don't need stack ops with direct moves
19286
19287 if (Subtarget.hasVSX())
19288 return true;
19289
19291}
19292
19294 if (DisableILPPref || Subtarget.enableMachineScheduler())
19296
19297 return Sched::ILP;
19298}
19299
19300// Create a fast isel object.
19302 FunctionLoweringInfo &FuncInfo, const TargetLibraryInfo *LibInfo,
19303 const LibcallLoweringInfo *LibcallLowering) const {
19304 return PPC::createFastISel(FuncInfo, LibInfo, LibcallLowering);
19305}
19306
19307// 'Inverted' means the FMA opcode after negating one multiplicand.
19308// For example, (fma -a b c) = (fnmsub a b c)
19309static unsigned invertFMAOpcode(unsigned Opc) {
19310 switch (Opc) {
19311 default:
19312 llvm_unreachable("Invalid FMA opcode for PowerPC!");
19313 case ISD::FMA:
19314 return PPCISD::FNMSUB;
19315 case PPCISD::FNMSUB:
19316 return ISD::FMA;
19317 }
19318}
19319
19321 bool LegalOps, bool OptForSize,
19323 unsigned Depth) const {
19325 return SDValue();
19326
19327 unsigned Opc = Op.getOpcode();
19328 EVT VT = Op.getValueType();
19329 SDNodeFlags Flags = Op.getNode()->getFlags();
19330
19331 switch (Opc) {
19332 case PPCISD::FNMSUB:
19333 if (!Op.hasOneUse() || !isTypeLegal(VT))
19334 break;
19335
19336 SDValue N0 = Op.getOperand(0);
19337 SDValue N1 = Op.getOperand(1);
19338 SDValue N2 = Op.getOperand(2);
19339 SDLoc Loc(Op);
19340
19342 SDValue NegN2 =
19343 getNegatedExpression(N2, DAG, LegalOps, OptForSize, N2Cost, Depth + 1);
19344
19345 if (!NegN2)
19346 return SDValue();
19347
19348 // (fneg (fnmsub a b c)) => (fnmsub (fneg a) b (fneg c))
19349 // (fneg (fnmsub a b c)) => (fnmsub a (fneg b) (fneg c))
19350 // These transformations may change sign of zeroes. For example,
19351 // -(-ab-(-c))=-0 while -(-(ab-c))=+0 when a=b=c=1.
19352 if (Flags.hasNoSignedZeros()) {
19353 // Try and choose the cheaper one to negate.
19355 SDValue NegN0 = getNegatedExpression(N0, DAG, LegalOps, OptForSize,
19356 N0Cost, Depth + 1);
19357
19359 SDValue NegN1 = getNegatedExpression(N1, DAG, LegalOps, OptForSize,
19360 N1Cost, Depth + 1);
19361
19362 if (NegN0 && N0Cost <= N1Cost) {
19363 Cost = std::min(N0Cost, N2Cost);
19364 return DAG.getNode(Opc, Loc, VT, NegN0, N1, NegN2, Flags);
19365 } else if (NegN1) {
19366 Cost = std::min(N1Cost, N2Cost);
19367 return DAG.getNode(Opc, Loc, VT, N0, NegN1, NegN2, Flags);
19368 }
19369 }
19370
19371 // (fneg (fnmsub a b c)) => (fma a b (fneg c))
19372 if (isOperationLegal(ISD::FMA, VT)) {
19373 Cost = N2Cost;
19374 return DAG.getNode(ISD::FMA, Loc, VT, N0, N1, NegN2, Flags);
19375 }
19376
19377 break;
19378 }
19379
19380 return TargetLowering::getNegatedExpression(Op, DAG, LegalOps, OptForSize,
19381 Cost, Depth);
19382}
19383
19384// Override to enable LOAD_STACK_GUARD lowering on Linux.
19386 if (M.getStackProtectorGuard() == "tls" || Subtarget.isTargetLinux())
19387 return true;
19389}
19390
19392 bool ForCodeSize) const {
19393 if (!VT.isSimple() || !Subtarget.hasVSX())
19394 return false;
19395
19396 switch(VT.getSimpleVT().SimpleTy) {
19397 default:
19398 // For FP types that are currently not supported by PPC backend, return
19399 // false. Examples: f16, f80.
19400 return false;
19401 case MVT::f32:
19402 case MVT::f64: {
19403 if (Subtarget.hasPrefixInstrs() && Subtarget.hasP10Vector()) {
19404 // we can materialize all immediatess via XXSPLTI32DX and XXSPLTIDP.
19405 return true;
19406 }
19407 bool IsExact;
19408 APSInt IntResult(16, false);
19409 // The rounding mode doesn't really matter because we only care about floats
19410 // that can be converted to integers exactly.
19411 Imm.convertToInteger(IntResult, APFloat::rmTowardZero, &IsExact);
19412 // For exact values in the range [-16, 15] we can materialize the float.
19413 if (IsExact && IntResult <= 15 && IntResult >= -16)
19414 return true;
19415 return Imm.isZero();
19416 }
19417 case MVT::ppcf128:
19418 return Imm.isPosZero();
19419 }
19420}
19421
19422// For vector shift operation op, fold
19423// (op x, (and y, ((1 << numbits(x)) - 1))) -> (target op x, y)
19425 SelectionDAG &DAG) {
19426 SDValue N0 = N->getOperand(0);
19427 SDValue N1 = N->getOperand(1);
19428 EVT VT = N0.getValueType();
19429 unsigned OpSizeInBits = VT.getScalarSizeInBits();
19430 unsigned Opcode = N->getOpcode();
19431 unsigned TargetOpcode;
19432
19433 switch (Opcode) {
19434 default:
19435 llvm_unreachable("Unexpected shift operation");
19436 case ISD::SHL:
19437 TargetOpcode = PPCISD::SHL;
19438 break;
19439 case ISD::SRL:
19440 TargetOpcode = PPCISD::SRL;
19441 break;
19442 case ISD::SRA:
19443 TargetOpcode = PPCISD::SRA;
19444 break;
19445 }
19446
19447 if (VT.isVector() && TLI.isOperationLegal(Opcode, VT) &&
19448 N1->getOpcode() == ISD::AND)
19449 if (ConstantSDNode *Mask = isConstOrConstSplat(N1->getOperand(1)))
19450 if (Mask->getZExtValue() == OpSizeInBits - 1)
19451 return DAG.getNode(TargetOpcode, SDLoc(N), VT, N0, N1->getOperand(0));
19452
19453 return SDValue();
19454}
19455
// Combine a vector SHL/SRL/SRA whose shift amount is a constant splat into
// PPC-specific shift nodes (or an add for shl-by-one on i64 elements).
SDValue PPCTargetLowering::combineVectorShift(SDNode *N,
                                              DAGCombinerInfo &DCI) const {
  EVT VT = N->getValueType(0);
  assert(VT.isVector() && "Vector type expected.");

  unsigned Opc = N->getOpcode();
  assert((Opc == ISD::SHL || Opc == ISD::SRL || Opc == ISD::SRA) &&
         "Unexpected opcode.");

  if (!isOperationLegal(Opc, VT))
    return SDValue();

  // Only word (i32) and doubleword (i64) element types are handled below.
  EVT EltTy = VT.getScalarType();
  unsigned EltBits = EltTy.getSizeInBits();
  if (EltTy != MVT::i64 && EltTy != MVT::i32)
    return SDValue();

  // Determine the splatted shift amount, either from a VADD_SPLAT node's
  // immediate or from a constant-splat BUILD_VECTOR.
  SDValue N1 = N->getOperand(1);
  uint64_t SplatBits = 0;
  bool AddSplatCase = false;
  unsigned OpcN1 = N1.getOpcode();
  if (OpcN1 == PPCISD::VADD_SPLAT &&
    AddSplatCase = true;
    SplatBits = N1.getConstantOperandVal(0);
  }

  if (!AddSplatCase) {
    if (OpcN1 != ISD::BUILD_VECTOR)
      return SDValue();

    unsigned SplatBitSize;
    bool HasAnyUndefs;
    APInt APSplatBits, APSplatUndef;
    BuildVectorSDNode *BVN = cast<BuildVectorSDNode>(N1);
    bool BVNIsConstantSplat =
        BVN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
                             HasAnyUndefs, 0, !Subtarget.isLittleEndian());
    if (!BVNIsConstantSplat || SplatBitSize != EltBits)
      return SDValue();
    SplatBits = APSplatBits.getZExtValue();
  }

  SDLoc DL(N);
  SDValue N0 = N->getOperand(0);
  // PPC vector shifts by word/double look at only the low 5/6 bits of the
  // shift vector, which means the max value is 31/63. A shift vector of all
  // 1s will be truncated to 31/63, which is useful as vspltiw is limited to
  // -16 to 15 range.
  if (SplatBits == (EltBits - 1)) {
    // Opc was asserted to be SHL/SRL/SRA above, so NewOpc is always set.
    unsigned NewOpc;
    switch (Opc) {
    case ISD::SHL:
      NewOpc = PPCISD::SHL;
      break;
    case ISD::SRL:
      NewOpc = PPCISD::SRL;
      break;
    case ISD::SRA:
      NewOpc = PPCISD::SRA;
      break;
    }
    // Splat of 255: the hardware truncates it to 31/63 as described above.
    SDValue SplatOnes = getCanonicalConstSplat(255, 1, VT, DCI.DAG, DL);
    return DCI.DAG.getNode(NewOpc, DL, VT, N0, SplatOnes);
  }

  if (Opc != ISD::SHL || !isOperationLegal(ISD::ADD, VT))
    return SDValue();

  // For 64-bit there is no splat immediate so we want to catch shift by 1 here
  // before the BUILD_VECTOR is replaced by a load.
  if (EltTy != MVT::i64 || SplatBits != 1)
    return SDValue();

  // (shl x, 1) --> (add x, x)
  return DCI.DAG.getNode(ISD::ADD, SDLoc(N), VT, N0, N0);
}
19532
19533SDValue PPCTargetLowering::combineSHL(SDNode *N, DAGCombinerInfo &DCI) const {
19534 if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
19535 return Value;
19536
19537 if (N->getValueType(0).isVector())
19538 return combineVectorShift(N, DCI);
19539
19540 SDValue N0 = N->getOperand(0);
19541 ConstantSDNode *CN1 = dyn_cast<ConstantSDNode>(N->getOperand(1));
19542 if (!Subtarget.isISA3_0() || !Subtarget.isPPC64() ||
19543 N0.getOpcode() != ISD::SIGN_EXTEND ||
19544 N0.getOperand(0).getValueType() != MVT::i32 || CN1 == nullptr ||
19545 N->getValueType(0) != MVT::i64)
19546 return SDValue();
19547
19548 // We can't save an operation here if the value is already extended, and
19549 // the existing shift is easier to combine.
19550 SDValue ExtsSrc = N0.getOperand(0);
19551 if (ExtsSrc.getOpcode() == ISD::TRUNCATE &&
19552 ExtsSrc.getOperand(0).getOpcode() == ISD::AssertSext)
19553 return SDValue();
19554
19555 SDLoc DL(N0);
19556 SDValue ShiftBy = SDValue(CN1, 0);
19557 // We want the shift amount to be i32 on the extswli, but the shift could
19558 // have an i64.
19559 if (ShiftBy.getValueType() == MVT::i64)
19560 ShiftBy = DCI.DAG.getConstant(CN1->getZExtValue(), DL, MVT::i32);
19561
19562 return DCI.DAG.getNode(PPCISD::EXTSWSLI, DL, MVT::i64, N0->getOperand(0),
19563 ShiftBy);
19564}
19565
19566SDValue PPCTargetLowering::combineSRA(SDNode *N, DAGCombinerInfo &DCI) const {
19567 if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
19568 return Value;
19569
19570 if (N->getValueType(0).isVector())
19571 return combineVectorShift(N, DCI);
19572
19573 return SDValue();
19574}
19575
19576SDValue PPCTargetLowering::combineSRL(SDNode *N, DAGCombinerInfo &DCI) const {
19577 if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
19578 return Value;
19579
19580 if (N->getValueType(0).isVector())
19581 return combineVectorShift(N, DCI);
19582
19583 return SDValue();
19584}
19585
19586// Transform (add X, (zext(setne Z, C))) -> (addze X, (addic (addi Z, -C), -1))
19587// Transform (add X, (zext(sete Z, C))) -> (addze X, (subfic (addi Z, -C), 0))
19588// When C is zero, the equation (addi Z, -C) can be simplified to Z
19589// Requirement: -C in [-32768, 32767], X and Z are MVT::i64 types
19591 const PPCSubtarget &Subtarget) {
19592 if (!Subtarget.isPPC64())
19593 return SDValue();
19594
19595 SDValue LHS = N->getOperand(0);
19596 SDValue RHS = N->getOperand(1);
19597
19598 auto isZextOfCompareWithConstant = [](SDValue Op) {
19599 if (Op.getOpcode() != ISD::ZERO_EXTEND || !Op.hasOneUse() ||
19600 Op.getValueType() != MVT::i64)
19601 return false;
19602
19603 SDValue Cmp = Op.getOperand(0);
19604 if (Cmp.getOpcode() != ISD::SETCC || !Cmp.hasOneUse() ||
19605 Cmp.getOperand(0).getValueType() != MVT::i64)
19606 return false;
19607
19608 if (auto *Constant = dyn_cast<ConstantSDNode>(Cmp.getOperand(1))) {
19609 int64_t NegConstant = 0 - Constant->getSExtValue();
19610 // Due to the limitations of the addi instruction,
19611 // -C is required to be [-32768, 32767].
19612 return isInt<16>(NegConstant);
19613 }
19614
19615 return false;
19616 };
19617
19618 bool LHSHasPattern = isZextOfCompareWithConstant(LHS);
19619 bool RHSHasPattern = isZextOfCompareWithConstant(RHS);
19620
19621 // If there is a pattern, canonicalize a zext operand to the RHS.
19622 if (LHSHasPattern && !RHSHasPattern)
19623 std::swap(LHS, RHS);
19624 else if (!LHSHasPattern && !RHSHasPattern)
19625 return SDValue();
19626
19627 SDLoc DL(N);
19628 EVT CarryType = Subtarget.useCRBits() ? MVT::i1 : MVT::i32;
19629 SDVTList VTs = DAG.getVTList(MVT::i64, CarryType);
19630 SDValue Cmp = RHS.getOperand(0);
19631 SDValue Z = Cmp.getOperand(0);
19632 auto *Constant = cast<ConstantSDNode>(Cmp.getOperand(1));
19633 int64_t NegConstant = 0 - Constant->getSExtValue();
19634
19635 switch(cast<CondCodeSDNode>(Cmp.getOperand(2))->get()) {
19636 default: break;
19637 case ISD::SETNE: {
19638 // when C == 0
19639 // --> addze X, (addic Z, -1).carry
19640 // /
19641 // add X, (zext(setne Z, C))--
19642 // \ when -32768 <= -C <= 32767 && C != 0
19643 // --> addze X, (addic (addi Z, -C), -1).carry
19644 SDValue Add = DAG.getNode(ISD::ADD, DL, MVT::i64, Z,
19645 DAG.getConstant(NegConstant, DL, MVT::i64));
19646 SDValue AddOrZ = NegConstant != 0 ? Add : Z;
19647 SDValue Addc =
19648 DAG.getNode(ISD::UADDO_CARRY, DL, DAG.getVTList(MVT::i64, CarryType),
19649 AddOrZ, DAG.getAllOnesConstant(DL, MVT::i64),
19650 DAG.getConstant(0, DL, CarryType));
19651 return DAG.getNode(ISD::UADDO_CARRY, DL, VTs, LHS,
19652 DAG.getConstant(0, DL, MVT::i64),
19653 SDValue(Addc.getNode(), 1));
19654 }
19655 case ISD::SETEQ: {
19656 // when C == 0
19657 // --> addze X, (subfic Z, 0).carry
19658 // /
19659 // add X, (zext(sete Z, C))--
19660 // \ when -32768 <= -C <= 32767 && C != 0
19661 // --> addze X, (subfic (addi Z, -C), 0).carry
19662 SDValue Add = DAG.getNode(ISD::ADD, DL, MVT::i64, Z,
19663 DAG.getConstant(NegConstant, DL, MVT::i64));
19664 SDValue AddOrZ = NegConstant != 0 ? Add : Z;
19665 SDValue Subc =
19666 DAG.getNode(ISD::USUBO_CARRY, DL, DAG.getVTList(MVT::i64, CarryType),
19667 DAG.getConstant(0, DL, MVT::i64), AddOrZ,
19668 DAG.getConstant(0, DL, CarryType));
19669 SDValue Invert = DAG.getNode(ISD::XOR, DL, CarryType, Subc.getValue(1),
19670 DAG.getConstant(1UL, DL, CarryType));
19671 return DAG.getNode(ISD::UADDO_CARRY, DL, VTs, LHS,
19672 DAG.getConstant(0, DL, MVT::i64), Invert);
19673 }
19674 }
19675
19676 return SDValue();
19677}
19678
19679// Transform
19680// (add C1, (MAT_PCREL_ADDR GlobalAddr+C2)) to
19681// (MAT_PCREL_ADDR GlobalAddr+(C1+C2))
19682// In this case both C1 and C2 must be known constants.
19683// C1+C2 must fit into a 34 bit signed integer.
19685 const PPCSubtarget &Subtarget) {
19686 if (!Subtarget.isUsingPCRelativeCalls())
19687 return SDValue();
19688
19689 // Check both Operand 0 and Operand 1 of the ADD node for the PCRel node.
19690 // If we find that node try to cast the Global Address and the Constant.
19691 SDValue LHS = N->getOperand(0);
19692 SDValue RHS = N->getOperand(1);
19693
19694 if (LHS.getOpcode() != PPCISD::MAT_PCREL_ADDR)
19695 std::swap(LHS, RHS);
19696
19697 if (LHS.getOpcode() != PPCISD::MAT_PCREL_ADDR)
19698 return SDValue();
19699
19700 // Operand zero of PPCISD::MAT_PCREL_ADDR is the GA node.
19703
19704 // Check that both casts succeeded.
19705 if (!GSDN || !ConstNode)
19706 return SDValue();
19707
19708 int64_t NewOffset = GSDN->getOffset() + ConstNode->getSExtValue();
19709 SDLoc DL(GSDN);
19710
19711 // The signed int offset needs to fit in 34 bits.
19712 if (!isInt<34>(NewOffset))
19713 return SDValue();
19714
19715 // The new global address is a copy of the old global address except
19716 // that it has the updated Offset.
19717 SDValue GA =
19718 DAG.getTargetGlobalAddress(GSDN->getGlobal(), DL, GSDN->getValueType(0),
19719 NewOffset, GSDN->getTargetFlags());
19720 SDValue MatPCRel =
19721 DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, GSDN->getValueType(0), GA);
19722 return MatPCRel;
19723}
19724
19725// Transform (add X, (build_vector (T 1), (T 1), ...)) -> (sub X, (XXLEQVOnes))
19726// XXLEQVOnes creates an all-1s vector (0xFFFFFFFF...) efficiently via xxleqv
19727// Mathematical identity: X + 1 = X - (-1)
19728// Applies to v4i32, v2i64, v8i16, v16i8 where all elements are constant 1
19729// Requirement: VSX feature for efficient xxleqv generation
19731 const PPCSubtarget &Subtarget) {
19732
19733 EVT VT = N->getValueType(0);
19734 if (!Subtarget.hasVSX())
19735 return SDValue();
19736
19737 // Handle v2i64, v4i32, v8i16 and v16i8 types
19738 if (!(VT == MVT::v8i16 || VT == MVT::v16i8 || VT == MVT::v4i32 ||
19739 VT == MVT::v2i64))
19740 return SDValue();
19741
19742 SDValue LHS = N->getOperand(0);
19743 SDValue RHS = N->getOperand(1);
19744
19745 // Check if RHS is BUILD_VECTOR
19746 if (RHS.getOpcode() != ISD::BUILD_VECTOR)
19747 return SDValue();
19748
19749 // Check if all the elements are 1
19750 unsigned NumOfEles = RHS.getNumOperands();
19751 for (unsigned i = 0; i < NumOfEles; ++i) {
19752 auto *CN = dyn_cast<ConstantSDNode>(RHS.getOperand(i));
19753 if (!CN || CN->getSExtValue() != 1)
19754 return SDValue();
19755 }
19756 SDLoc DL(N);
19757
19758 SDValue MinusOne = DAG.getConstant(APInt::getAllOnes(32), DL, MVT::i32);
19759 SmallVector<SDValue, 4> Ops(4, MinusOne);
19760 SDValue AllOnesVec = DAG.getBuildVector(MVT::v4i32, DL, Ops);
19761
19762 // Bitcast to the target vector type
19763 SDValue Bitcast = DAG.getNode(ISD::BITCAST, DL, VT, AllOnesVec);
19764
19765 return DAG.getNode(ISD::SUB, DL, VT, LHS, Bitcast);
19766}
19767
19768SDValue PPCTargetLowering::combineADD(SDNode *N, DAGCombinerInfo &DCI) const {
19769 if (auto Value = combineADDToADDZE(N, DCI.DAG, Subtarget))
19770 return Value;
19771
19772 if (auto Value = combineADDToMAT_PCREL_ADDR(N, DCI.DAG, Subtarget))
19773 return Value;
19774
19775 if (auto Value = combineADDToSUB(N, DCI.DAG, Subtarget))
19776 return Value;
19777 return SDValue();
19778}
19779
19780// Detect TRUNCATE operations on bitcasts of float128 values.
19781// What we are looking for here is the situtation where we extract a subset
19782// of bits from a 128 bit float.
19783// This can be of two forms:
19784// 1) BITCAST of f128 feeding TRUNCATE
19785// 2) BITCAST of f128 feeding SRL (a shift) feeding TRUNCATE
19786// The reason this is required is because we do not have a legal i128 type
19787// and so we want to prevent having to store the f128 and then reload part
19788// of it.
19789SDValue PPCTargetLowering::combineTRUNCATE(SDNode *N,
19790 DAGCombinerInfo &DCI) const {
19791 // If we are using CRBits then try that first.
19792 if (Subtarget.useCRBits()) {
19793 // Check if CRBits did anything and return that if it did.
19794 if (SDValue CRTruncValue = DAGCombineTruncBoolExt(N, DCI))
19795 return CRTruncValue;
19796 }
19797
19798 SDLoc dl(N);
19799 SDValue Op0 = N->getOperand(0);
19800
19801 // Looking for a truncate of i128 to i64.
19802 if (Op0.getValueType() != MVT::i128 || N->getValueType(0) != MVT::i64)
19803 return SDValue();
19804
19805 int EltToExtract = DCI.DAG.getDataLayout().isBigEndian() ? 1 : 0;
19806
19807 // SRL feeding TRUNCATE.
19808 if (Op0.getOpcode() == ISD::SRL) {
19809 ConstantSDNode *ConstNode = dyn_cast<ConstantSDNode>(Op0.getOperand(1));
19810 // The right shift has to be by 64 bits.
19811 if (!ConstNode || ConstNode->getZExtValue() != 64)
19812 return SDValue();
19813
19814 // Switch the element number to extract.
19815 EltToExtract = EltToExtract ? 0 : 1;
19816 // Update Op0 past the SRL.
19817 Op0 = Op0.getOperand(0);
19818 }
19819
19820 // BITCAST feeding a TRUNCATE possibly via SRL.
19821 if (Op0.getOpcode() == ISD::BITCAST &&
19822 Op0.getValueType() == MVT::i128 &&
19823 Op0.getOperand(0).getValueType() == MVT::f128) {
19824 SDValue Bitcast = DCI.DAG.getBitcast(MVT::v2i64, Op0.getOperand(0));
19825 return DCI.DAG.getNode(
19826 ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, Bitcast,
19827 DCI.DAG.getTargetConstant(EltToExtract, dl, MVT::i32));
19828 }
19829 return SDValue();
19830}
19831
// Combine (mul x, C) for scalar or splat-constant C into shift/add or
// shift/sub sequences when that beats the hardware multiply on this CPU.
SDValue PPCTargetLowering::combineMUL(SDNode *N, DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;

  // Only scalar constants or constant splats are handled.
  ConstantSDNode *ConstOpOrElement = isConstOrConstSplat(N->getOperand(1));
  if (!ConstOpOrElement)
    return SDValue();

  // An imul is usually smaller than the alternative sequence for legal type.
      isOperationLegal(ISD::MUL, N->getValueType(0)))
    return SDValue();

  // Returns true when expanding the multiply (shl + add/sub, plus an extra
  // negate when IsNeg && IsAddOne) is a win for the current CPU directive.
  auto IsProfitable = [this](bool IsNeg, bool IsAddOne, EVT VT) -> bool {
    switch (this->Subtarget.getCPUDirective()) {
    default:
      // TODO: enhance the condition for subtarget before pwr8
      return false;
    case PPC::DIR_PWR8:
      //  type        mul     add    shl
      // scalar        4       1      1
      // vector        7       2      2
      return true;
    case PPC::DIR_PWR9:
    case PPC::DIR_PWR10:
    case PPC::DIR_PWR11:
      //  type        mul     add    shl
      // scalar        5       2      2
      // vector        7       2      2

      // The cycle RATIO of related operations are showed as a table above.
      // Because mul is 5(scalar)/7(vector), add/sub/shl are all 2 for both
      // scalar and vector type. For 2 instrs patterns, add/sub + shl
      // are 4, it is always profitable; but for 3 instrs patterns
      // (mul x, -(2^N + 1)) => -(add (shl x, N), x), sub + add + shl are 6.
      // So we should only do it for vector type.
      return IsAddOne && IsNeg ? VT.isVector() : true;
    }
  };

  EVT VT = N->getValueType(0);
  SDLoc DL(N);

  // Work with the magnitude of the multiplier; IsNeg records its sign.
  const APInt &MulAmt = ConstOpOrElement->getAPIntValue();
  bool IsNeg = MulAmt.isNegative();
  APInt MulAmtAbs = MulAmt.abs();

  if ((MulAmtAbs - 1).isPowerOf2()) {
    // (mul x, 2^N + 1) => (add (shl x, N), x)
    // (mul x, -(2^N + 1)) => -(add (shl x, N), x)

    if (!IsProfitable(IsNeg, true, VT))
      return SDValue();

    SDValue Op0 = N->getOperand(0);
    SDValue Op1 =
        DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
                    DAG.getConstant((MulAmtAbs - 1).logBase2(), DL, VT));
    SDValue Res = DAG.getNode(ISD::ADD, DL, VT, Op0, Op1);

    if (!IsNeg)
      return Res;

    // Negate the result via (sub 0, Res).
    return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Res);
  } else if ((MulAmtAbs + 1).isPowerOf2()) {
    // (mul x, 2^N - 1) => (sub (shl x, N), x)
    // (mul x, -(2^N - 1)) => (sub x, (shl x, N))

    if (!IsProfitable(IsNeg, false, VT))
      return SDValue();

    SDValue Op0 = N->getOperand(0);
    SDValue Op1 =
        DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
                    DAG.getConstant((MulAmtAbs + 1).logBase2(), DL, VT));

    if (!IsNeg)
      return DAG.getNode(ISD::SUB, DL, VT, Op1, Op0);
    else
      return DAG.getNode(ISD::SUB, DL, VT, Op0, Op1);

  } else {
    return SDValue();
  }
}
19917
// Combine fma-like op (like fnmsub) with fnegs to appropriate op. Do this
// in combiner since we need to check SD flags and other subtarget features.
SDValue PPCTargetLowering::combineFMALike(SDNode *N,
                                          DAGCombinerInfo &DCI) const {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue N2 = N->getOperand(2);
  SDNodeFlags Flags = N->getFlags();
  EVT VT = N->getValueType(0);
  SelectionDAG &DAG = DCI.DAG;
  unsigned Opc = N->getOpcode();
  bool LegalOps = !DCI.isBeforeLegalizeOps();
  SDLoc Loc(N);

  // Both directions of the rewrite need FMA to be legal for this type.
  if (!isOperationLegal(ISD::FMA, VT))
    return SDValue();

  // Allowing transformation to FNMSUB may change sign of zeroes when ab-c=0
  // since (fnmsub a b c)=-0 while c-ab=+0.
  if (!Flags.hasNoSignedZeros())
    return SDValue();

  // (fma (fneg a) b c) => (fnmsub a b c)
  // (fnmsub (fneg a) b c) => (fma a b c)
  if (SDValue NegN0 = getCheaperNegatedExpression(N0, DAG, LegalOps, CodeSize))
    return DAG.getNode(invertFMAOpcode(Opc), Loc, VT, NegN0, N1, N2, Flags);

  // (fma a (fneg b) c) => (fnmsub a b c)
  // (fnmsub a (fneg b) c) => (fma a b c)
  if (SDValue NegN1 = getCheaperNegatedExpression(N1, DAG, LegalOps, CodeSize))
    return DAG.getNode(invertFMAOpcode(Opc), Loc, VT, N0, NegN1, N2, Flags);

  return SDValue();
}
19953
19954bool PPCTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
19955 // Only duplicate to increase tail-calls for the 64bit SysV ABIs.
19956 if (!Subtarget.is64BitELFABI())
19957 return false;
19958
19959 // If not a tail call then no need to proceed.
19960 if (!CI->isTailCall())
19961 return false;
19962
19963 // If sibling calls have been disabled and tail-calls aren't guaranteed
19964 // there is no reason to duplicate.
19965 auto &TM = getTargetMachine();
19966 if (!TM.Options.GuaranteedTailCallOpt && DisableSCO)
19967 return false;
19968
19969 // Can't tail call a function called indirectly, or if it has variadic args.
19970 const Function *Callee = CI->getCalledFunction();
19971 if (!Callee || Callee->isVarArg())
19972 return false;
19973
19974 // Make sure the callee and caller calling conventions are eligible for tco.
19975 const Function *Caller = CI->getParent()->getParent();
19976 if (!areCallingConvEligibleForTCO_64SVR4(Caller->getCallingConv(),
19977 CI->getCallingConv()))
19978 return false;
19979
19980 // If the function is local then we have a good chance at tail-calling it
19981 return getTargetMachine().shouldAssumeDSOLocal(Callee);
19982}
19983
19984bool PPCTargetLowering::
19985isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const {
19986 const Value *Mask = AndI.getOperand(1);
19987 // If the mask is suitable for andi. or andis. we should sink the and.
19988 if (const ConstantInt *CI = dyn_cast<ConstantInt>(Mask)) {
19989 // Can't handle constants wider than 64-bits.
19990 if (CI->getBitWidth() > 64)
19991 return false;
19992 int64_t ConstVal = CI->getZExtValue();
19993 return isUInt<16>(ConstVal) ||
19994 (isUInt<16>(ConstVal >> 16) && !(ConstVal & 0xFFFF));
19995 }
19996
19997 // For non-constant masks, we can always use the record-form and.
19998 return true;
19999}
20000
20001/// getAddrModeForFlags - Based on the set of address flags, select the most
20002/// optimal instruction format to match by.
20003PPC::AddrMode PPCTargetLowering::getAddrModeForFlags(unsigned Flags) const {
20004 // This is not a node we should be handling here.
20005 if (Flags == PPC::MOF_None)
20006 return PPC::AM_None;
20007 // Unaligned D-Forms are tried first, followed by the aligned D-Forms.
20008 for (auto FlagSet : AddrModesMap.at(PPC::AM_DForm))
20009 if ((Flags & FlagSet) == FlagSet)
20010 return PPC::AM_DForm;
20011 for (auto FlagSet : AddrModesMap.at(PPC::AM_DSForm))
20012 if ((Flags & FlagSet) == FlagSet)
20013 return PPC::AM_DSForm;
20014 for (auto FlagSet : AddrModesMap.at(PPC::AM_DQForm))
20015 if ((Flags & FlagSet) == FlagSet)
20016 return PPC::AM_DQForm;
20017 for (auto FlagSet : AddrModesMap.at(PPC::AM_PrefixDForm))
20018 if ((Flags & FlagSet) == FlagSet)
20019 return PPC::AM_PrefixDForm;
20020 // If no other forms are selected, return an X-Form as it is the most
20021 // general addressing mode.
20022 return PPC::AM_XForm;
20023}
20024
/// Set alignment flags based on whether or not the Frame Index is aligned.
/// Utilized when computing flags for address computation when selecting
/// load and store instructions.
/// \param N       the address node: a plain FrameIndex, or an ADD/OR whose
///                first operand may be a FrameIndex.
/// \param FlagSet [in,out] accumulates/clears PPC::MOF_* alignment flags.
static void setAlignFlagsForFI(SDValue N, unsigned &FlagSet,
                               SelectionDAG &DAG) {
  // An OR is treated like an ADD here; presumably callers only pass ORs with
  // provably disjoint operands (see provablyDisjointOr) -- TODO confirm.
  bool IsAdd = ((N.getOpcode() == ISD::ADD) || (N.getOpcode() == ISD::OR));
  FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(IsAdd ? N.getOperand(0) : N);
  if (!FI)
    return;
  // Byte alignment of the stack object backing this frame index.
  unsigned FrameIndexAlign = MFI.getObjectAlign(FI->getIndex()).value();
  // If this is (add $FI, $S16Imm), the alignment flags are already set
  // based on the immediate. We just need to clear the alignment flags
  // if the FI alignment is weaker.
  if ((FrameIndexAlign % 4) != 0)
    FlagSet &= ~PPC::MOF_RPlusSImm16Mult4;
  if ((FrameIndexAlign % 16) != 0)
    FlagSet &= ~PPC::MOF_RPlusSImm16Mult16;
  // If the address is a plain FrameIndex, set alignment flags based on
  // FI alignment.
  if (!IsAdd) {
    if ((FrameIndexAlign % 4) == 0)
      FlagSet |= PPC::MOF_RPlusSImm16Mult4;
    if ((FrameIndexAlign % 16) == 0)
      FlagSet |= PPC::MOF_RPlusSImm16Mult16;
  }
}
20052
/// Given a node, compute flags that are used for address computation when
/// selecting load and store instructions. The flags computed are stored in
/// FlagSet. This function takes into account whether the node is a constant,
/// an ADD, an OR with provably disjoint operands, or none of these, and
/// computes the address flags accordingly.
/// \param N       the address operand of the load/store being matched.
/// \param FlagSet [in,out] accumulates PPC::MOF_* address flags.
static void computeFlagsForAddressComputation(SDValue N, unsigned &FlagSet,
                                              SelectionDAG &DAG) {
  // Set the alignment flags for the node depending on if the node is
  // 4-byte or 16-byte aligned.
  auto SetAlignFlagsForImm = [&](uint64_t Imm) {
    if ((Imm & 0x3) == 0)
      FlagSet |= PPC::MOF_RPlusSImm16Mult4;
    if ((Imm & 0xf) == 0)
      FlagSet |= PPC::MOF_RPlusSImm16Mult16;
  };

    // All 32-bit constants can be computed as LIS + Disp.
    const APInt &ConstImm = CN->getAPIntValue();
    if (ConstImm.isSignedIntN(32)) { // Flag to handle 32-bit constants.
      FlagSet |= PPC::MOF_AddrIsSImm32;
      SetAlignFlagsForImm(ConstImm.getZExtValue());
      setAlignFlagsForFI(N, FlagSet, DAG);
    }
    if (ConstImm.isSignedIntN(34)) // Flag to handle 34-bit constants.
      FlagSet |= PPC::MOF_RPlusSImm34;
    else // Let constant materialization handle large constants.
      FlagSet |= PPC::MOF_NotAddNorCst;
  } else if (N.getOpcode() == ISD::ADD || provablyDisjointOr(DAG, N)) {
    // This address can be represented as an addition of:
    //  - Register + Imm16 (possibly a multiple of 4/16)
    //  - Register + Imm34
    //  - Register + PPCISD::Lo
    //  - Register + Register
    // In any case, we won't have to match this as Base + Zero.
    SDValue RHS = N.getOperand(1);
      const APInt &ConstImm = CN->getAPIntValue();
      if (ConstImm.isSignedIntN(16)) {
        FlagSet |= PPC::MOF_RPlusSImm16; // Signed 16-bit immediates.
        SetAlignFlagsForImm(ConstImm.getZExtValue());
        setAlignFlagsForFI(N, FlagSet, DAG);
      }
      if (ConstImm.isSignedIntN(34))
        FlagSet |= PPC::MOF_RPlusSImm34; // Signed 34-bit immediates.
      else
        FlagSet |= PPC::MOF_RPlusR; // Register.
    } else if (RHS.getOpcode() == PPCISD::Lo && !RHS.getConstantOperandVal(1))
      FlagSet |= PPC::MOF_RPlusLo; // PPCISD::Lo.
    else
      FlagSet |= PPC::MOF_RPlusR;
  } else { // The address computation is not a constant or an addition.
    setAlignFlagsForFI(N, FlagSet, DAG);
    FlagSet |= PPC::MOF_NotAddNorCst;
  }
}
20108
20109static bool isPCRelNode(SDValue N) {
20110 return (N.getOpcode() == PPCISD::MAT_PCREL_ADDR ||
20115}
20116
20117/// computeMOFlags - Given a node N and it's Parent (a MemSDNode), compute
20118/// the address flags of the load/store instruction that is to be matched.
20119unsigned PPCTargetLowering::computeMOFlags(const SDNode *Parent, SDValue N,
20120 SelectionDAG &DAG) const {
20121 unsigned FlagSet = PPC::MOF_None;
20122
20123 // Compute subtarget flags.
20124 if (!Subtarget.hasP9Vector())
20125 FlagSet |= PPC::MOF_SubtargetBeforeP9;
20126 else
20127 FlagSet |= PPC::MOF_SubtargetP9;
20128
20129 if (Subtarget.hasPrefixInstrs())
20130 FlagSet |= PPC::MOF_SubtargetP10;
20131
20132 if (Subtarget.hasSPE())
20133 FlagSet |= PPC::MOF_SubtargetSPE;
20134
20135 // Check if we have a PCRel node and return early.
20136 if ((FlagSet & PPC::MOF_SubtargetP10) && isPCRelNode(N))
20137 return FlagSet;
20138
20139 // If the node is the paired load/store intrinsics, compute flags for
20140 // address computation and return early.
20141 unsigned ParentOp = Parent->getOpcode();
20142 if (Subtarget.isISA3_1() && ((ParentOp == ISD::INTRINSIC_W_CHAIN) ||
20143 (ParentOp == ISD::INTRINSIC_VOID))) {
20144 unsigned ID = Parent->getConstantOperandVal(1);
20145 if ((ID == Intrinsic::ppc_vsx_lxvp) || (ID == Intrinsic::ppc_vsx_stxvp)) {
20146 SDValue IntrinOp = (ID == Intrinsic::ppc_vsx_lxvp)
20147 ? Parent->getOperand(2)
20148 : Parent->getOperand(3);
20149 computeFlagsForAddressComputation(IntrinOp, FlagSet, DAG);
20150 FlagSet |= PPC::MOF_Vector;
20151 return FlagSet;
20152 }
20153 }
20154
20155 // Mark this as something we don't want to handle here if it is atomic
20156 // or pre-increment instruction.
20157 if (const LSBaseSDNode *LSB = dyn_cast<LSBaseSDNode>(Parent))
20158 if (LSB->isIndexed())
20159 return PPC::MOF_None;
20160
20161 // Compute in-memory type flags. This is based on if there are scalars,
20162 // floats or vectors.
20163 const MemSDNode *MN = dyn_cast<MemSDNode>(Parent);
20164 assert(MN && "Parent should be a MemSDNode!");
20165 EVT MemVT = MN->getMemoryVT();
20166 unsigned Size = MemVT.getSizeInBits();
20167 if (MemVT.isScalarInteger()) {
20168 assert(Size <= 128 &&
20169 "Not expecting scalar integers larger than 16 bytes!");
20170 if (Size < 32)
20171 FlagSet |= PPC::MOF_SubWordInt;
20172 else if (Size == 32)
20173 FlagSet |= PPC::MOF_WordInt;
20174 else
20175 FlagSet |= PPC::MOF_DoubleWordInt;
20176 } else if (MemVT.isVector() && !MemVT.isFloatingPoint()) { // Integer vectors.
20177 if (Size == 128)
20178 FlagSet |= PPC::MOF_Vector;
20179 else if (Size == 256) {
20180 assert(Subtarget.pairedVectorMemops() &&
20181 "256-bit vectors are only available when paired vector memops is "
20182 "enabled!");
20183 FlagSet |= PPC::MOF_Vector;
20184 } else
20185 llvm_unreachable("Not expecting illegal vectors!");
20186 } else { // Floating point type: can be scalar, f128 or vector types.
20187 if (Size == 32 || Size == 64)
20188 FlagSet |= PPC::MOF_ScalarFloat;
20189 else if (MemVT == MVT::f128 || MemVT.isVector())
20190 FlagSet |= PPC::MOF_Vector;
20191 else
20192 llvm_unreachable("Not expecting illegal scalar floats!");
20193 }
20194
20195 // Compute flags for address computation.
20196 computeFlagsForAddressComputation(N, FlagSet, DAG);
20197
20198 // Compute type extension flags.
20199 if (const LoadSDNode *LN = dyn_cast<LoadSDNode>(Parent)) {
20200 switch (LN->getExtensionType()) {
20201 case ISD::SEXTLOAD:
20202 FlagSet |= PPC::MOF_SExt;
20203 break;
20204 case ISD::EXTLOAD:
20205 case ISD::ZEXTLOAD:
20206 FlagSet |= PPC::MOF_ZExt;
20207 break;
20208 case ISD::NON_EXTLOAD:
20209 FlagSet |= PPC::MOF_NoExt;
20210 break;
20211 }
20212 } else
20213 FlagSet |= PPC::MOF_NoExt;
20214
20215 // For integers, no extension is the same as zero extension.
20216 // We set the extension mode to zero extension so we don't have
20217 // to add separate entries in AddrModesMap for loads and stores.
20218 if (MemVT.isScalarInteger() && (FlagSet & PPC::MOF_NoExt)) {
20219 FlagSet |= PPC::MOF_ZExt;
20220 FlagSet &= ~PPC::MOF_NoExt;
20221 }
20222
20223 // If we don't have prefixed instructions, 34-bit constants should be
20224 // treated as PPC::MOF_NotAddNorCst so they can match D-Forms.
20225 bool IsNonP1034BitConst =
20227 FlagSet) == PPC::MOF_RPlusSImm34;
20228 if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::OR &&
20229 IsNonP1034BitConst)
20230 FlagSet |= PPC::MOF_NotAddNorCst;
20231
20232 return FlagSet;
20233}
20234
20235/// SelectForceXFormMode - Given the specified address, force it to be
20236/// represented as an indexed [r+r] operation (an XForm instruction).
20238 SDValue &Base,
20239 SelectionDAG &DAG) const {
20240
20242 int16_t ForceXFormImm = 0;
20243 if (provablyDisjointOr(DAG, N) &&
20244 !isIntS16Immediate(N.getOperand(1), ForceXFormImm)) {
20245 Disp = N.getOperand(0);
20246 Base = N.getOperand(1);
20247 return Mode;
20248 }
20249
20250 // If the address is the result of an add, we will utilize the fact that the
20251 // address calculation includes an implicit add. However, we can reduce
20252 // register pressure if we do not materialize a constant just for use as the
20253 // index register. We only get rid of the add if it is not an add of a
20254 // value and a 16-bit signed constant and both have a single use.
20255 if (N.getOpcode() == ISD::ADD &&
20256 (!isIntS16Immediate(N.getOperand(1), ForceXFormImm) ||
20257 !N.getOperand(1).hasOneUse() || !N.getOperand(0).hasOneUse())) {
20258 Disp = N.getOperand(0);
20259 Base = N.getOperand(1);
20260 return Mode;
20261 }
20262
20263 // Otherwise, use R0 as the base register.
20264 Disp = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
20265 N.getValueType());
20266 Base = N;
20267
20268 return Mode;
20269}
20270
20272 SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
20273 unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const {
20274 EVT ValVT = Val.getValueType();
20275 // If we are splitting a scalar integer into f64 parts (i.e. so they
20276 // can be placed into VFRC registers), we need to zero extend and
20277 // bitcast the values. This will ensure the value is placed into a
20278 // VSR using direct moves or stack operations as needed.
20279 if (PartVT == MVT::f64 &&
20280 (ValVT == MVT::i32 || ValVT == MVT::i16 || ValVT == MVT::i8)) {
20281 Val = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, Val);
20282 Val = DAG.getNode(ISD::BITCAST, DL, MVT::f64, Val);
20283 Parts[0] = Val;
20284 return true;
20285 }
20286 return false;
20287}
20288
20289SDValue PPCTargetLowering::lowerToLibCall(const char *LibCallName, SDValue Op,
20290 SelectionDAG &DAG) const {
20291 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
20293 EVT RetVT = Op.getValueType();
20294 Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
20295 SDValue Callee =
20296 DAG.getExternalSymbol(LibCallName, TLI.getPointerTy(DAG.getDataLayout()));
20297 bool SignExtend = TLI.shouldSignExtendTypeInLibCall(RetTy, false);
20299 for (const SDValue &N : Op->op_values()) {
20300 EVT ArgVT = N.getValueType();
20301 Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
20302 TargetLowering::ArgListEntry Entry(N, ArgTy);
20303 Entry.IsSExt = TLI.shouldSignExtendTypeInLibCall(ArgTy, SignExtend);
20304 Entry.IsZExt = !Entry.IsSExt;
20305 Args.push_back(Entry);
20306 }
20307
20308 SDValue InChain = DAG.getEntryNode();
20309 SDValue TCChain = InChain;
20310 const Function &F = DAG.getMachineFunction().getFunction();
20311 bool isTailCall =
20312 TLI.isInTailCallPosition(DAG, Op.getNode(), TCChain) &&
20313 (RetTy == F.getReturnType() || F.getReturnType()->isVoidTy());
20314 if (isTailCall)
20315 InChain = TCChain;
20316 CLI.setDebugLoc(SDLoc(Op))
20317 .setChain(InChain)
20318 .setLibCallee(CallingConv::C, RetTy, Callee, std::move(Args))
20319 .setTailCall(isTailCall)
20320 .setSExtResult(SignExtend)
20321 .setZExtResult(!SignExtend)
20323 return TLI.LowerCallTo(CLI).first;
20324}
20325
20326SDValue PPCTargetLowering::lowerLibCallBasedOnType(
20327 const char *LibCallFloatName, const char *LibCallDoubleName, SDValue Op,
20328 SelectionDAG &DAG) const {
20329 if (Op.getValueType() == MVT::f32)
20330 return lowerToLibCall(LibCallFloatName, Op, DAG);
20331
20332 if (Op.getValueType() == MVT::f64)
20333 return lowerToLibCall(LibCallDoubleName, Op, DAG);
20334
20335 return SDValue();
20336}
20337
20338bool PPCTargetLowering::isLowringToMASSFiniteSafe(SDValue Op) const {
20339 SDNodeFlags Flags = Op.getNode()->getFlags();
20340 return isLowringToMASSSafe(Op) && Flags.hasNoSignedZeros() &&
20341 Flags.hasNoNaNs() && Flags.hasNoInfs();
20342}
20343
20344bool PPCTargetLowering::isLowringToMASSSafe(SDValue Op) const {
20345 return Op.getNode()->getFlags().hasApproximateFuncs();
20346}
20347
20348bool PPCTargetLowering::isScalarMASSConversionEnabled() const {
20350}
20351
20352SDValue PPCTargetLowering::lowerLibCallBase(const char *LibCallDoubleName,
20353 const char *LibCallFloatName,
20354 const char *LibCallDoubleNameFinite,
20355 const char *LibCallFloatNameFinite,
20356 SDValue Op,
20357 SelectionDAG &DAG) const {
20358 if (!isScalarMASSConversionEnabled() || !isLowringToMASSSafe(Op))
20359 return SDValue();
20360
20361 if (!isLowringToMASSFiniteSafe(Op))
20362 return lowerLibCallBasedOnType(LibCallFloatName, LibCallDoubleName, Op,
20363 DAG);
20364
20365 return lowerLibCallBasedOnType(LibCallFloatNameFinite,
20366 LibCallDoubleNameFinite, Op, DAG);
20367}
20368
20369SDValue PPCTargetLowering::lowerPow(SDValue Op, SelectionDAG &DAG) const {
20370 return lowerLibCallBase("__xl_pow", "__xl_powf", "__xl_pow_finite",
20371 "__xl_powf_finite", Op, DAG);
20372}
20373
20374SDValue PPCTargetLowering::lowerSin(SDValue Op, SelectionDAG &DAG) const {
20375 return lowerLibCallBase("__xl_sin", "__xl_sinf", "__xl_sin_finite",
20376 "__xl_sinf_finite", Op, DAG);
20377}
20378
20379SDValue PPCTargetLowering::lowerCos(SDValue Op, SelectionDAG &DAG) const {
20380 return lowerLibCallBase("__xl_cos", "__xl_cosf", "__xl_cos_finite",
20381 "__xl_cosf_finite", Op, DAG);
20382}
20383
20384SDValue PPCTargetLowering::lowerLog(SDValue Op, SelectionDAG &DAG) const {
20385 return lowerLibCallBase("__xl_log", "__xl_logf", "__xl_log_finite",
20386 "__xl_logf_finite", Op, DAG);
20387}
20388
20389SDValue PPCTargetLowering::lowerLog10(SDValue Op, SelectionDAG &DAG) const {
20390 return lowerLibCallBase("__xl_log10", "__xl_log10f", "__xl_log10_finite",
20391 "__xl_log10f_finite", Op, DAG);
20392}
20393
20394SDValue PPCTargetLowering::lowerExp(SDValue Op, SelectionDAG &DAG) const {
20395 return lowerLibCallBase("__xl_exp", "__xl_expf", "__xl_exp_finite",
20396 "__xl_expf_finite", Op, DAG);
20397}
20398
20399// If we happen to match to an aligned D-Form, check if the Frame Index is
20400// adequately aligned. If it is not, reset the mode to match to X-Form.
20401static void setXFormForUnalignedFI(SDValue N, unsigned Flags,
20404 return;
20405 if ((Mode == PPC::AM_DSForm && !(Flags & PPC::MOF_RPlusSImm16Mult4)) ||
20408}
20409
20410/// SelectOptimalAddrMode - Based on a node N and it's Parent (a MemSDNode),
20411/// compute the address flags of the node, get the optimal address mode based
20412/// on the flags, and set the Base and Disp based on the address mode.
20414 SDValue N, SDValue &Disp,
20415 SDValue &Base,
20416 SelectionDAG &DAG,
20417 MaybeAlign Align) const {
20418 SDLoc DL(Parent);
20419
20420 // Compute the address flags.
20421 unsigned Flags = computeMOFlags(Parent, N, DAG);
20422
20423 // Get the optimal address mode based on the Flags.
20424 PPC::AddrMode Mode = getAddrModeForFlags(Flags);
20425
20426 // If the address mode is DS-Form or DQ-Form, check if the FI is aligned.
20427 // Select an X-Form load if it is not.
20428 setXFormForUnalignedFI(N, Flags, Mode);
20429
20430 // Set the mode to PC-Relative addressing mode if we have a valid PC-Rel node.
20431 if ((Mode == PPC::AM_XForm) && isPCRelNode(N)) {
20432 assert(Subtarget.isUsingPCRelativeCalls() &&
20433 "Must be using PC-Relative calls when a valid PC-Relative node is "
20434 "present!");
20435 Mode = PPC::AM_PCRel;
20436 }
20437
20438 // Set Base and Disp accordingly depending on the address mode.
20439 switch (Mode) {
20440 case PPC::AM_DForm:
20441 case PPC::AM_DSForm:
20442 case PPC::AM_DQForm: {
20443 // This is a register plus a 16-bit immediate. The base will be the
20444 // register and the displacement will be the immediate unless it
20445 // isn't sufficiently aligned.
20446 if (Flags & PPC::MOF_RPlusSImm16) {
20447 SDValue Op0 = N.getOperand(0);
20448 SDValue Op1 = N.getOperand(1);
20449 int16_t Imm = Op1->getAsZExtVal();
20450 if (!Align || isAligned(*Align, Imm)) {
20451 Disp = DAG.getSignedTargetConstant(Imm, DL, N.getValueType());
20452 Base = Op0;
20454 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
20455 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
20456 }
20457 break;
20458 }
20459 }
20460 // This is a register plus the @lo relocation. The base is the register
20461 // and the displacement is the global address.
20462 else if (Flags & PPC::MOF_RPlusLo) {
20463 Disp = N.getOperand(1).getOperand(0); // The global address.
20468 Base = N.getOperand(0);
20469 break;
20470 }
20471 // This is a constant address at most 32 bits. The base will be
20472 // zero or load-immediate-shifted and the displacement will be
20473 // the low 16 bits of the address.
20474 else if (Flags & PPC::MOF_AddrIsSImm32) {
20475 auto *CN = cast<ConstantSDNode>(N);
20476 EVT CNType = CN->getValueType(0);
20477 uint64_t CNImm = CN->getZExtValue();
20478 // If this address fits entirely in a 16-bit sext immediate field, codegen
20479 // this as "d, 0".
20480 int16_t Imm;
20481 if (isIntS16Immediate(CN, Imm) && (!Align || isAligned(*Align, Imm))) {
20482 Disp = DAG.getSignedTargetConstant(Imm, DL, CNType);
20483 Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
20484 CNType);
20485 break;
20486 }
20487 // Handle 32-bit sext immediate with LIS + Addr mode.
20488 if ((CNType == MVT::i32 || isInt<32>(CNImm)) &&
20489 (!Align || isAligned(*Align, CNImm))) {
20490 int32_t Addr = (int32_t)CNImm;
20491 // Otherwise, break this down into LIS + Disp.
20492 Disp = DAG.getSignedTargetConstant((int16_t)Addr, DL, MVT::i32);
20493 Base = DAG.getSignedTargetConstant((Addr - (int16_t)Addr) >> 16, DL,
20494 MVT::i32);
20495 uint32_t LIS = CNType == MVT::i32 ? PPC::LIS : PPC::LIS8;
20496 Base = SDValue(DAG.getMachineNode(LIS, DL, CNType, Base), 0);
20497 break;
20498 }
20499 }
20500 // Otherwise, the PPC:MOF_NotAdd flag is set. Load/Store is Non-foldable.
20501 Disp = DAG.getTargetConstant(0, DL, getPointerTy(DAG.getDataLayout()));
20503 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
20504 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
20505 } else
20506 Base = N;
20507 break;
20508 }
20509 case PPC::AM_PrefixDForm: {
20510 int64_t Imm34 = 0;
20511 unsigned Opcode = N.getOpcode();
20512 if (((Opcode == ISD::ADD) || (Opcode == ISD::OR)) &&
20513 (isIntS34Immediate(N.getOperand(1), Imm34))) {
20514 // N is an Add/OR Node, and it's operand is a 34-bit signed immediate.
20515 Disp = DAG.getSignedTargetConstant(Imm34, DL, N.getValueType());
20516 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0)))
20517 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
20518 else
20519 Base = N.getOperand(0);
20520 } else if (isIntS34Immediate(N, Imm34)) {
20521 // The address is a 34-bit signed immediate.
20522 Disp = DAG.getSignedTargetConstant(Imm34, DL, N.getValueType());
20523 Base = DAG.getRegister(PPC::ZERO8, N.getValueType());
20524 }
20525 break;
20526 }
20527 case PPC::AM_PCRel: {
20528 // When selecting PC-Relative instructions, "Base" is not utilized as
20529 // we select the address as [PC+imm].
20530 Disp = N;
20531 break;
20532 }
20533 case PPC::AM_None:
20534 break;
20535 default: { // By default, X-Form is always available to be selected.
20536 // When a frame index is not aligned, we also match by XForm.
20538 Base = FI ? N : N.getOperand(1);
20539 Disp = FI ? DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
20540 N.getValueType())
20541 : N.getOperand(0);
20542 break;
20543 }
20544 }
20545 return Mode;
20546}
20547
20549 bool Return,
20550 bool IsVarArg) const {
20551 switch (CC) {
20552 case CallingConv::Cold:
20553 return (Return ? RetCC_PPC_Cold : CC_PPC64_ELF);
20554 default:
20555 return CC_PPC64_ELF;
20556 }
20557}
20558
20560 return Subtarget.isPPC64() && Subtarget.hasQuadwordAtomics();
20561}
20562
20565 unsigned Size = AI->getType()->getPrimitiveSizeInBits();
20566 if (shouldInlineQuadwordAtomics() && Size == 128)
20568
20569 switch (AI->getOperation()) {
20575 default:
20577 }
20578
20579 llvm_unreachable("unreachable atomicrmw operation");
20580}
20581
20590
20591static Intrinsic::ID
20593 switch (BinOp) {
20594 default:
20595 llvm_unreachable("Unexpected AtomicRMW BinOp");
20597 return Intrinsic::ppc_atomicrmw_xchg_i128;
20598 case AtomicRMWInst::Add:
20599 return Intrinsic::ppc_atomicrmw_add_i128;
20600 case AtomicRMWInst::Sub:
20601 return Intrinsic::ppc_atomicrmw_sub_i128;
20602 case AtomicRMWInst::And:
20603 return Intrinsic::ppc_atomicrmw_and_i128;
20604 case AtomicRMWInst::Or:
20605 return Intrinsic::ppc_atomicrmw_or_i128;
20606 case AtomicRMWInst::Xor:
20607 return Intrinsic::ppc_atomicrmw_xor_i128;
20609 return Intrinsic::ppc_atomicrmw_nand_i128;
20610 }
20611}
20612
20614 IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
20615 Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
20616 assert(shouldInlineQuadwordAtomics() && "Only support quadword now");
20617 Module *M = Builder.GetInsertBlock()->getParent()->getParent();
20618 Type *ValTy = Incr->getType();
20619 assert(ValTy->getPrimitiveSizeInBits() == 128);
20620 Type *Int64Ty = Type::getInt64Ty(M->getContext());
20621 Value *IncrLo = Builder.CreateTrunc(Incr, Int64Ty, "incr_lo");
20622 Value *IncrHi =
20623 Builder.CreateTrunc(Builder.CreateLShr(Incr, 64), Int64Ty, "incr_hi");
20624 Value *LoHi = Builder.CreateIntrinsic(
20626 {AlignedAddr, IncrLo, IncrHi});
20627 Value *Lo = Builder.CreateExtractValue(LoHi, 0, "lo");
20628 Value *Hi = Builder.CreateExtractValue(LoHi, 1, "hi");
20629 Lo = Builder.CreateZExt(Lo, ValTy, "lo64");
20630 Hi = Builder.CreateZExt(Hi, ValTy, "hi64");
20631 return Builder.CreateOr(
20632 Lo, Builder.CreateShl(Hi, ConstantInt::get(ValTy, 64)), "val64");
20633}
20634
20636 IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
20637 Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
20638 assert(shouldInlineQuadwordAtomics() && "Only support quadword now");
20639 Module *M = Builder.GetInsertBlock()->getParent()->getParent();
20640 Type *ValTy = CmpVal->getType();
20641 assert(ValTy->getPrimitiveSizeInBits() == 128);
20642 Function *IntCmpXchg =
20643 Intrinsic::getOrInsertDeclaration(M, Intrinsic::ppc_cmpxchg_i128);
20644 Type *Int64Ty = Type::getInt64Ty(M->getContext());
20645 Value *CmpLo = Builder.CreateTrunc(CmpVal, Int64Ty, "cmp_lo");
20646 Value *CmpHi =
20647 Builder.CreateTrunc(Builder.CreateLShr(CmpVal, 64), Int64Ty, "cmp_hi");
20648 Value *NewLo = Builder.CreateTrunc(NewVal, Int64Ty, "new_lo");
20649 Value *NewHi =
20650 Builder.CreateTrunc(Builder.CreateLShr(NewVal, 64), Int64Ty, "new_hi");
20651 emitLeadingFence(Builder, CI, Ord);
20652 Value *LoHi =
20653 Builder.CreateCall(IntCmpXchg, {AlignedAddr, CmpLo, CmpHi, NewLo, NewHi});
20654 emitTrailingFence(Builder, CI, Ord);
20655 Value *Lo = Builder.CreateExtractValue(LoHi, 0, "lo");
20656 Value *Hi = Builder.CreateExtractValue(LoHi, 1, "hi");
20657 Lo = Builder.CreateZExt(Lo, ValTy, "lo64");
20658 Hi = Builder.CreateZExt(Hi, ValTy, "hi64");
20659 return Builder.CreateOr(
20660 Lo, Builder.CreateShl(Hi, ConstantInt::get(ValTy, 64)), "val64");
20661}
20662
// NOTE(review): the opening signature line of this member function (doc line
// 20663) was lost during extraction and cannot be reconstructed with
// confidence from this view — restore it from the upstream file. The body
// simply forwards the subtarget's "use CR bits" setting.
20664 return Subtarget.useCRBits();
20665}
static MCRegister MatchRegisterName(StringRef Name)
static unsigned getCallOpcode(const MachineFunction &CallerF, bool IsIndirect, bool IsTailCall, std::optional< CallLowering::PtrAuthInfo > &PAI, MachineRegisterInfo &MRI)
return SDValue()
static SDValue GeneratePerfectShuffle(unsigned ID, SDValue V1, SDValue V2, unsigned PFEntry, SDValue LHS, SDValue RHS, SelectionDAG &DAG, const SDLoc &DL)
GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit the specified operations t...
static bool isSignExtended(SDValue N, SelectionDAG &DAG)
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static msgpack::DocNode getNode(msgpack::DocNode DN, msgpack::Type Type, MCValue Val)
static std::pair< Register, unsigned > getBaseWithConstantOffset(MachineRegisterInfo &MRI, Register Reg)
This file declares a class to represent arbitrary precision floating point values and provide a varie...
This file implements a class to represent arbitrary precision integral constant values and operations...
This file implements the APSInt class, which is a simple class that represents an arbitrary sized int...
static bool isLoad(int Opcode)
static bool isFloatingPointZero(SDValue Op)
isFloatingPointZero - Return true if this is +0.0.
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Function Alias Analysis Results
Atomic ordering constants.
#define X(NUM, ENUM, NAME)
Definition ELF.h:849
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
static RegisterPass< DebugifyModulePass > DM("debugify", "Attach debug info to everything")
This file defines the DenseMap class.
const HexagonInstrInfo * TII
static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain, ISD::ArgFlagsTy Flags, SelectionDAG &DAG, const SDLoc &dl)
CreateCopyOfByValArgument - Make a copy of an aggregate at address specified by "Src" to address "Dst...
IRTranslator LLVM IR MI
Module.h This file contains the declarations for the Module class.
This defines the Use class.
iv users
Definition IVUsers.cpp:48
const size_t AbstractManglingParser< Derived, Alloc >::NumOps
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
#define RegName(no)
static LVOptions Options
Definition LVOptions.cpp:25
lazy value info
This file implements the LivePhysRegs utility for tracking liveness of physical registers.
static int getEstimateRefinementSteps(EVT VT, const LoongArchSubtarget &Subtarget)
static bool isSplat(Value *V)
Return true if V is a splat of a value (which is used when multiplying a matrix with a scalar).
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
#define G(x, y, z)
Definition MD5.cpp:55
Machine Check Debug Module
Register Reg
Register const TargetRegisterInfo * TRI
Promote Memory to Register
Definition Mem2Reg.cpp:110
#define T
static bool isConstantOrUndef(const SDValue Op)
#define P(N)
static CodeModel::Model getCodeModel(const PPCSubtarget &S, const TargetMachine &TM, const MachineOperand &MO)
cl::opt< bool > ANDIGlueBug("expose-ppc-andi-glue-bug", cl::desc("expose the ANDI glue bug on PPC"), cl::Hidden)
static SDValue getCanonicalConstSplat(uint64_t Val, unsigned SplatSize, EVT VT, SelectionDAG &DAG, const SDLoc &dl)
getCanonicalConstSplat - Build a canonical splat immediate of Val with an element size of SplatSize.
static bool IsSelectCC(MachineInstr &MI)
static bool CC_AIX(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
static const TargetRegisterClass * getRegClassForSVT(MVT::SimpleValueType SVT, bool IsPPC64, bool HasP8Vector, bool HasVSX)
static bool isGPRShadowAligned(MCPhysReg Reg, Align RequiredAlign)
static SDValue DAGCombineAddc(SDNode *N, llvm::PPCTargetLowering::DAGCombinerInfo &DCI)
static bool needStackSlotPassParameters(const PPCSubtarget &Subtarget, const SmallVectorImpl< ISD::OutputArg > &Outs)
std::tuple< uint32_t, uint8_t > LXVKQPattern
static bool isAlternatingShuffMask(const ArrayRef< int > &Mask, int NumElts)
static bool isShuffleMaskInRange(const SmallVectorImpl< int > &ShuffV, int HalfVec, int LHSLastElementDefined, int RHSLastElementDefined)
static SDValue addShuffleForVecExtend(SDNode *N, SelectionDAG &DAG, SDValue Input, uint64_t Elems, uint64_t CorrectElems)
static cl::opt< bool > DisablePPCUnaligned("disable-ppc-unaligned", cl::desc("disable unaligned load/store generation on PPC"), cl::Hidden)
static SDValue combineADDToADDZE(SDNode *N, SelectionDAG &DAG, const PPCSubtarget &Subtarget)
static bool findConsecutiveLoad(LoadSDNode *LD, SelectionDAG &DAG)
static SDValue generateEquivalentSub(SDNode *N, int Size, bool Complement, bool Swap, SDLoc &DL, SelectionDAG &DAG)
This function is called when we have proved that a SETCC node can be replaced by subtraction (and oth...
static unsigned mapArgRegToOffsetAIX(unsigned Reg, const PPCFrameLowering *FL)
static void CalculateTailCallArgDest(SelectionDAG &DAG, MachineFunction &MF, bool IsPPC64, SDValue Arg, int SPDiff, unsigned ArgOffset, SmallVectorImpl< TailCallArgumentInfo > &TailCallArguments)
CalculateTailCallArgDest - Remember Argument for later processing.
static SDValue combineADDToMAT_PCREL_ADDR(SDNode *N, SelectionDAG &DAG, const PPCSubtarget &Subtarget)
static void setAlignFlagsForFI(SDValue N, unsigned &FlagSet, SelectionDAG &DAG)
Set alignment flags based on whether or not the Frame Index is aligned.
static bool isTOCSaveRestoreRequired(const PPCSubtarget &Subtarget)
static void updateForAIXShLibTLSModelOpt(TLSModel::Model &Model, SelectionDAG &DAG, const TargetMachine &TM)
updateForAIXShLibTLSModelOpt - Helper to initialize TLS model opt settings, and then apply the update...
static bool provablyDisjointOr(SelectionDAG &DAG, const SDValue &N)
Used when computing address flags for selecting loads and stores.
static bool callsShareTOCBase(const Function *Caller, const GlobalValue *CalleeGV, const TargetMachine &TM)
static SDValue generateSToVPermutedForVecShuffle(int ScalarSize, uint64_t ShuffleEltWidth, unsigned &NumValidElts, int FirstElt, int &LastElt, SDValue VecShuffOperand, SDValue SToVNode, SelectionDAG &DAG, const PPCSubtarget &Subtarget)
constexpr uint64_t AIXSmallTlsPolicySizeLimit
static bool canConvertToVcmpequb(SDValue &LHS, SDValue &RHS)
static bool isPCRelNode(SDValue N)
static void LowerMemOpCallTo(SelectionDAG &DAG, MachineFunction &MF, SDValue Chain, SDValue Arg, SDValue PtrOff, int SPDiff, unsigned ArgOffset, bool isPPC64, bool isTailCall, bool isVector, SmallVectorImpl< SDValue > &MemOpChains, SmallVectorImpl< TailCallArgumentInfo > &TailCallArguments, const SDLoc &dl)
LowerMemOpCallTo - Store the argument to the stack or remember it in case of tail calls.
static cl::opt< unsigned > PPCGatherAllAliasesMaxDepth("ppc-gather-alias-max-depth", cl::init(18), cl::Hidden, cl::desc("max depth when checking alias info in GatherAllAliases()"))
static bool areCallingConvEligibleForTCO_64SVR4(CallingConv::ID CallerCC, CallingConv::ID CalleeCC)
static const MCPhysReg FPR[]
FPR - The set of FP registers that should be allocated for arguments on Darwin and AIX.
static SDNode * isBLACompatibleAddress(SDValue Op, SelectionDAG &DAG)
isCallCompatibleAddress - Return the immediate to use if the specified 32-bit value is representable ...
static Align CalculateStackSlotAlignment(EVT ArgVT, EVT OrigVT, ISD::ArgFlagsTy Flags, unsigned PtrByteSize)
CalculateStackSlotAlignment - Calculates the alignment of this argument on the stack.
static bool IsSelect(MachineInstr &MI)
static SDValue ConvertCarryFlagToCarryValue(EVT SumType, SDValue Flag, EVT CarryType, SelectionDAG &DAG, const PPCSubtarget &STI)
static bool haveEfficientBuildVectorPattern(BuildVectorSDNode *V, bool HasDirectMove, bool HasP8Vector)
Do we have an efficient pattern in a .td file for this node?
static SDValue getSToVPermuted(SDValue OrigSToV, SelectionDAG &DAG, const PPCSubtarget &Subtarget)
static void setUsesTOCBasePtr(MachineFunction &MF)
static SDValue combineXorSelectCC(SDNode *N, SelectionDAG &DAG)
static SDValue transformCallee(const SDValue &Callee, SelectionDAG &DAG, const SDLoc &dl, const PPCSubtarget &Subtarget)
static unsigned EnsureStackAlignment(const PPCFrameLowering *Lowering, unsigned NumBytes)
EnsureStackAlignment - Round stack frame size up from NumBytes to ensure minimum alignment required f...
static SDValue stripModuloOnShift(const TargetLowering &TLI, SDNode *N, SelectionDAG &DAG)
static bool isStoreConditional(SDValue Intrin, unsigned &StoreWidth)
static bool hasSameArgumentList(const Function *CallerFn, const CallBase &CB)
static bool isFPExtLoad(SDValue Op)
static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op, SelectionDAG &DAG, const SDLoc &dl, EVT DestVT=MVT::Other)
BuildIntrinsicOp - Return a unary operator intrinsic node with the specified intrinsic ID.
static bool isConsecutiveLSLoc(SDValue Loc, EVT VT, LSBaseSDNode *Base, unsigned Bytes, int Dist, SelectionDAG &DAG)
static void StoreTailCallArgumentsToStackSlot(SelectionDAG &DAG, SDValue Chain, const SmallVectorImpl< TailCallArgumentInfo > &TailCallArgs, SmallVectorImpl< SDValue > &MemOpChains, const SDLoc &dl)
StoreTailCallArgumentsToStackSlot - Stores arguments to their stack slot.
static cl::opt< bool > UseAbsoluteJumpTables("ppc-use-absolute-jumptables", cl::desc("use absolute jump tables on ppc"), cl::Hidden)
static void setXFormForUnalignedFI(SDValue N, unsigned Flags, PPC::AddrMode &Mode)
static cl::opt< unsigned > PPCMinimumBitTestCmps("ppc-min-bit-test-cmps", cl::init(3), cl::Hidden, cl::desc("Set minimum of largest number of comparisons to use bit test for " "switch on PPC."))
static void getMaxByValAlign(Type *Ty, Align &MaxAlign, Align MaxMaxAlign)
getMaxByValAlign - Helper for getByValTypeAlignment to determine the desired ByVal argument alignment...
static bool isConsecutiveLS(SDNode *N, LSBaseSDNode *Base, unsigned Bytes, int Dist, SelectionDAG &DAG)
static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize, unsigned LHSStart, unsigned RHSStart)
isVMerge - Common function, used to match vmrg* shuffles.
static void getLabelAccessInfo(bool IsPIC, const PPCSubtarget &Subtarget, unsigned &HiOpFlags, unsigned &LoOpFlags, const GlobalValue *GV=nullptr)
Return true if we should reference labels using a PICBase, set the HiOpFlags and LoOpFlags to the tar...
cl::opt< bool > DisableAutoPairedVecSt("disable-auto-paired-vec-st", cl::desc("disable automatically generated 32byte paired vector stores"), cl::init(true), cl::Hidden)
static void buildCallOperands(SmallVectorImpl< SDValue > &Ops, PPCTargetLowering::CallFlags CFlags, const SDLoc &dl, SelectionDAG &DAG, SmallVector< std::pair< unsigned, SDValue >, 8 > &RegsToPass, SDValue Glue, SDValue Chain, SDValue &Callee, int SPDiff, const PPCSubtarget &Subtarget)
static cl::opt< bool > DisableInnermostLoopAlign32("disable-ppc-innermost-loop-align32", cl::desc("don't always align innermost loop to 32 bytes on ppc"), cl::Hidden)
static bool usePartialVectorLoads(SDNode *N, const PPCSubtarget &ST)
Returns true if we should use a direct load into vector instruction (such as lxsd or lfd),...
static SDValue getDataClassTest(SDValue Op, FPClassTest Mask, const SDLoc &Dl, SelectionDAG &DAG, const PPCSubtarget &Subtarget)
static void fixupShuffleMaskForPermutedSToV(SmallVectorImpl< int > &ShuffV, int LHSFirstElt, int LHSLastElt, int RHSFirstElt, int RHSLastElt, int HalfVec, unsigned LHSNumValidElts, unsigned RHSNumValidElts, const PPCSubtarget &Subtarget)
static SDValue AdjustLength(SDValue Val, unsigned Bits, bool Left, SelectionDAG &DAG)
static cl::opt< bool > DisableSCO("disable-ppc-sco", cl::desc("disable sibling call optimization on ppc"), cl::Hidden)
static std::optional< LXVKQPattern > getPatternInfo(const APInt &FullVal)
static void fixupFuncForFI(SelectionDAG &DAG, int FrameIdx, EVT VT)
static cl::opt< bool > DisablePPCPreinc("disable-ppc-preinc", cl::desc("disable preincrement load/store generation on PPC"), cl::Hidden)
static SDValue ConvertSETCCToXori(SDNode *N, SelectionDAG &DAG)
static Intrinsic::ID getIntrinsicForAtomicRMWBinOp128(AtomicRMWInst::BinOp BinOp)
static SDValue convertFPToInt(SDValue Op, SelectionDAG &DAG, const PPCSubtarget &Subtarget)
static unsigned CalculateStackSlotSize(EVT ArgVT, ISD::ArgFlagsTy Flags, unsigned PtrByteSize)
CalculateStackSlotSize - Calculates the size reserved for this argument on the stack.
static int CalculateTailCallSPDiff(SelectionDAG &DAG, bool isTailCall, unsigned ParamSize)
CalculateTailCallSPDiff - Get the amount the stack pointer has to be adjusted to accommodate the argu...
static Instruction * callIntrinsic(IRBuilderBase &Builder, Intrinsic::ID Id)
static void prepareIndirectCall(SelectionDAG &DAG, SDValue &Callee, SDValue &Glue, SDValue &Chain, const SDLoc &dl)
static SDValue LowerLabelRef(SDValue HiPart, SDValue LoPart, bool isPIC, SelectionDAG &DAG)
static SDValue isScalarToVec(SDValue Op)
static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl)
static cl::opt< bool > DisablePerfectShuffle("ppc-disable-perfect-shuffle", cl::desc("disable vector permute decomposition"), cl::init(true), cl::Hidden)
bool isValidMtVsrBmi(APInt &BitMask, BuildVectorSDNode &BVN, bool IsLittleEndian)
static bool getVectorCompareInfo(SDValue Intrin, int &CompareOpc, bool &isDot, const PPCSubtarget &Subtarget)
getVectorCompareInfo - Given an intrinsic, return false if it is not a vector comparison.
static unsigned invertFMAOpcode(unsigned Opc)
static SDValue combineADDToSUB(SDNode *N, SelectionDAG &DAG, const PPCSubtarget &Subtarget)
static const SDValue * getNormalLoadInput(const SDValue &Op, bool &IsPermuted)
static bool canConvertSETCCToXori(SDNode *N)
static cl::opt< unsigned > PPCMinimumJumpTableEntries("ppc-min-jump-table-entries", cl::init(64), cl::Hidden, cl::desc("Set minimum number of entries to use a jump table on PPC"))
static bool isValidSplatLoad(const PPCSubtarget &Subtarget, const SDValue &Op, unsigned &Opcode)
static SDValue ConvertCarryValueToCarryFlag(EVT SumType, SDValue Value, SelectionDAG &DAG, const PPCSubtarget &STI)
static SDValue convertIntToFP(SDValue Op, SDValue Src, SelectionDAG &DAG, const PPCSubtarget &Subtarget, SDValue Chain=SDValue())
static void PrepareTailCall(SelectionDAG &DAG, SDValue &InGlue, SDValue &Chain, const SDLoc &dl, int SPDiff, unsigned NumBytes, SDValue LROp, SDValue FPOp, SmallVectorImpl< TailCallArgumentInfo > &TailCallArguments)
static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG, SDValue Chain, SDValue OldRetAddr, SDValue OldFP, int SPDiff, const SDLoc &dl)
EmitTailCallStoreFPAndRetAddr - Move the frame pointer and return address to the appropriate stack sl...
static SDValue BuildVSLDOI(SDValue LHS, SDValue RHS, unsigned Amt, EVT VT, SelectionDAG &DAG, const SDLoc &dl)
BuildVSLDOI - Return a VECTOR_SHUFFLE that is a vsldoi of the specified amount.
static SDValue combineBVZEXTLOAD(SDNode *N, SelectionDAG &DAG)
static SDValue combineZextSetccWithZero(SDNode *N, SelectionDAG &DAG)
static SDValue truncateScalarIntegerArg(ISD::ArgFlagsTy Flags, EVT ValVT, SelectionDAG &DAG, SDValue ArgValue, MVT LocVT, const SDLoc &dl)
static void computeFlagsForAddressComputation(SDValue N, unsigned &FlagSet, SelectionDAG &DAG)
Given a node, compute flags that are used for address computation when selecting load and store instr...
SDValue convertTwoLoadsAndCmpToVCMPEQUB(SelectionDAG &DAG, SDNode *N, const SDLoc &DL)
static SDValue getOutputChainFromCallSeq(SDValue CallSeqStart)
static bool CalculateStackSlotUsed(EVT ArgVT, EVT OrigVT, ISD::ArgFlagsTy Flags, unsigned PtrByteSize, unsigned LinkageSize, unsigned ParamAreaSize, unsigned &ArgOffset, unsigned &AvailableFPRs, unsigned &AvailableVRs)
CalculateStackSlotUsed - Return whether this argument will use its stack slot (instead of being passe...
static cl::opt< unsigned > PPCAIXTLSModelOptUseIEForLDLimit("ppc-aix-shared-lib-tls-model-opt-limit", cl::init(1), cl::Hidden, cl::desc("Set inclusive limit count of TLS local-dynamic access(es) in a " "function to use initial-exec"))
static unsigned getPPCStrictOpcode(unsigned Opc)
static void prepareDescriptorIndirectCall(SelectionDAG &DAG, SDValue &Callee, SDValue &Glue, SDValue &Chain, SDValue CallSeqStart, const CallBase *CB, const SDLoc &dl, bool hasNest, const PPCSubtarget &Subtarget)
static cl::opt< bool > DisableP10StoreForward("disable-p10-store-forward", cl::desc("disable P10 store forward-friendly conversion"), cl::Hidden, cl::init(false))
static bool isXXBRShuffleMaskHelper(ShuffleVectorSDNode *N, int Width)
static bool isFunctionGlobalAddress(const GlobalValue *CalleeGV)
static bool isSplatBV(SDValue Op)
static SDValue combineBVOfVecSExt(SDNode *N, SelectionDAG &DAG)
static cl::opt< bool > DisableILPPref("disable-ppc-ilp-pref", cl::desc("disable setting the node scheduling preference to ILP on PPC"), cl::Hidden)
static bool isNByteElemShuffleMask(ShuffleVectorSDNode *, unsigned, int)
Check that the mask is shuffling N byte elements.
static SDValue combineBVOfConsecutiveLoads(SDNode *N, SelectionDAG &DAG)
Reduce the number of loads when building a vector.
static bool isValidPCRelNode(SDValue N)
if(auto Err=PB.parsePassPipeline(MPM, Passes)) return wrap(std MPM run * Mod
if(PassOpts->AAPipeline)
pre isel intrinsic Pre ISel Intrinsic Lowering
static constexpr MCPhysReg SPReg
const SmallVectorImpl< MachineOperand > & Cond
static cl::opt< RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode > Mode("regalloc-enable-advisor", cl::Hidden, cl::init(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Default), cl::desc("Enable regalloc advisor mode"), cl::values(clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Default, "default", "Default"), clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Release, "release", "precompiled"), clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Development, "development", "for training")))
SI optimize exec mask operations pre RA
static const MCExpr * MaskShift(const MCExpr *Val, uint32_t Mask, uint32_t Shift, MCContext &Ctx)
This file contains some templates that are useful if you are working with the STL at all.
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
Definition Value.cpp:487
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
static SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG, const SparcSubtarget *Subtarget)
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
#define LLVM_DEBUG(...)
Definition Debug.h:114
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
This file describes how to lower LLVM code to machine code.
static std::optional< unsigned > getOpcode(ArrayRef< VPValue * > Values)
Returns the opcode of Values or ~0 if they do not all agree.
Definition VPlanSLP.cpp:247
Value * RHS
Value * LHS
The Input class is used to parse a yaml document into in-memory structs and vectors.
static const fltSemantics & IEEEsingle()
Definition APFloat.h:296
static constexpr roundingMode rmTowardZero
Definition APFloat.h:348
static constexpr roundingMode rmNearestTiesToEven
Definition APFloat.h:344
static const fltSemantics & PPCDoubleDouble()
Definition APFloat.h:299
LLVM_ABI opStatus convert(const fltSemantics &ToSemantics, roundingMode RM, bool *losesInfo)
Definition APFloat.cpp:5976
bool isDenormal() const
Definition APFloat.h:1517
APInt bitcastToAPInt() const
Definition APFloat.h:1408
Class for arbitrary precision integers.
Definition APInt.h:78
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition APInt.h:235
void clearBit(unsigned BitPosition)
Set a given bit to 0.
Definition APInt.h:1421
bool isNegatedPowerOf2() const
Check if this APInt's negated value is a power of two greater than zero.
Definition APInt.h:450
uint64_t getZExtValue() const
Get zero extended value.
Definition APInt.h:1555
void setBit(unsigned BitPosition)
Set the given bit to 1 whose position is given as "bitPosition".
Definition APInt.h:1345
APInt abs() const
Get the absolute value.
Definition APInt.h:1810
bool isAllOnes() const
Determine if all bits are set. This is true for zero-width values.
Definition APInt.h:372
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition APInt.h:381
bool isNegative() const
Determine sign of this APInt.
Definition APInt.h:330
void clearAllBits()
Set every bit to 0.
Definition APInt.h:1411
bool isSignedIntN(unsigned N) const
Check if this APInt has an N-bits signed integer value.
Definition APInt.h:436
LLVM_ABI void insertBits(const APInt &SubBits, unsigned bitPosition)
Insert the bits from a smaller APInt starting at bitPosition.
Definition APInt.cpp:397
bool getBoolValue() const
Convert APInt to a boolean value.
Definition APInt.h:472
double bitsToDouble() const
Converts APInt bits to a double.
Definition APInt.h:1737
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
Definition APInt.h:441
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
Definition APInt.h:307
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
Definition APInt.h:297
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition APInt.h:201
LLVM_ABI APInt extractBits(unsigned numBits, unsigned bitPosition) const
Return an APInt with the extracted bits [bitPosition,bitPosition+numBits).
Definition APInt.cpp:482
An arbitrary precision integer that knows its signedness.
Definition APSInt.h:24
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
size_t size() const
size - Get the array size.
Definition ArrayRef.h:142
An instruction that atomically checks whether a specified value is in a memory location,...
an instruction that atomically reads a memory location, combines it with another value,...
BinOp
This enumeration lists the possible modifications atomicrmw can make.
@ Add
*p = old + v
@ USubCond
Subtract only if no unsigned overflow.
@ Sub
*p = old - v
@ And
*p = old & v
@ Xor
*p = old ^ v
@ USubSat
*p = usub.sat(old, v) usub.sat matches the behavior of llvm.usub.sat.
@ UIncWrap
Increment one up to a maximum value.
@ UDecWrap
Decrement one until a minimum value or zero.
@ Nand
*p = ~(old & v)
BinOp getOperation() const
LLVM_ABI StringRef getValueAsString() const
Return the attribute's value as a string.
LLVM Basic Block Representation.
Definition BasicBlock.h:62
const BlockAddress * getBlockAddress() const
static BranchProbability getOne()
static BranchProbability getZero()
A "pseudo-class" with methods for operating on BUILD_VECTORs.
LLVM_ABI bool isConstantSplat(APInt &SplatValue, APInt &SplatUndef, unsigned &SplatBitSize, bool &HasAnyUndefs, unsigned MinSplatBits=0, bool isBigEndian=false) const
Check if this is a constant splat, and if so, find the smallest element size that splats the vector.
CCState - This class holds information needed while lowering arguments and return values.
Register getLocReg() const
LocInfo getLocInfo() const
static CCValAssign getReg(unsigned ValNo, MVT ValVT, MCRegister Reg, MVT LocVT, LocInfo HTP, bool IsCustom=false)
static CCValAssign getCustomReg(unsigned ValNo, MVT ValVT, MCRegister Reg, MVT LocVT, LocInfo HTP)
static CCValAssign getMem(unsigned ValNo, MVT ValVT, int64_t Offset, MVT LocVT, LocInfo HTP, bool IsCustom=false)
bool needsCustom() const
int64_t getLocMemOffset() const
unsigned getValNo() const
static CCValAssign getCustomMem(unsigned ValNo, MVT ValVT, int64_t Offset, MVT LocVT, LocInfo HTP)
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
bool isStrictFP() const
Determine if the call requires strict floating point semantics.
CallingConv::ID getCallingConv() const
User::op_iterator arg_begin()
Return the iterator pointing to the beginning of the argument list.
LLVM_ABI bool isMustTailCall() const
Tests if this call site must be tail call optimized.
Value * getCalledOperand() const
User::op_iterator arg_end()
Return the iterator pointing to the end of the argument list.
unsigned arg_size() const
LLVM_ABI Function * getCaller()
Helper to get the caller (the parent function).
This class represents a function call, abstracting a target machine's calling convention.
bool isTailCall() const
ConstantFP - Floating Point Values [float, double].
Definition Constants.h:420
const Constant * getConstVal() const
uint64_t getZExtValue() const
const APInt & getAPIntValue() const
int64_t getSExtValue() const
This is an important base class in LLVM.
Definition Constant.h:43
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:64
bool isLittleEndian() const
Layout endianness...
Definition DataLayout.h:215
LLVM_ABI unsigned getLargestLegalIntTypeSizeInBits() const
Returns the size of largest legal integer type size, or 0 if none are set.
LLVM_ABI IntegerType * getIntPtrType(LLVMContext &C, unsigned AddressSpace=0) const
Returns an integer type with size at least as big as that of a pointer in the given address space.
LLVM_ABI Align getABITypeAlign(Type *Ty) const
Returns the minimum ABI-required alignment for the specified type.
LLVM_ABI TypeSize getTypeAllocSize(Type *Ty) const
Returns the offset in bytes between successive objects of the specified type, including alignment pad...
A debug info location.
Definition DebugLoc.h:123
iterator find(const_arg_type_t< KeyT > Val)
Definition DenseMap.h:178
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition DenseMap.h:241
This is a fast-path instruction selection class that generates poor code and doesn't support illegal ...
Definition FastISel.h:66
FunctionLoweringInfo - This contains information that is global to a function that is used when lower...
bool hasOptSize() const
Optimize this function for size (-Os) or minimum size (-Oz).
Definition Function.h:714
const DataLayout & getDataLayout() const
Get the data layout of the module this function belongs to.
Definition Function.cpp:362
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
Definition Function.cpp:763
uint64_t getFnAttributeAsParsedInteger(StringRef Kind, uint64_t Default=0) const
For a string attribute Kind, parse attribute as an integer.
Definition Function.cpp:775
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition Function.h:711
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition Function.h:272
AttributeList getAttributes() const
Return the attribute list for this Function.
Definition Function.h:354
arg_iterator arg_begin()
Definition Function.h:868
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition Function.cpp:358
size_t arg_size() const
Definition Function.h:901
Type * getReturnType() const
Returns the type of the ret val.
Definition Function.h:216
const Argument * const_arg_iterator
Definition Function.h:74
bool isVarArg() const
isVarArg - Return true if this function takes a variable number of arguments.
Definition Function.h:229
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition Function.cpp:728
const GlobalValue * getGlobal() const
LLVM_ABI const GlobalObject * getAliaseeObject() const
Definition Globals.cpp:651
bool isThreadLocal() const
If the value is "Thread Local", its value isn't shared by the threads.
void setThreadLocalMode(ThreadLocalMode Val)
bool hasHiddenVisibility() const
LLVM_ABI StringRef getSection() const
Definition Globals.cpp:192
Module * getParent()
Get the module that this global value is contained inside of...
bool isStrongDefinitionForLinker() const
Returns true if this global's definition will be the one chosen by the linker.
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this global belongs to.
Definition Globals.cpp:133
bool hasComdat() const
Type * getValueType() const
bool hasProtectedVisibility() const
Common base class shared among various IRBuilders.
Definition IRBuilder.h:114
LLVM_ABI bool hasAtomicLoad() const LLVM_READONLY
Return true if this atomic instruction loads from memory.
static constexpr LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
Base class for LoadSDNode and StoreSDNode.
Tracks which library functions to use for a particular subtarget.
An instruction for reading from memory.
bool isUnordered() const
This class is used to represent ISD::LOAD nodes.
const SDValue & getBasePtr() const
ISD::LoadExtType getExtensionType() const
Return whether this is a plain node, or one of the varieties of value-extending loads.
bool hasValue() const
TypeSize getValue() const
Context object for machine code objects.
Definition MCContext.h:83
Base class for the full range of assembler expressions which are needed for parsing.
Definition MCExpr.h:34
Wrapper class representing physical registers. Should be passed by value.
Definition MCRegister.h:41
MCSymbolXCOFF * getQualNameSymbol() const
static const MCSymbolRefExpr * create(const MCSymbol *Symbol, MCContext &Ctx, SMLoc Loc=SMLoc())
Definition MCExpr.h:214
Metadata node.
Definition Metadata.h:1080
Machine Value Type.
@ INVALID_SIMPLE_VALUE_TYPE
SimpleValueType SimpleTy
uint64_t getScalarSizeInBits() const
unsigned getVectorNumElements() const
bool isVector() const
Return true if this is a vector value type.
bool isInteger() const
Return true if this is an integer or a vector integer type.
static auto integer_valuetypes()
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
static auto fixedlen_vector_valuetypes()
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
bool isScalarInteger() const
Return true if this is an integer, not including vectors.
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
static MVT getIntegerVT(unsigned BitWidth)
static auto fp_valuetypes()
LLVM_ABI void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
void setCallFrameSize(unsigned N)
Set the call frame size on entry to this basic block.
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
LLVM_ABI void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
void addLiveIn(MCRegister PhysReg, LaneBitmask LaneMask=LaneBitmask::getAll())
Adds the specified register as a live in.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
MachineInstrBundleIterator< MachineInstr > iterator
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
LLVM_ABI int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool IsImmutable, bool isAliased=false)
Create a new object at a fixed location on the stack.
LLVM_ABI int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
void setFrameAddressIsTaken(bool T)
void setHasTailCall(bool V=true)
void setReturnAddressIsTaken(bool s)
Align getObjectAlign(int ObjectIdx) const
Return the alignment of the specified stack object.
int64_t getObjectSize(int ObjectIdx) const
Return the size of the specified object.
bool hasVAStart() const
Returns true if the function calls the llvm.va_start intrinsic.
int64_t getObjectOffset(int ObjectIdx) const
Return the assigned stack offset of the specified object from the incoming stack pointer.
MCSymbol * getPICBaseSymbol() const
getPICBaseSymbol - Return a function-local symbol to represent the PIC base.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MCContext & getContext() const
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
BasicBlockListType::iterator iterator
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Register addLiveIn(MCRegister PReg, const TargetRegisterClass *RC)
addLiveIn - Add the specified physical register as a live-in value and create a corresponding virtual...
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineInstr - Allocate a new MachineInstr.
void insert(iterator MBBI, MachineBasicBlock *MBB)
const MachineInstrBuilder & addUse(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a virtual register use operand.
const MachineInstrBuilder & addReg(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & setMIFlag(MachineInstr::MIFlag Flag) const
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addRegMask(const uint32_t *Mask) const
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & addDef(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a virtual register definition operand.
const MachineInstrBuilder & cloneMemRefs(const MachineInstr &OtherMI) const
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
Representation of each machine instruction.
@ EK_LabelDifference32
EK_LabelDifference32 - Each entry is the address of the block minus the address of the jump table.
A description of a memory reference used in the backend.
LocationSize getSize() const
Return the size in bytes of the memory reference.
Flags
Flags values. These may be or'd together.
@ MOVolatile
The memory access is volatile.
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
@ MOLoad
The memory access reads data.
@ MOInvariant
The memory access always returns the same value (or traps).
@ MOStore
The memory access writes data.
Flags getFlags() const
Return the raw flags of the source value,.
MachineOperand class - Representation of each machine instruction operand.
static MachineOperand CreateImm(int64_t Val)
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
LLVM_ABI bool hasOneNonDBGUse(Register RegNo) const
hasOneNonDBGUse - Return true if there is exactly one non-Debug use of the specified register.
const TargetRegisterClass * getRegClass(Register Reg) const
Return the register class of the specified virtual register.
LLVM_ABI Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
LLVM_ABI Register getLiveInVirtReg(MCRegister PReg) const
getLiveInVirtReg - If PReg is a live-in physical register, return the corresponding live-in virtual r...
bool use_empty(Register RegNo) const
use_empty - Return true if there are no instructions using the specified register.
This SDNode is used for target intrinsics that touch memory and need an associated MachineMemOperand.
This is an abstract virtual class for memory operations.
Align getAlign() const
AAMDNodes getAAInfo() const
Returns the AA info that describes the dereference.
MachineMemOperand * getMemOperand() const
Return the unique MachineMemOperand object describing the memory reference performed by operation.
const SDValue & getBasePtr() const
const MachinePointerInfo & getPointerInfo() const
const SDValue & getChain() const
EVT getMemoryVT() const
Return the type of the in-memory value.
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:67
uint64_t getReturnSaveOffset() const
getReturnSaveOffset - Return the previous frame offset to save the return address.
unsigned getLinkageSize() const
getLinkageSize - Return the size of the PowerPC ABI linkage area.
uint64_t getTOCSaveOffset() const
getTOCSaveOffset - Return the previous frame offset to save the TOC register – 64-bit SVR4 ABI only.
PPCFunctionInfo - This class is derived from MachineFunction private PowerPC target-specific informat...
void setVarArgsNumFPR(unsigned Num)
void setVarArgsNumGPR(unsigned Num)
void appendParameterType(ParamType Type)
void setMinReservedArea(unsigned size)
unsigned getMinReservedArea() const
void setVarArgsStackOffset(int Offset)
void addLiveInAttr(Register VReg, ISD::ArgFlagsTy Flags)
This function associates attributes for each live-in virtual register.
static bool hasPCRelFlag(unsigned TF)
bool is32BitELFABI() const
unsigned descriptorTOCAnchorOffset() const
MVT getScalarIntVT() const
bool isAIXABI() const
const PPCFrameLowering * getFrameLowering() const override
bool isUsingPCRelativeCalls() const
bool usesFunctionDescriptors() const
True if the ABI is descriptor based.
MCRegister getEnvironmentPointerRegister() const
bool isSVR4ABI() const
bool isLittleEndian() const
MCRegister getTOCPointerRegister() const
MCRegister getStackPointerRegister() const
bool is64BitELFABI() const
bool isELFv2ABI() const
const PPCTargetMachine & getTargetMachine() const
const PPCRegisterInfo * getRegisterInfo() const override
unsigned descriptorEnvironmentPointerOffset() const
MachineBasicBlock * emitEHSjLjLongJmp(MachineInstr &MI, MachineBasicBlock *MBB) const
CCAssignFn * ccAssignFnForCall(CallingConv::ID CC, bool Return, bool IsVarArg) const
bool isTruncateFree(Type *Ty1, Type *Ty2) const override
isTruncateFree - Return true if it's free to truncate a value of type Ty1 to type Ty2.
Value * emitMaskedAtomicRMWIntrinsic(IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr, Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const override
Perform a masked atomicrmw using a target-specific intrinsic.
MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *MBB) const override
This method should be implemented by targets that mark instructions with the 'usesCustomInserter' fla...
bool isFPExtFree(EVT DestVT, EVT SrcVT) const override
Return true if an fpext operation is free (for instance, because single-precision floating-point numb...
PPC::AddrMode SelectForceXFormMode(SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG) const
SelectForceXFormMode - Given the specified address, force it to be represented as an indexed [r+r] op...
Instruction * emitTrailingFence(IRBuilderBase &Builder, Instruction *Inst, AtomicOrdering Ord) const override
TargetLowering::AtomicExpansionKind shouldExpandAtomicRMWInIR(const AtomicRMWInst *AI) const override
Returns how the IR-level AtomicExpand pass should expand the given AtomicRMW, if at all.
bool hasInlineStackProbe(const MachineFunction &MF) const override
MachineBasicBlock * emitEHSjLjSetJmp(MachineInstr &MI, MachineBasicBlock *MBB) const
bool supportsTailCallFor(const CallBase *CB) const
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override
Return true if folding a constant offset with the given GlobalAddress is legal.
MachineBasicBlock * emitProbedAlloca(MachineInstr &MI, MachineBasicBlock *MBB) const
bool isZExtFree(SDValue Val, EVT VT2) const override
Return true if zero-extending the specific node Val to type VT2 is free (either because it's implicit...
MachineBasicBlock * EmitPartwordAtomicBinary(MachineInstr &MI, MachineBasicBlock *MBB, bool is8bit, unsigned Opcode, unsigned CmpOpcode=0, unsigned CmpPred=0) const
SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, NegatibleCost &Cost, unsigned Depth=0) const override
Return the newly negated expression if the cost is not expensive and set the cost in Cost to indicate...
bool SelectAddressRegImm(SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG, MaybeAlign EncodingAlignment) const
SelectAddressRegImm - Returns true if the address N can be represented by a base register plus a sign...
SDValue expandVSXLoadForLE(SDNode *N, DAGCombinerInfo &DCI) const
bool splitValueIntoRegisterParts(SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts, unsigned NumParts, MVT PartVT, std::optional< CallingConv::ID > CC) const override
Target-specific splitting of values into parts that fit a register storing a legal type.
void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const override
LowerAsmOperandForConstraint - Lower the specified operand into the Ops vector.
void ReplaceNodeResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG) const override
ReplaceNodeResults - Replace the results of node with an illegal result type with new values built ou...
bool hasMultipleConditionRegisters(EVT VT) const override
Does the target have multiple (allocatable) condition registers that can be used to store the results...
Align getByValTypeAlignment(Type *Ty, const DataLayout &DL) const override
getByValTypeAlignment - Return the desired alignment for ByVal aggregate function arguments in the ca...
bool SelectAddressRegReg(SDValue N, SDValue &Base, SDValue &Index, SelectionDAG &DAG, MaybeAlign EncodingAlignment=std::nullopt) const
SelectAddressRegReg - Given the specified address, check to see if it can be more efficiently repre...
MachineBasicBlock * EmitAtomicBinary(MachineInstr &MI, MachineBasicBlock *MBB, unsigned AtomicSize, unsigned BinOpcode, unsigned CmpOpcode=0, unsigned CmpPred=0) const
SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created) const override
Targets may override this function to provide custom SDIV lowering for power-of-2 denominators.
Value * emitStoreConditional(IRBuilderBase &Builder, Value *Val, Value *Addr, AtomicOrdering Ord) const override
Perform a store-conditional operation to Addr.
void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth=0) const override
Determine which of the bits specified in Mask are known to be either zero or one and return them in t...
bool SelectAddressRegRegOnly(SDValue N, SDValue &Base, SDValue &Index, SelectionDAG &DAG) const
SelectAddressRegRegOnly - Given the specified address, force it to be represented as an indexed [r+...
bool useSoftFloat() const override
SDValue getPICJumpTableRelocBase(SDValue Table, SelectionDAG &DAG) const override
Returns relocation base for the given PIC jumptable.
TargetLowering::AtomicExpansionKind shouldExpandAtomicCmpXchgInIR(const AtomicCmpXchgInst *AI) const override
Returns how the given atomic cmpxchg should be expanded by the IR-level AtomicExpand pass.
Value * emitMaskedAtomicCmpXchgIntrinsic(IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr, Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const override
Perform a masked cmpxchg using a target-specific intrinsic.
ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &info, const char *constraint) const override
Examine constraint string and operand type and determine a weight value.
bool enableAggressiveFMAFusion(EVT VT) const override
Return true if target always benefits from combining into FMA for a given value type.
Register getRegisterByName(const char *RegName, LLT VT, const MachineFunction &MF) const override
Return the register ID of the name passed in.
bool decomposeMulByConstant(LLVMContext &Context, EVT VT, SDValue C) const override
Return true if it is profitable to transform an integer multiplication-by-constant into simpler opera...
void getTgtMemIntrinsic(SmallVectorImpl< IntrinsicInfo > &Infos, const CallBase &I, MachineFunction &MF, unsigned Intrinsic) const override
Given an intrinsic, checks if on the target the intrinsic will need to map to a MemIntrinsicNode (tou...
unsigned getJumpTableEncoding() const override
Return the entry encoding for a jump table in the current function.
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I=nullptr) const override
isLegalAddressingMode - Return true if the addressing mode represented by AM is legal for this target...
bool preferIncOfAddToSubOfNot(EVT VT) const override
These two forms are equivalent: sub y, (xor x, -1) add (add x, 1), y The variant with two add's is IR...
bool shouldConvertConstantLoadToIntImm(const APInt &Imm, Type *Ty) const override
Returns true if it is beneficial to convert a load of a constant to just the constant itself.
const MCPhysReg * getScratchRegisters(CallingConv::ID CC) const override
Returns a 0 terminated array of registers that can be safely used as scratch registers.
bool getPreIndexedAddressParts(SDNode *N, SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG) const override
getPreIndexedAddressParts - returns true by value, base pointer and offset pointer and addressing mod...
FastISel * createFastISel(FunctionLoweringInfo &FuncInfo, const TargetLibraryInfo *LibInfo, const LibcallLoweringInfo *LibcallLowering) const override
createFastISel - This method returns a target-specific FastISel object, or null if the target does no...
bool isProfitableToHoist(Instruction *I) const override
isProfitableToHoist - Check if it is profitable to hoist instruction I to its dominator block.
bool isFPImmLegal(const APFloat &Imm, EVT VT, bool ForCodeSize) const override
Returns true if the target can instruction select the specified FP immediate natively.
Value * emitLoadLinked(IRBuilderBase &Builder, Type *ValueTy, Value *Addr, AtomicOrdering Ord) const override
Perform a load-linked operation on Addr, returning a "Value *" with the corresponding pointee type.
ConstraintType getConstraintType(StringRef Constraint) const override
getConstraintType - Given a constraint, return the type of constraint it is for this target.
const MCExpr * getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI, MCContext &Ctx) const override
This returns the relocation base for the given PIC jumptable, the same as getPICJumpTableRelocBase,...
bool shallExtractConstSplatVectorElementToStore(Type *VectorTy, unsigned ElemSizeInBits, unsigned &Index) const override
Return true if the target shall perform extract vector element and store given that the vector is kno...
EVT getOptimalMemOpType(LLVMContext &Context, const MemOp &Op, const AttributeList &FuncAttributes) const override
It returns EVT::Other if the type should be determined using generic target-independent logic.
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override
This method will be invoked for all target nodes and for any target-independent nodes that the target...
SDValue expandVSXStoreForLE(SDNode *N, DAGCombinerInfo &DCI) const
void CollectTargetIntrinsicOperands(const CallInst &I, SmallVectorImpl< SDValue > &Ops, SelectionDAG &DAG) const override
unsigned getStackProbeSize(const MachineFunction &MF) const
PPCTargetLowering(const PPCTargetMachine &TM, const PPCSubtarget &STI)
bool useLoadStackGuardNode(const Module &M) const override
Override to support customized stack guard loading.
bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, EVT VT) const override
isFMAFasterThanFMulAndFAdd - Return true if an FMA operation is faster than a pair of fmul and fadd i...
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const override
Is unaligned memory access allowed for the given type, and is it fast relative to software emulation.
bool shouldExpandBuildVectorWithShuffles(EVT VT, unsigned DefinedValues) const override
bool SelectAddressRegImm34(SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG) const
Similar to the 16-bit case but for instructions that take a 34-bit displacement field (prefixed loads...
std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const override
Given a physical register constraint (e.g.
Register getExceptionSelectorRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception typeid on entry to a la...
bool isJumpTableRelative() const override
Register getExceptionPointerRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception address on entry to an ...
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
LowerOperation - Provide custom lowering hooks for some operations.
PPC::AddrMode SelectOptimalAddrMode(const SDNode *Parent, SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG, MaybeAlign Align) const
SelectOptimalAddrMode - Based on a node N and it's Parent (a MemSDNode), compute the address flags of...
bool SelectAddressPCRel(SDValue N, SDValue &Base) const
SelectAddressPCRel - Represent the specified address as pc relative to be represented as [pc+imm].
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const override
getSetCCResultType - Return the ISD::SETCC ValueType
bool SelectAddressEVXRegReg(SDValue N, SDValue &Base, SDValue &Index, SelectionDAG &DAG) const
SelectAddressEVXRegReg - Given the specified address, check to see if it can be more efficiently re...
bool isLegalICmpImmediate(int64_t Imm) const override
isLegalICmpImmediate - Return true if the specified immediate is legal icmp immediate,...
bool isAccessedAsGotIndirect(SDValue N) const
Align getPrefLoopAlignment(MachineLoop *ML) const override
Return the preferred loop alignment.
Instruction * emitLeadingFence(IRBuilderBase &Builder, Instruction *Inst, AtomicOrdering Ord) const override
Inserts in the IR a target-specific intrinsic specifying a fence.
bool isLegalAddImmediate(int64_t Imm) const override
isLegalAddImmediate - Return true if the specified immediate is legal add immediate,...
Common code between 32-bit and 64-bit PowerPC targets.
static PointerType * getUnqual(Type *ElementType)
This constructs a pointer to an object of the specified type in the default address space (address sp...
Wrapper class representing virtual and physical registers.
Definition Register.h:20
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition Register.h:79
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
This class provides iterator support for SDUse operands that use a specific SDNode.
Represents one node in the SelectionDAG.
ArrayRef< SDUse > ops() const
LLVM_ABI void dump() const
Dump this node, for debugging.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
bool hasOneUse() const
Return true if there is exactly one use of this node.
iterator_range< value_op_iterator > op_values() const
iterator_range< use_iterator > uses()
SDNodeFlags getFlags() const
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
unsigned getNumValues() const
Return the number of values defined/returned by this operator.
unsigned getNumOperands() const
Return the number of values used by this operation.
const SDValue & getOperand(unsigned Num) const
uint64_t getConstantOperandVal(unsigned Num) const
Helper method returns the integer value of a ConstantSDNode operand.
bool hasNUsesOfValue(unsigned NUses, unsigned Value) const
Return true if there are exactly NUSES uses of the indicated value.
use_iterator use_begin() const
Provide iteration support to walk over all uses of an SDNode.
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
iterator_range< user_iterator > users()
user_iterator user_begin() const
Provide iteration support to walk over all users of an SDNode.
static use_iterator use_end()
Represents a use of a SDNode.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isUndef() const
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
bool isMachineOpcode() const
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
const SDValue & getOperand(unsigned i) const
uint64_t getConstantOperandVal(unsigned i) const
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
unsigned getMachineOpcode() const
unsigned getOpcode() const
unsigned getNumOperands() const
static SectionKind getMetadata()
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
LLVM_ABI SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
LLVM_ABI SDValue getStackArgumentTokenFactor(SDValue Chain)
Compute a TokenFactor to force all the incoming stack arguments to be loaded from the stack.
const TargetSubtargetInfo & getSubtarget() const
SDValue getCopyToReg(SDValue Chain, const SDLoc &dl, Register Reg, SDValue N)
LLVM_ABI SDValue getMergeValues(ArrayRef< SDValue > Ops, const SDLoc &dl)
Create a MERGE_VALUES node from the given operands.
LLVM_ABI SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
LLVM_ABI SDValue getAllOnesConstant(const SDLoc &DL, EVT VT, bool IsTarget=false, bool IsOpaque=false)
LLVM_ABI MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
LLVM_ABI SDValue getFreeze(SDValue V)
Return a freeze using the SDLoc of the value operand.
LLVM_ABI SDValue makeEquivalentMemoryOrdering(SDValue OldChain, SDValue NewMemOpChain)
If an existing load has uses of its chain, create a token factor node with that chain and the new mem...
LLVM_ABI SDValue getConstantFP(double Val, const SDLoc &DL, EVT VT, bool isTarget=false)
Create a ConstantFPSDNode wrapping a constant value.
LLVM_ABI SDValue getRegister(Register Reg, EVT VT)
LLVM_ABI SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
LLVM_ABI SDValue getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef< SDValue > Ops, EVT MemVT, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags Flags=MachineMemOperand::MOLoad|MachineMemOperand::MOStore, LocationSize Size=LocationSize::precise(0), const AAMDNodes &AAInfo=AAMDNodes())
Creates a MemIntrinsicNode that may produce a result and takes a list of operands.
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Chain=SDValue(), bool IsSignaling=false, SDNodeFlags Flags={})
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...
LLVM_ABI SDValue getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src, SDValue Size, Align Alignment, bool isVol, bool AlwaysInline, const CallInst *CI, std::optional< bool > OverrideTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo, const AAMDNodes &AAInfo=AAMDNodes(), BatchAAResults *BatchAA=nullptr)
LLVM_ABI Align getEVTAlign(EVT MemoryVT) const
Compute the default alignment value for the given type.
void addNoMergeSiteInfo(const SDNode *Node, bool NoMerge)
Set NoMergeSiteInfo to be associated with Node if NoMerge is true.
LLVM_ABI SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
const TargetLowering & getTargetLoweringInfo() const
static constexpr unsigned MaxRecursionDepth
SDValue getTargetJumpTable(int JTI, EVT VT, unsigned TargetFlags=0)
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getCALLSEQ_END(SDValue Chain, SDValue Op1, SDValue Op2, SDValue InGlue, const SDLoc &DL)
Return a new CALLSEQ_END node, which always must have a glue result (to ensure it's not CSE'd).
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
LLVM_ABI bool isSplatValue(SDValue V, const APInt &DemandedElts, APInt &UndefElts, unsigned Depth=0) const
Test whether V has a splatted value for all the demanded elements.
LLVM_ABI SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, Register Reg, EVT VT)
SDValue getSelect(const SDLoc &DL, EVT VT, SDValue Cond, SDValue LHS, SDValue RHS, SDNodeFlags Flags=SDNodeFlags())
Helper function to make it easier to build Select's if you just have operands and don't want to check...
const DataLayout & getDataLayout() const
SDValue getTargetFrameIndex(int FI, EVT VT)
LLVM_ABI SDValue getTokenFactor(const SDLoc &DL, SmallVectorImpl< SDValue > &Vals)
Creates a new TokenFactor containing Vals.
LLVM_ABI bool areNonVolatileConsecutiveLoads(LoadSDNode *LD, LoadSDNode *Base, unsigned Bytes, int Dist) const
Return true if loads are next to each other and can be merged.
LLVM_ABI SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
SDValue getSignedTargetConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
LLVM_ABI SDValue getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, EVT SVT, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
LLVM_ABI SDValue getMDNode(const MDNode *MD)
Return an MDNodeSDNode which holds an MDNode.
LLVM_ABI void ReplaceAllUsesWith(SDValue From, SDValue To)
Modify anything using 'From' to use 'To' instead.
LLVM_ABI SDValue getCommutedVectorShuffle(const ShuffleVectorSDNode &SV)
Returns an ISD::VECTOR_SHUFFLE node semantically equivalent to the shuffle node in input but with swa...
LLVM_ABI SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
LLVM_ABI SDValue getSignedConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
SDValue getCALLSEQ_START(SDValue Chain, uint64_t InSize, uint64_t OutSize, const SDLoc &DL)
Return a new CALLSEQ_START node, that starts new call frame, in which InSize bytes are set up inside ...
SDValue getSelectCC(const SDLoc &DL, SDValue LHS, SDValue RHS, SDValue True, SDValue False, ISD::CondCode Cond, SDNodeFlags Flags=SDNodeFlags())
Helper function to make it easier to build SelectCC's if you just have an ISD::CondCode instead of an...
LLVM_ABI SDValue getSExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either sign-extending or trunca...
LLVM_ABI SDValue getBoolExtOrTrunc(SDValue Op, const SDLoc &SL, EVT VT, EVT OpVT)
Convert Op, which must be of integer type, to the integer type VT, by using an extension appropriate ...
LLVM_ABI SDValue getExternalSymbol(const char *Sym, EVT VT)
const TargetMachine & getTarget() const
LLVM_ABI SDValue getAnyExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either any-extending or truncat...
LLVM_ABI SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
LLVM_ABI SDValue getValueType(EVT)
LLVM_ABI SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
LLVM_ABI unsigned ComputeNumSignBits(SDValue Op, unsigned Depth=0) const
Return the number of times the sign bit of the register is replicated into the other bits.
LLVM_ABI SDValue getBoolConstant(bool V, const SDLoc &DL, EVT VT, EVT OpVT)
Create a true or false constant of type VT using the target's BooleanContent for type OpVT.
SDValue getTargetBlockAddress(const BlockAddress *BA, EVT VT, int64_t Offset=0, unsigned TargetFlags=0)
LLVM_ABI bool isBaseWithConstantOffset(SDValue Op) const
Return true if the specified operand is an ISD::ADD with a ConstantSDNode on the right-hand side,...
LLVM_ABI SDValue getVectorIdxConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
LLVM_ABI void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone.
MachineFunction & getMachineFunction() const
SDValue getSplatBuildVector(EVT VT, const SDLoc &DL, SDValue Op)
Return a splat ISD::BUILD_VECTOR node, consisting of Op splatted to all elements.
LLVM_ABI SDValue getFrameIndex(int FI, EVT VT, bool isTarget=false)
LLVM_ABI KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
LLVM_ABI SDValue getRegisterMask(const uint32_t *RegMask)
LLVM_ABI SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
LLVM_ABI SDValue getCondCode(ISD::CondCode Cond)
LLVM_ABI bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
SDValue getObjectPtrOffset(const SDLoc &SL, SDValue Ptr, TypeSize Offset)
Create an add instruction with appropriate flags when used for addressing some offset of an object.
LLVMContext * getContext() const
LLVM_ABI SDValue getTargetExternalSymbol(const char *Sym, EVT VT, unsigned TargetFlags=0)
LLVM_ABI SDValue getMCSymbol(MCSymbol *Sym, EVT VT)
LLVM_ABI SDValue CreateStackTemporary(TypeSize Bytes, Align Alignment)
Create a stack temporary based on the size in bytes and the alignment.
SDValue getTargetConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offset=0, unsigned TargetFlags=0)
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
LLVM_ABI std::pair< SDValue, SDValue > SplitScalar(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the scalar node with EXTRACT_ELEMENT using the provided VTs and return the low/high part.
LLVM_ABI SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
This SDNode is used to implement the code generator support for the llvm IR shufflevector instruction...
int getMaskElt(unsigned Idx) const
ArrayRef< int > getMask() const
size_type size() const
Definition SmallPtrSet.h:99
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
This class is used to represent ISD::STORE nodes.
const SDValue & getBasePtr() const
const SDValue & getValue() const
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
constexpr size_t size() const
size - Get the string size.
Definition StringRef.h:143
constexpr const char * data() const
data - Get a pointer to the start of the string (which may not be null terminated).
Definition StringRef.h:137
Class to represent struct types.
Information about stack frame layout on the target.
unsigned getStackAlignment() const
getStackAlignment - This method returns the number of bytes to which the stack pointer must be aligne...
TargetInstrInfo - Interface to description of machine instruction set.
Provides information about what library functions are available for the current target.
void setBooleanVectorContents(BooleanContent Ty)
Specify how the target extends the result of a vector boolean value from a vector of i1 to a wider ty...
void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action)
Indicate that the specified operation does not work with the specified type and indicate what to do a...
bool PredictableSelectIsExpensive
Tells the code generator that select is more expensive than a branch if the branch is usually predict...
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
virtual bool shouldExpandBuildVectorWithShuffles(EVT, unsigned DefinedValues) const
void setMinimumBitTestCmps(unsigned Val)
Set the minimum of largest of number of comparisons to generate BitTest.
unsigned MaxStoresPerMemcpyOptSize
Likewise for functions with the OptSize attribute.
MachineBasicBlock * emitPatchPoint(MachineInstr &MI, MachineBasicBlock *MBB) const
Replace/modify any TargetFrameIndex operands with a target-dependent sequence of memory operands that...
virtual const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const
Return the register class that should be used for the specified value type.
virtual AtomicExpansionKind shouldExpandAtomicRMWInIR(const AtomicRMWInst *RMW) const
Returns how the IR-level AtomicExpand pass should expand the given AtomicRMW, if at all.
void setMinStackArgumentAlignment(Align Alignment)
Set the minimum stack alignment of an argument.
MVT getVectorIdxTy(const DataLayout &DL) const
Returns the type to be used for the index operand of: ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT...
const TargetMachine & getTargetMachine() const
unsigned MaxLoadsPerMemcmp
Specify maximum number of load instructions per memcmp call.
virtual bool isZExtFree(Type *FromTy, Type *ToTy) const
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
void setIndexedLoadAction(ArrayRef< unsigned > IdxModes, MVT VT, LegalizeAction Action)
Indicate that the specified indexed load does or does not work with the specified type and indicate w...
void setPrefLoopAlignment(Align Alignment)
Set the target's preferred loop alignment.
void setMaxAtomicSizeInBitsSupported(unsigned SizeInBits)
Set the maximum atomic operation size supported by the backend.
Sched::Preference getSchedulingPreference() const
Return target scheduling preference.
void setMinFunctionAlignment(Align Alignment)
Set the target's minimum function alignment.
bool isOperationCustom(unsigned Op, EVT VT) const
Return true if the operation uses custom lowering, regardless of whether the type is legal or not.
unsigned MaxStoresPerMemsetOptSize
Likewise for functions with the OptSize attribute.
bool hasBigEndianPartOrdering(EVT VT, const DataLayout &DL) const
When splitting a value of the specified type into parts, does the Lo or Hi part come first?
EVT getShiftAmountTy(EVT LHSTy, const DataLayout &DL) const
Returns the type for the shift amount of a shift opcode.
void setBooleanContents(BooleanContent Ty)
Specify how the target extends the result of integer and floating point boolean values from i1 to a w...
unsigned MaxStoresPerMemmove
Specify maximum number of store instructions per memmove call.
virtual Align getPrefLoopAlignment(MachineLoop *ML=nullptr) const
Return the preferred loop alignment.
void computeRegisterProperties(const TargetRegisterInfo *TRI)
Once all of the register classes are added, this allows us to compute derived properties we expose.
unsigned MaxStoresPerMemmoveOptSize
Likewise for functions with the OptSize attribute.
void addRegisterClass(MVT VT, const TargetRegisterClass *RC)
Add the specified register class as an available regclass for the specified value type.
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
void setIndexedStoreAction(ArrayRef< unsigned > IdxModes, MVT VT, LegalizeAction Action)
Indicate that the specified indexed store does or does not work with the specified type and indicate ...
virtual bool isJumpTableRelative() const
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
void setPrefFunctionAlignment(Align Alignment)
Set the target's preferred function alignment.
bool isOperationLegal(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target.
unsigned MaxStoresPerMemset
Specify maximum number of store instructions per memset call.
void setMinimumJumpTableEntries(unsigned Val)
Indicate the minimum number of blocks to generate jump tables.
void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified truncating store does not work with the specified type and indicate what ...
bool isOperationLegalOrCustom(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
unsigned MaxLoadsPerMemcmpOptSize
Likewise for functions with the OptSize attribute.
void setMinCmpXchgSizeInBits(unsigned SizeInBits)
Sets the minimum cmpxchg or ll/sc size supported by the backend.
void setStackPointerRegisterToSaveRestore(Register R)
If set to a physical register, this specifies the register that llvm.savestack/llvm....
void AddPromotedToType(unsigned Opc, MVT OrigVT, MVT DestVT)
If Opc/OrigVT is specified as being promoted, the promotion code defaults to trying a larger integer/...
AtomicExpansionKind
Enum that specifies what an atomic load/AtomicRMWInst is expanded to, if at all.
void setCondCodeAction(ArrayRef< ISD::CondCode > CCs, MVT VT, LegalizeAction Action)
Indicate that the specified condition code is or isn't supported on the target and indicate what to d...
void setTargetDAGCombine(ArrayRef< ISD::NodeType > NTs)
Targets should invoke this method for each target independent node that they want to provide a custom...
void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified load with extension does not work with the specified type and indicate wh...
unsigned GatherAllAliasesMaxDepth
Depth that GatherAllAliases should continue looking for chain dependencies when trying to find a more...
NegatibleCost
Enum that specifies when a float negation is beneficial.
virtual bool shouldSignExtendTypeInLibCall(Type *Ty, bool IsSigned) const
Returns true if arguments should be sign-extended in lib calls.
std::vector< ArgListEntry > ArgListTy
unsigned MaxStoresPerMemcpy
Specify maximum number of store instructions per memcpy call.
void setSchedulingPreference(Sched::Preference Pref)
Specify the target scheduling preference.
void setJumpIsExpensive(bool isExpensive=true)
Tells the code generator not to expand logic operations on comparison predicates into separate sequen...
virtual MCSymbol * getFunctionEntryPointSymbol(const GlobalValue *Func, const TargetMachine &TM) const
If supported, return the function entry point symbol.
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
virtual const MCExpr * getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI, MCContext &Ctx) const
This returns the relocation base for the given PIC jumptable, the same as getPICJumpTableRelocBase,...
SDValue lowerCmpEqZeroToCtlzSrl(SDValue Op, SelectionDAG &DAG) const
void softenSetCCOperands(SelectionDAG &DAG, EVT VT, SDValue &NewLHS, SDValue &NewRHS, ISD::CondCode &CCCode, const SDLoc &DL, const SDValue OldLHS, const SDValue OldRHS) const
Soften the operands of a comparison.
SDValue getCheaperNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, unsigned Depth=0) const
This is the helper function to return the newly negated expression only when the cost is cheaper.
virtual ConstraintType getConstraintType(StringRef Constraint) const
Given a constraint, return the type of constraint it is for this target.
virtual SDValue LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA, SelectionDAG &DAG) const
Lower TLS global address SDNode for target independent emulated TLS model.
std::pair< SDValue, SDValue > LowerCallTo(CallLoweringInfo &CLI) const
This function lowers an abstract call to a function into an actual call.
bool isPositionIndependent() const
virtual SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, NegatibleCost &Cost, unsigned Depth=0) const
Return the newly negated expression if the cost is not expensive and set the cost in Cost to indicate...
virtual ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &info, const char *constraint) const
Examine constraint string and operand type and determine a weight value.
virtual SDValue getPICJumpTableRelocBase(SDValue Table, SelectionDAG &DAG) const
Returns relocation base for the given PIC jumptable.
virtual std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const
Given a physical register constraint (e.g.
TargetLowering(const TargetLowering &)=delete
bool isInTailCallPosition(SelectionDAG &DAG, SDNode *Node, SDValue &Chain) const
Check whether a given call node is in tail position within its function.
virtual SDValue getSqrtResultForDenormInput(SDValue Operand, SelectionDAG &DAG) const
Return a target-dependent result if the input operand is not suitable for use with a square root esti...
virtual bool useLoadStackGuardNode(const Module &M) const
If this function returns true, SelectionDAGBuilder emits a LOAD_STACK_GUARD node when it is lowering ...
virtual unsigned combineRepeatedFPDivisors() const
Indicate whether this target prefers to combine FDIVs with the same divisor.
virtual void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const
Lower the specified operand into the Ops vector.
virtual SDValue getSqrtInputTest(SDValue Operand, SelectionDAG &DAG, const DenormalMode &Mode, SDNodeFlags Flags={}) const
Return a target-dependent comparison result if the input operand is suitable for use with a square ro...
virtual bool isGAPlusOffset(SDNode *N, const GlobalValue *&GA, int64_t &Offset) const
Returns true (and the GlobalValue and the offset) if the node is a GlobalAddress + offset.
virtual unsigned getJumpTableEncoding() const
Return the entry encoding for a jump table in the current function.
std::pair< SDValue, SDValue > makeLibCall(SelectionDAG &DAG, RTLIB::LibcallImpl LibcallImpl, EVT RetVT, ArrayRef< SDValue > Ops, MakeLibCallOptions CallOptions, const SDLoc &dl, SDValue Chain=SDValue()) const
Returns a pair of (return value, chain).
Primary interface to the complete machine description for the target machine.
TLSModel::Model getTLSModel(const GlobalValue *GV) const
Returns the TLS model which should be used for the given global variable.
const STC & getSubtarget(const Function &F) const
This method returns a pointer to the specified type of TargetSubtargetInfo.
bool useEmulatedTLS() const
Returns true if this target uses emulated TLS.
virtual TargetLoweringObjectFile * getObjFileLowering() const
Reloc::Model getRelocationModel() const
Returns the code generation relocation model.
bool shouldAssumeDSOLocal(const GlobalValue *GV) const
TargetOptions Options
CodeModel::Model getCodeModel() const
Returns the code model.
bool getFunctionSections() const
Return true if functions should be emitted into their own section, corresponding to -ffunction-sectio...
unsigned PPCGenScalarMASSEntries
Enables scalar MASS conversions.
unsigned GuaranteedTailCallOpt
GuaranteedTailCallOpt - This flag is enabled when -tailcallopt is specified on the commandline.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition TypeSize.h:343
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:46
static LLVM_ABI IntegerType * getInt64Ty(LLVMContext &C)
Definition Type.cpp:314
LLVM_ABI bool isEmptyTy() const
Return true if this type is empty, that is, it has no elements or all of its elements are empty.
Definition Type.cpp:184
bool isVectorTy() const
True if this is an instance of VectorType.
Definition Type.h:290
bool isFloatTy() const
Return true if this is 'float', a 32-bit IEEE fp type.
Definition Type.h:155
@ FloatTyID
32-bit floating point type
Definition Type.h:59
@ DoubleTyID
64-bit floating point type
Definition Type.h:60
@ FP128TyID
128-bit floating point type (112-bit significand)
Definition Type.h:62
static LLVM_ABI Type * getVoidTy(LLVMContext &C)
Definition Type.cpp:286
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Definition Type.cpp:201
bool isSized(SmallPtrSetImpl< Type * > *Visited=nullptr) const
Return true if it makes sense to take the size of this type.
Definition Type.h:328
bool isDoubleTy() const
Return true if this is 'double', a 64-bit IEEE fp type.
Definition Type.h:158
bool isFunctionTy() const
True if this is an instance of FunctionType.
Definition Type.h:275
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:257
A Use represents the edge between a Value definition and its users.
Definition Use.h:35
User * getUser() const
Returns the User that contains this Use.
Definition Use.h:61
Value * getOperand(unsigned i) const
Definition User.h:207
unsigned getNumOperands() const
Definition User.h:229
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition Value.h:440
const ParentTy * getParent() const
Definition ilist_node.h:34
self_iterator getIterator()
Definition ilist_node.h:123
CallInst * Call
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ Cold
Attempts to make code in the caller as efficient as possible under the assumption that the call is no...
Definition CallingConv.h:47
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition CallingConv.h:41
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
LLVM_ABI bool isConstantSplatVectorAllOnes(const SDNode *N, bool BuildVectorOnly=false)
Return true if the specified node is a BUILD_VECTOR or SPLAT_VECTOR where all of the elements are ~0 ...
bool isNON_EXTLoad(const SDNode *N)
Returns true if the specified node is a non-extending load.
NodeType
ISD::NodeType enum - This enum defines the target-independent operators for a SelectionDAG.
Definition ISDOpcodes.h:41
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition ISDOpcodes.h:819
@ MERGE_VALUES
MERGE_VALUES - This node takes multiple discrete operands and returns them all as its individual resu...
Definition ISDOpcodes.h:261
@ STACKRESTORE
STACKRESTORE has two operands, an input chain and a pointer to restore to it returns an output chain.
@ STACKSAVE
STACKSAVE - STACKSAVE has one operand, an input chain.
@ TargetConstantPool
Definition ISDOpcodes.h:189
@ STRICT_FSETCC
STRICT_FSETCC/STRICT_FSETCCS - Constrained versions of SETCC, used for floating-point operands only.
Definition ISDOpcodes.h:511
@ DELETED_NODE
DELETED_NODE - This is an illegal value that is used to catch errors.
Definition ISDOpcodes.h:45
@ EH_SJLJ_LONGJMP
OUTCHAIN = EH_SJLJ_LONGJMP(INCHAIN, buffer) This corresponds to the eh.sjlj.longjmp intrinsic.
Definition ISDOpcodes.h:168
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition ISDOpcodes.h:275
@ BSWAP
Byte Swap and Counting operators.
Definition ISDOpcodes.h:779
@ VAEND
VAEND, VASTART - VAEND and VASTART have three operands: an input chain, pointer, and a SRCVALUE.
@ ATOMIC_STORE
OUTCHAIN = ATOMIC_STORE(INCHAIN, val, ptr) This corresponds to "store atomic" instruction.
@ ADD
Simple integer binary arithmetic operators.
Definition ISDOpcodes.h:264
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition ISDOpcodes.h:853
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition ISDOpcodes.h:518
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition ISDOpcodes.h:220
@ GlobalAddress
Definition ISDOpcodes.h:88
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition ISDOpcodes.h:880
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition ISDOpcodes.h:584
@ FADD
Simple binary floating point operators.
Definition ISDOpcodes.h:417
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
Definition ISDOpcodes.h:747
@ SDIVREM
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
Definition ISDOpcodes.h:280
@ FP16_TO_FP
FP16_TO_FP, FP_TO_FP16 - These operators are used to perform promotions and truncation for half-preci...
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition ISDOpcodes.h:993
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition ISDOpcodes.h:254
@ INIT_TRAMPOLINE
INIT_TRAMPOLINE - This corresponds to the init_trampoline intrinsic.
@ FLDEXP
FLDEXP - ldexp, inspired by libm (op0 * 2**op1).
@ STRICT_FSQRT
Constrained versions of libm-equivalent floating point intrinsics.
Definition ISDOpcodes.h:438
@ GlobalTLSAddress
Definition ISDOpcodes.h:89
@ SET_ROUNDING
Set rounding mode.
Definition ISDOpcodes.h:975
@ SIGN_EXTEND
Conversion operators.
Definition ISDOpcodes.h:844
@ AVGCEILS
AVGCEILS/AVGCEILU - Rounding averaging add - Add two integers using an integer of type i[N+2],...
Definition ISDOpcodes.h:715
@ STRICT_UINT_TO_FP
Definition ISDOpcodes.h:485
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition ISDOpcodes.h:665
@ TargetExternalSymbol
Definition ISDOpcodes.h:190
@ BR
Control flow instructions. These all have token chains.
@ TargetJumpTable
Definition ISDOpcodes.h:188
@ PREFETCH
PREFETCH - This corresponds to a prefetch intrinsic.
@ FSINCOS
FSINCOS - Compute both fsin and fcos as a single operation.
@ FNEG
Perform various unary floating-point operations inspired by libm.
@ BR_CC
BR_CC - Conditional branch.
@ SSUBO
Same for subtraction.
Definition ISDOpcodes.h:352
@ BR_JT
BR_JT - Jumptable branch.
@ FCANONICALIZE
Returns platform specific canonical encoding of a floating point number.
Definition ISDOpcodes.h:541
@ IS_FPCLASS
Performs a check of floating point class property, defined by IEEE-754.
Definition ISDOpcodes.h:548
@ SSUBSAT
RESULT = [US]SUBSAT(LHS, RHS) - Perform saturation subtraction on 2 integers with the same bit width ...
Definition ISDOpcodes.h:374
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition ISDOpcodes.h:796
@ ATOMIC_LOAD
Val, OUTCHAIN = ATOMIC_LOAD(INCHAIN, ptr) This corresponds to "load atomic" instruction.
@ EXTRACT_ELEMENT
EXTRACT_ELEMENT - This is used to get the lower or upper (determined by a Constant,...
Definition ISDOpcodes.h:247
@ SPLAT_VECTOR
SPLAT_VECTOR(VAL) - Returns a vector with the scalar value VAL duplicated in all lanes.
Definition ISDOpcodes.h:672
@ VACOPY
VACOPY - VACOPY has 5 operands: an input chain, a destination pointer, a source pointer,...
@ SADDO
RESULT, BOOL = [SU]ADDO(LHS, RHS) - Overflow-aware nodes for addition.
Definition ISDOpcodes.h:348
@ TargetGlobalAddress
TargetGlobalAddress - Like GlobalAddress, but the DAG does no folding or anything else with this node...
Definition ISDOpcodes.h:185
@ GET_ROUNDING
Returns current rounding mode: -1 Undefined 0 Round to 0 1 Round to nearest, ties to even 2 Round to ...
Definition ISDOpcodes.h:970
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition ISDOpcodes.h:704
@ SHL
Shift and rotation operations.
Definition ISDOpcodes.h:765
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition ISDOpcodes.h:649
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition ISDOpcodes.h:614
@ FMINNUM_IEEE
FMINNUM_IEEE/FMAXNUM_IEEE - Perform floating-point minimumNumber or maximumNumber on two values,...
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition ISDOpcodes.h:576
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition ISDOpcodes.h:850
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition ISDOpcodes.h:811
@ ATOMIC_CMP_SWAP
Val, OUTCHAIN = ATOMIC_CMP_SWAP(INCHAIN, ptr, cmp, swap) For double-word atomic operations: ValLo,...
@ FMINNUM
FMINNUM/FMAXNUM - Perform floating-point minimum maximum on two values, following IEEE-754 definition...
@ DYNAMIC_STACKALLOC
DYNAMIC_STACKALLOC - Allocate some number of bytes on the stack aligned to a specified boundary.
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition ISDOpcodes.h:888
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
Definition ISDOpcodes.h:727
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition ISDOpcodes.h:978
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition ISDOpcodes.h:805
@ UADDO_CARRY
Carry-using nodes for multiple precision addition and subtraction.
Definition ISDOpcodes.h:328
@ STRICT_SINT_TO_FP
STRICT_[US]INT_TO_FP - Convert a signed or unsigned integer to a floating point value.
Definition ISDOpcodes.h:484
@ INLINEASM_BR
INLINEASM_BR - Branching version of inline asm. Used by asm-goto.
@ EH_DWARF_CFA
EH_DWARF_CFA - This node represents the pointer to the DWARF Canonical Frame Address (CFA),...
Definition ISDOpcodes.h:150
@ FRAMEADDR
FRAMEADDR, RETURNADDR - These nodes represent llvm.frameaddress and llvm.returnaddress on the DAG.
Definition ISDOpcodes.h:110
@ STRICT_FP_TO_UINT
Definition ISDOpcodes.h:478
@ STRICT_FP_ROUND
X = STRICT_FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision ...
Definition ISDOpcodes.h:500
@ STRICT_FP_TO_SINT
STRICT_FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition ISDOpcodes.h:477
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition ISDOpcodes.h:926
@ READCYCLECOUNTER
READCYCLECOUNTER - This corresponds to the readcyclecounter intrinsic.
@ STRICT_FP_EXTEND
X = STRICT_FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition ISDOpcodes.h:505
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition ISDOpcodes.h:739
@ TRAP
TRAP - Trapping instruction.
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition ISDOpcodes.h:205
@ STRICT_FADD
Constrained versions of the binary floating point operators.
Definition ISDOpcodes.h:427
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition ISDOpcodes.h:565
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition ISDOpcodes.h:53
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition ISDOpcodes.h:959
@ INLINEASM
INLINEASM - Represents an inline asm block.
@ STRICT_FNEARBYINT
Definition ISDOpcodes.h:458
@ EH_SJLJ_SETJMP
RESULT, OUTCHAIN = EH_SJLJ_SETJMP(INCHAIN, buffer) This corresponds to the eh.sjlj....
Definition ISDOpcodes.h:162
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition ISDOpcodes.h:856
@ VAARG
VAARG - VAARG has four operands: an input chain, a pointer, a SRCVALUE, and the alignment.
@ BRCOND
BRCOND - Conditional branch.
@ SHL_PARTS
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations.
Definition ISDOpcodes.h:833
@ AssertSext
AssertSext, AssertZext - These nodes record if a register contains a value that has already been zero...
Definition ISDOpcodes.h:62
@ FCOPYSIGN
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
Definition ISDOpcodes.h:534
@ SADDSAT
RESULT = [US]ADDSAT(LHS, RHS) - Perform saturation addition on 2 integers with the same bit width (W)...
Definition ISDOpcodes.h:365
@ CALLSEQ_START
CALLSEQ_START/CALLSEQ_END - These operators mark the beginning and end of a call sequence,...
@ GET_DYNAMIC_AREA_OFFSET
GET_DYNAMIC_AREA_OFFSET - get offset from native SP to the address of the most recent dynamic alloca.
@ ABDS
ABDS/ABDU - Absolute difference - Return the absolute difference between two numbers interpreted as s...
Definition ISDOpcodes.h:722
@ ADJUST_TRAMPOLINE
ADJUST_TRAMPOLINE - This corresponds to the adjust_trampoline intrinsic.
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition ISDOpcodes.h:213
@ TargetGlobalTLSAddress
Definition ISDOpcodes.h:186
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition ISDOpcodes.h:556
bool isNormalStore(const SDNode *N)
Returns true if the specified node is a non-truncating and unindexed store.
bool isZEXTLoad(const SDNode *N)
Returns true if the specified node is a ZEXTLOAD.
bool isUNINDEXEDLoad(const SDNode *N)
Returns true if the specified node is an unindexed load.
bool isEXTLoad(const SDNode *N)
Returns true if the specified node is a EXTLOAD.
LLVM_ABI bool isBuildVectorAllZeros(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are 0 or undef.
bool isSignedIntSetCC(CondCode Code)
Return true if this is a setcc instruction that performs a signed comparison when used with integer o...
MemIndexedMode
MemIndexedMode enum - This enum defines the load / store indexed addressing modes.
bool isSEXTLoad(const SDNode *N)
Returns true if the specified node is a SEXTLOAD.
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
bool isUnsignedIntSetCC(CondCode Code)
Return true if this is a setcc instruction that performs an unsigned comparison when used with intege...
bool isNormalLoad(const SDNode *N)
Returns true if the specified node is a non-extending and unindexed load.
This namespace contains an enum with a value for every intrinsic/builtin function known by LLVM.
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})
Look up the Function declaration of the intrinsic id in the Module M.
@ Bitcast
Perform the operation on a different, but equivalently sized type.
@ VecShuffle
Definition NVPTX.h:147
@ MO_TLSLDM_FLAG
MO_TLSLDM_FLAG - on AIX the ML relocation type is only valid for a reference to a TOC symbol from the...
Definition PPC.h:148
@ MO_PIC_LO_FLAG
MO_PIC_LO_FLAG = MO_PIC_FLAG | MO_LO.
Definition PPC.h:196
@ MO_TPREL_PCREL_FLAG
MO_TPREL_PCREL_FLAG = MO_PCREL_FLAG | MO_TPREL_FLAG.
Definition PPC.h:199
@ MO_GOT_TPREL_PCREL_FLAG
MO_GOT_TPREL_PCREL_FLAG - A combination of flags, if these bits are set they should produce the reloc...
Definition PPC.h:174
@ MO_GOT_PCREL_FLAG
MO_GOT_PCREL_FLAG = MO_PCREL_FLAG | MO_GOT_FLAG.
Definition PPC.h:205
@ MO_TLSGDM_FLAG
MO_TLSGDM_FLAG - If this bit is set the symbol reference is relative to the region handle of TLS Gene...
Definition PPC.h:156
@ MO_PCREL_FLAG
MO_PCREL_FLAG - If this bit is set, the symbol reference is relative to the current instruction addre...
Definition PPC.h:123
@ MO_TLSLD_FLAG
MO_TLSLD_FLAG - If this bit is set the symbol reference is relative to TLS Local Dynamic model.
Definition PPC.h:152
@ MO_TLS_PCREL_FLAG
MO_TLS_PCREL_FLAG = MO_PCREL_FLAG | MO_TLS.
Definition PPC.h:202
@ MO_TPREL_HA
Definition PPC.h:181
@ MO_PLT
On PPC, the 12 bits are not enough for all target operand flags.
Definition PPC.h:115
@ MO_TLS
Symbol for VK_TLS fixup attached to an ADD instruction.
Definition PPC.h:190
@ MO_TPREL_FLAG
MO_TPREL_FLAG - If this bit is set, the symbol reference is relative to the thread pointer and the sy...
Definition PPC.h:142
@ MO_TPREL_LO
Definition PPC.h:180
@ MO_LO
MO_LO, MO_HA - lo16(symbol) and ha16(symbol)
Definition PPC.h:177
@ MO_GOT_TLSLD_PCREL_FLAG
MO_GOT_TLSLD_PCREL_FLAG - A combination of flags, if these bits are set they should produce the reloc...
Definition PPC.h:168
@ MO_PIC_HA_FLAG
MO_PIC_HA_FLAG = MO_PIC_FLAG | MO_HA.
Definition PPC.h:193
@ MO_TLSGD_FLAG
MO_TLSGD_FLAG - If this bit is set the symbol reference is relative to TLS General Dynamic model for ...
Definition PPC.h:137
@ MO_GOT_TLSGD_PCREL_FLAG
MO_GOT_TLSGD_PCREL_FLAG - A combination of flags, if these bits are set they should produce the reloc...
Definition PPC.h:162
@ MO_HA
Definition PPC.h:178
@ MO_PIC_FLAG
MO_PIC_FLAG - If this bit is set, the symbol reference is relative to the function's picbase,...
Definition PPC.h:119
@ MFOCRF
R32 = MFOCRF(CRREG, INFLAG) - Represents the MFOCRF instruction.
@ VADD_SPLAT
VRRC = VADD_SPLAT Elt, EltSize - Temporary node to be expanded during instruction selection to optimi...
@ PPC32_PICGOT
GPRC = address of GLOBAL_OFFSET_TABLE.
@ GlobalBaseReg
The result of the mflr at function entry, used for PIC code.
@ SRA_ADDZE
The combination of sra[wd]i and addze used to implemented signed integer division by a power of 2.
Define some predicates that are used for node matching.
Predicate
Predicate - These are "(BI << 5) | BO" for various predicates.
SDValue get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG)
get_VSPLTI_elt - If this is a build_vector of constants which can be formed by using a vspltis[bhw] i...
bool isXXBRDShuffleMask(ShuffleVectorSDNode *N)
isXXBRDShuffleMask - Return true if this is a shuffle mask suitable for a XXBRD instruction.
bool isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize, unsigned ShuffleKind, SelectionDAG &DAG)
isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for a VRGH* instruction with the ...
bool isVPKUDUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, SelectionDAG &DAG)
isVPKUDUMShuffleMask - Return true if this is the shuffle mask for a VPKUDUM instruction.
bool isVMRGEOShuffleMask(ShuffleVectorSDNode *N, bool CheckEven, unsigned ShuffleKind, SelectionDAG &DAG)
isVMRGEOShuffleMask - Return true if this is a shuffle mask suitable for a VMRGEW or VMRGOW instructi...
bool isXXBRQShuffleMask(ShuffleVectorSDNode *N)
isXXBRQShuffleMask - Return true if this is a shuffle mask suitable for a XXBRQ instruction.
bool isXXBRWShuffleMask(ShuffleVectorSDNode *N)
isXXBRWShuffleMask - Return true if this is a shuffle mask suitable for a XXBRW instruction.
bool isXXPERMDIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts, bool &Swap, bool IsLE)
isXXPERMDIShuffleMask - Return true if this is a shuffle mask suitable for a XXPERMDI instruction.
bool isXXBRHShuffleMask(ShuffleVectorSDNode *N)
isXXBRHShuffleMask - Return true if this is a shuffle mask suitable for a XXBRH instruction.
unsigned getSplatIdxForPPCMnemonics(SDNode *N, unsigned EltSize, SelectionDAG &DAG)
getSplatIdxForPPCMnemonics - Return the splat index as a value that is appropriate for PPC mnemonics ...
bool isXXSLDWIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts, bool &Swap, bool IsLE)
isXXSLDWIShuffleMask - Return true if this is a shuffle mask suitable for a XXSLDWI instruction.
FastISel * createFastISel(FunctionLoweringInfo &FuncInfo, const TargetLibraryInfo *LibInfo, const LibcallLoweringInfo *LibcallLowering)
int isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind, SelectionDAG &DAG)
isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift amount, otherwise return -1.
bool isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize, unsigned ShuffleKind, SelectionDAG &DAG)
isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for a VRGL* instruction with the ...
bool isXXINSERTWMask(ShuffleVectorSDNode *N, unsigned &ShiftElts, unsigned &InsertAtByte, bool &Swap, bool IsLE)
isXXINSERTWMask - Return true if this VECTOR_SHUFFLE can be handled by the XXINSERTW instruction intr...
bool isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize)
isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand specifies a splat of a singl...
bool isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, SelectionDAG &DAG)
isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a VPKUWUM instruction.
bool isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, SelectionDAG &DAG)
isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a VPKUHUM instruction.
Invariant opcodes: All instruction sets have these as their low opcodes.
@ XMC_PR
Program Code.
Definition XCOFF.h:106
@ XTY_ER
External reference.
Definition XCOFF.h:242
initializer< Ty > init(const Ty &Val)
@ User
could "use" a pointer
NodeAddr< UseNode * > Use
Definition RDFGraph.h:385
NodeAddr< NodeBase * > Node
Definition RDFGraph.h:381
NodeAddr< FuncNode * > Func
Definition RDFGraph.h:393
iterator end() const
Definition BasicBlock.h:89
This is an optimization pass for GlobalISel generic memory operations.
Definition Types.h:26
@ Offset
Definition DWP.cpp:532
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
static bool isIndirectCall(const MachineInstr &MI)
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1739
bool checkConvertToNonDenormSingle(APFloat &ArgAPFloat)
LLVM_ABI void GetReturnInfo(CallingConv::ID CC, Type *ReturnType, AttributeList attr, SmallVectorImpl< ISD::OutputArg > &Outs, const TargetLowering &TLI, const DataLayout &DL)
Given an LLVM IR type and return type attributes, compute the return value EVTs and flags,...
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
InstructionCost Cost
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
Definition MathExtras.h:165
LLVM_ABI bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
LLVM_ABI SDValue peekThroughBitcasts(SDValue V)
Return the non-bitcasted source operand of V if it exists.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
bool CCAssignFn(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
CCAssignFn - This function assigns a location for Val, updating State to reflect the change.
bool isAligned(Align Lhs, uint64_t SizeInBytes)
Checks that SizeInBytes is a multiple of the alignment.
Definition Alignment.h:134
bool isIntS16Immediate(SDNode *N, int16_t &Imm)
isIntS16Immediate - This method tests to see if the node is either a 32-bit or 64-bit immediate,...
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition MathExtras.h:284
static bool isRunOfOnes64(uint64_t Val, unsigned &MB, unsigned &ME)
bool isa_and_nonnull(const Y &Val)
Definition Casting.h:676
bool RetCC_PPC(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
bool CC_PPC64_ELF(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
Definition bit.h:202
unsigned M1(unsigned Val)
Definition VE.h:377
bool isReleaseOrStronger(AtomicOrdering AO)
auto dyn_cast_or_null(const Y &Val)
Definition Casting.h:753
constexpr bool has_single_bit(T Value) noexcept
Definition bit.h:147
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1746
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:279
bool convertToNonDenormSingle(APInt &ArgAPInt)
FPClassTest
Floating-point class tests, supported by 'is_fpclass' intrinsic.
bool CC_PPC32_SVR4_ByVal(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
constexpr uint32_t Hi_32(uint64_t Value)
Return the high 32 bits of a 64 bit value.
Definition MathExtras.h:150
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition Error.cpp:163
bool CC_PPC32_SVR4(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
constexpr RegState getDefRegState(bool B)
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
Definition MathExtras.h:189
bool RetCC_PPC_Cold(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
constexpr uint32_t Lo_32(uint64_t Value)
Return the low 32 bits of a 64 bit value.
Definition MathExtras.h:155
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
format_object< Ts... > format(const char *Fmt, const Ts &... Vals)
These are helper functions used to produce formatted output.
Definition Format.h:129
@ Success
The lock was released successfully.
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
AtomicOrdering
Atomic ordering for LLVM's memory model.
bool isIntS34Immediate(SDNode *N, int64_t &Imm)
isIntS34Immediate - This method tests if value of node given can be accurately represented as a sign ...
To bit_cast(const From &from) noexcept
Definition bit.h:90
@ Mul
Product of integers.
@ Xor
Bitwise or logical XOR of integers.
@ And
Bitwise or logical AND of integers.
@ Sub
Subtraction of integers.
@ Add
Sum of integers.
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
Definition MCRegister.h:21
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition Alignment.h:144
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
Definition STLExtras.h:2012
DWARFExpression::Operation Op
bool isPhysRegUsedAfter(Register Reg, MachineBasicBlock::iterator MBI)
Check if physical register Reg is used after MBI.
unsigned M0(unsigned Val)
Definition VE.h:376
ArrayRef(const T &OneElt) -> ArrayRef< T >
LLVM_ABI ConstantSDNode * isConstOrConstSplat(SDValue N, bool AllowUndefs=false, bool AllowTruncation=false)
Returns the SDNode if it is a constant splat BuildVector or constant int.
bool isAcquireOrStronger(AtomicOrdering AO)
constexpr bool isShiftedInt(int64_t x)
Checks if a signed integer is an N bit number shifted left by S.
Definition MathExtras.h:182
constexpr int32_t SignExtend32(uint32_t X)
Sign-extend the number in the bottom B bits of X to a 32-bit integer.
Definition MathExtras.h:554
constexpr unsigned BitWidth
bool CC_PPC32_SVR4_VarArg(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Definition Alignment.h:201
constexpr int64_t SignExtend64(uint64_t x)
Sign-extend the number in the bottom B bits of X to a 64-bit integer.
Definition MathExtras.h:572
static bool isRunOfOnes(unsigned Val, unsigned &MB, unsigned &ME)
Returns true iff Val consists of one contiguous run of 1s with any number of 0s on either side.
@ Increment
Incrementally increasing token ID.
Definition AllocToken.h:26
@ Enabled
Convert any .debug_str_offsets tables to DWARF64 if needed.
Definition DWP.h:27
T bit_floor(T Value)
Returns the largest integral power of two no greater than Value if Value is nonzero.
Definition bit.h:330
constexpr bool isShiftedUInt(uint64_t x)
Checks if a unsigned integer is an N bit number shifted left by S.
Definition MathExtras.h:198
LLVM_ABI bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
static const unsigned PerfectShuffleTable[6561+1]
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:872
#define N
This is used by foldLoadsRecursive() to capture a Root Load node which is of type or(load,...
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
constexpr uint64_t value() const
This is a hole in the type system and should not be abused.
Definition Alignment.h:77
Represent subnormal handling kind for floating point instruction inputs and outputs.
Extended Value Type.
Definition ValueTypes.h:35
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
Definition ValueTypes.h:90
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition ValueTypes.h:403
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition ValueTypes.h:145
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
Definition ValueTypes.h:70
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
Definition ValueTypes.h:292
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
Definition ValueTypes.h:155
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition ValueTypes.h:381
uint64_t getScalarSizeInBits() const
Definition ValueTypes.h:393
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition ValueTypes.h:324
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition ValueTypes.h:61
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition ValueTypes.h:389
LLVM_ABI std::string getEVTString() const
This function returns value type as a string, e.g. "i32".
bool isVector() const
Return true if this is a vector value type.
Definition ValueTypes.h:176
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition ValueTypes.h:331
LLVM_ABI Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition ValueTypes.h:336
bool isExtended() const
Test if the given EVT is extended (as opposed to being simple).
Definition ValueTypes.h:150
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition ValueTypes.h:165
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition ValueTypes.h:344
EVT getHalfNumVectorElementsVT(LLVMContext &Context) const
Definition ValueTypes.h:461
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition ValueTypes.h:160
unsigned getByValSize() const
void setByValSize(unsigned S)
Align getNonZeroByValAlign() const
OutputArg - This struct carries flags and a value for a single outgoing (actual) argument or outgoing...
bool isConstant() const
Returns true if we know the value of all bits.
Definition KnownBits.h:54
void resetAll()
Resets the known state of all bits.
Definition KnownBits.h:74
const APInt & getConstant() const
Returns the value when all bits have a known value.
Definition KnownBits.h:60
This class contains a discriminated union of information about pointers in memory operands,...
static LLVM_ABI MachinePointerInfo getStack(MachineFunction &MF, int64_t Offset, uint8_t ID=0)
Stack pointer relative access.
MachinePointerInfo getWithOffset(int64_t O) const
static LLVM_ABI MachinePointerInfo getGOT(MachineFunction &MF)
Return a MachinePointerInfo record that refers to a GOT entry.
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition Alignment.h:106
Structure that collects some common arguments that get passed around between the functions for call l...
These are IR-level optimization flags that may be propagated to SDNodes.
void setNoFPExcept(bool b)
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*...
This contains information for each constraint that we are lowering.
This structure contains all information that is necessary for lowering calls.
CallLoweringInfo & setIsPostTypeLegalization(bool Value=true)
CallLoweringInfo & setLibCallee(CallingConv::ID CC, Type *ResultType, SDValue Target, ArgListTy &&ArgsList)
SmallVector< ISD::InputArg, 32 > Ins
CallLoweringInfo & setZExtResult(bool Value=true)
CallLoweringInfo & setDebugLoc(const SDLoc &dl)
CallLoweringInfo & setTailCall(bool Value=true)
CallLoweringInfo & setSExtResult(bool Value=true)
SmallVector< ISD::OutputArg, 32 > Outs
CallLoweringInfo & setChain(SDValue InChain)
LLVM_ABI void AddToWorklist(SDNode *N)
LLVM_ABI SDValue CombineTo(SDNode *N, ArrayRef< SDValue > To, bool AddTo=true)
This structure is used to pass arguments to makeLibCall function.