PPCISelLowering.cpp
1//===-- PPCISelLowering.cpp - PPC DAG Lowering Implementation -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the PPCISelLowering class.
10//
11//===----------------------------------------------------------------------===//
12
13#include "PPCISelLowering.h"
16#include "PPC.h"
17#include "PPCCCState.h"
18#include "PPCCallingConv.h"
19#include "PPCFrameLowering.h"
20#include "PPCInstrInfo.h"
22#include "PPCPerfectShuffle.h"
23#include "PPCRegisterInfo.h"
24#include "PPCSubtarget.h"
25#include "PPCTargetMachine.h"
26#include "llvm/ADT/APFloat.h"
27#include "llvm/ADT/APInt.h"
28#include "llvm/ADT/APSInt.h"
29#include "llvm/ADT/ArrayRef.h"
30#include "llvm/ADT/DenseMap.h"
31#include "llvm/ADT/STLExtras.h"
33#include "llvm/ADT/SmallSet.h"
35#include "llvm/ADT/Statistic.h"
36#include "llvm/ADT/StringRef.h"
60#include "llvm/IR/CallingConv.h"
61#include "llvm/IR/Constant.h"
62#include "llvm/IR/Constants.h"
63#include "llvm/IR/DataLayout.h"
64#include "llvm/IR/DebugLoc.h"
66#include "llvm/IR/Function.h"
67#include "llvm/IR/GlobalValue.h"
68#include "llvm/IR/IRBuilder.h"
70#include "llvm/IR/Intrinsics.h"
71#include "llvm/IR/IntrinsicsPowerPC.h"
72#include "llvm/IR/Module.h"
73#include "llvm/IR/Type.h"
74#include "llvm/IR/Use.h"
75#include "llvm/IR/Value.h"
76#include "llvm/MC/MCContext.h"
77#include "llvm/MC/MCExpr.h"
87#include "llvm/Support/Debug.h"
89#include "llvm/Support/Format.h"
95#include <algorithm>
96#include <cassert>
97#include <cstdint>
98#include <iterator>
99#include <list>
100#include <optional>
101#include <utility>
102#include <vector>
103
104using namespace llvm;
105
106#define DEBUG_TYPE "ppc-lowering"
107
109 "disable-p10-store-forward",
110 cl::desc("disable P10 store forward-friendly conversion"), cl::Hidden,
111 cl::init(false));
112
113static cl::opt<bool> DisablePPCPreinc("disable-ppc-preinc",
114cl::desc("disable preincrement load/store generation on PPC"), cl::Hidden);
115
116static cl::opt<bool> DisableILPPref("disable-ppc-ilp-pref",
117cl::desc("disable setting the node scheduling preference to ILP on PPC"), cl::Hidden);
118
119static cl::opt<bool> DisablePPCUnaligned("disable-ppc-unaligned",
120cl::desc("disable unaligned load/store generation on PPC"), cl::Hidden);
121
122static cl::opt<bool> DisableSCO("disable-ppc-sco",
123cl::desc("disable sibling call optimization on ppc"), cl::Hidden);
124
125static cl::opt<bool> DisableInnermostLoopAlign32("disable-ppc-innermost-loop-align32",
126cl::desc("don't always align innermost loop to 32 bytes on ppc"), cl::Hidden);
127
128static cl::opt<bool> UseAbsoluteJumpTables("ppc-use-absolute-jumptables",
129cl::desc("use absolute jump tables on ppc"), cl::Hidden);
130
131static cl::opt<bool>
132 DisablePerfectShuffle("ppc-disable-perfect-shuffle",
133 cl::desc("disable vector permute decomposition"),
134 cl::init(true), cl::Hidden);
135
137 "disable-auto-paired-vec-st",
138 cl::desc("disable automatically generated 32byte paired vector stores"),
139 cl::init(true), cl::Hidden);
140
142 "ppc-min-jump-table-entries", cl::init(64), cl::Hidden,
143 cl::desc("Set minimum number of entries to use a jump table on PPC"));
144
146 "ppc-gather-alias-max-depth", cl::init(18), cl::Hidden,
147 cl::desc("max depth when checking alias info in GatherAllAliases()"));
148
150 "ppc-aix-shared-lib-tls-model-opt-limit", cl::init(1), cl::Hidden,
151 cl::desc("Set inclusive limit count of TLS local-dynamic access(es) in a "
152 "function to use initial-exec"));
153
154STATISTIC(NumTailCalls, "Number of tail calls");
155STATISTIC(NumSiblingCalls, "Number of sibling calls");
156STATISTIC(ShufflesHandledWithVPERM,
157 "Number of shuffles lowered to a VPERM or XXPERM");
158STATISTIC(NumDynamicAllocaProbed, "Number of dynamic stack allocation probed");
159
160static bool isNByteElemShuffleMask(ShuffleVectorSDNode *, unsigned, int);
161
162static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl);
163
164static const char AIXSSPCanaryWordName[] = "__ssp_canary_word";
165
166// A faster local-[exec|dynamic] TLS access sequence (enabled with the
167// -maix-small-local-[exec|dynamic]-tls option) can be produced for TLS
168// variables; consistent with the IBM XL compiler, we apply a max size of
169// slightly under 32KB.
171
172// FIXME: Remove this once the bug has been fixed!
174extern cl::opt<bool> ANDIGlueBug;
175PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
176 const PPCSubtarget &STI)
177 : TargetLowering(TM), Subtarget(STI) {
178 // Initialize map that relates the PPC addressing modes to the computed flags
179 // of a load/store instruction. The map is used to determine the optimal
180 // addressing mode when selecting loads and stores.
181 initializeAddrModeMap();
182 // On PPC32/64, arguments smaller than 4/8 bytes are extended, so all
183 // arguments are at least 4/8 bytes aligned.
184 bool isPPC64 = Subtarget.isPPC64();
185 setMinStackArgumentAlignment(isPPC64 ? Align(8) : Align(4));
186
187 // Set up the register classes.
188 addRegisterClass(MVT::i32, &PPC::GPRCRegClass);
189 if (!useSoftFloat()) {
190 if (hasSPE()) {
191 addRegisterClass(MVT::f32, &PPC::GPRCRegClass);
192 // EFPU2 APU only supports f32
193 if (!Subtarget.hasEFPU2())
194 addRegisterClass(MVT::f64, &PPC::SPERCRegClass);
195 } else {
196 addRegisterClass(MVT::f32, &PPC::F4RCRegClass);
197 addRegisterClass(MVT::f64, &PPC::F8RCRegClass);
198 }
199 }
200
201 // Match BITREVERSE to customized fast code sequence in the td file.
204
205 // Sub-word ATOMIC_CMP_SWAP needs to ensure that the input is zero-extended.
207
208 // Custom lower inline assembly to check for special registers.
211
212 // PowerPC has an i16 but no i8 (or i1) SEXTLOAD.
213 for (MVT VT : MVT::integer_valuetypes()) {
216 }
217
218 if (Subtarget.isISA3_0()) {
219 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Legal);
220 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Legal);
221 setTruncStoreAction(MVT::f64, MVT::f16, Legal);
222 setTruncStoreAction(MVT::f32, MVT::f16, Legal);
223 } else {
224 // No extending loads from f16 or HW conversions back and forth.
225 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
228 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
231 setTruncStoreAction(MVT::f64, MVT::f16, Expand);
232 setTruncStoreAction(MVT::f32, MVT::f16, Expand);
233 }
234
235 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
236
237 // PowerPC has pre-inc loads and stores.
248 if (!Subtarget.hasSPE()) {
253 }
254
255 // PowerPC uses ADDC/ADDE/SUBC/SUBE to propagate carry.
256 const MVT ScalarIntVTs[] = { MVT::i32, MVT::i64 };
257 for (MVT VT : ScalarIntVTs) {
262 }
263
264 if (Subtarget.useCRBits()) {
266
267 if (isPPC64 || Subtarget.hasFPCVT()) {
270 isPPC64 ? MVT::i64 : MVT::i32);
273 isPPC64 ? MVT::i64 : MVT::i32);
274
277 isPPC64 ? MVT::i64 : MVT::i32);
280 isPPC64 ? MVT::i64 : MVT::i32);
281
284 isPPC64 ? MVT::i64 : MVT::i32);
287 isPPC64 ? MVT::i64 : MVT::i32);
288
291 isPPC64 ? MVT::i64 : MVT::i32);
294 isPPC64 ? MVT::i64 : MVT::i32);
295 } else {
300 }
301
302 // PowerPC does not support direct load/store of condition registers.
305
306 // FIXME: Remove this once the ANDI glue bug is fixed:
307 if (ANDIGlueBug)
309
310 for (MVT VT : MVT::integer_valuetypes()) {
313 setTruncStoreAction(VT, MVT::i1, Expand);
314 }
315
316 addRegisterClass(MVT::i1, &PPC::CRBITRCRegClass);
317 }
318
319 // Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on
320 // PPC (the libcall is not available).
325
326 // We do not currently implement these libm ops for PowerPC.
327 setOperationAction(ISD::FFLOOR, MVT::ppcf128, Expand);
328 setOperationAction(ISD::FCEIL, MVT::ppcf128, Expand);
329 setOperationAction(ISD::FTRUNC, MVT::ppcf128, Expand);
330 setOperationAction(ISD::FRINT, MVT::ppcf128, Expand);
332 setOperationAction(ISD::FREM, MVT::ppcf128, Expand);
333
334 // PowerPC has no SREM/UREM instructions unless we are on P9
335 // On P9 we may use a hardware instruction to compute the remainder.
336 // When both the remainder and the division results are required, it is
337 // more efficient to compute the remainder from the result of the division
338 // rather than use the remainder instruction. The instructions are legalized
339 // directly because the DivRemPairsPass performs the transformation at the IR
340 // level.
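// For example (illustrative): once q = a / b has been computed, the remainder
// can be recovered as r = a - q * b, so when both values are needed only the
// divide instruction is emitted and the remainder is formed with a multiply
// and a subtract instead of a second hardware remainder instruction.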
341 if (Subtarget.isISA3_0()) {
346 } else {
351 }
352
353 // Don't use SMUL_LOHI/UMUL_LOHI or SDIVREM/UDIVREM to lower SREM/UREM.
362
363 // Handle constrained floating-point operations of scalar.
364 // TODO: Handle SPE specific operation.
370
375
376 if (!Subtarget.hasSPE()) {
379 }
380
381 if (Subtarget.hasVSX()) {
384 }
385
386 if (Subtarget.hasFSQRT()) {
389 }
390
391 if (Subtarget.hasFPRND()) {
396
401 }
402
403 // We don't support sin/cos/sqrt/fmod/pow
414
415 // MASS transformation for LLVM intrinsics with the replicating fast-math flag,
416 // to be consistent with the PPCGenScalarMASSEntries pass.
417 if (TM.getOptLevel() == CodeGenOptLevel::Aggressive) {
430 }
431
432 if (Subtarget.hasSPE()) {
435 } else {
436 setOperationAction(ISD::FMA , MVT::f64, Legal);
437 setOperationAction(ISD::FMA , MVT::f32, Legal);
438 }
439
440 if (Subtarget.hasSPE())
441 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
442
444
445 // If we're enabling GP optimizations, use hardware square root
446 if (!Subtarget.hasFSQRT() &&
447 !(TM.Options.UnsafeFPMath && Subtarget.hasFRSQRTE() &&
448 Subtarget.hasFRE()))
450
451 if (!Subtarget.hasFSQRT() &&
452 !(TM.Options.UnsafeFPMath && Subtarget.hasFRSQRTES() &&
453 Subtarget.hasFRES()))
455
456 if (Subtarget.hasFCPSGN()) {
459 } else {
462 }
463
464 if (Subtarget.hasFPRND()) {
469
474 }
475
476 // Prior to P10, PowerPC does not have BSWAP, but we can use vector BSWAP
477 // instruction xxbrd to speed up scalar BSWAP64.
478 if (Subtarget.isISA3_1()) {
481 } else {
484 ISD::BSWAP, MVT::i64,
485 (Subtarget.hasP9Vector() && Subtarget.isPPC64()) ? Custom : Expand);
486 }
487
488 // CTPOP and CTTZ were introduced in P8 and P9, respectively.
489 if (Subtarget.isISA3_0()) {
490 setOperationAction(ISD::CTTZ , MVT::i32 , Legal);
491 setOperationAction(ISD::CTTZ , MVT::i64 , Legal);
492 } else {
493 setOperationAction(ISD::CTTZ , MVT::i32 , Expand);
494 setOperationAction(ISD::CTTZ , MVT::i64 , Expand);
495 }
496
497 if (Subtarget.hasPOPCNTD() == PPCSubtarget::POPCNTD_Fast) {
500 } else {
503 }
504
505 // PowerPC does not have ROTR
508
509 if (!Subtarget.useCRBits()) {
510 // PowerPC does not have Select
515 }
516
517 // PowerPC wants to turn select_cc of FP into fsel when possible.
520
521 // PowerPC wants to optimize integer setcc a bit
522 if (!Subtarget.useCRBits())
524
525 if (Subtarget.hasFPU()) {
529
533 }
534
535 // PowerPC does not have BRCOND which requires SetCC
536 if (!Subtarget.useCRBits())
538
540
541 if (Subtarget.hasSPE()) {
542 // SPE has built-in conversions
549
550 // SPE supports signaling compare of f32/f64.
553 } else {
554 // PowerPC turns FP_TO_SINT into FCTIWZ and some load/stores.
557
558 // PowerPC does not have [U|S]INT_TO_FP
563 }
564
565 if (Subtarget.hasDirectMove() && isPPC64) {
570 if (TM.Options.UnsafeFPMath) {
579 }
580 } else {
585 }
586
587 // We cannot sextinreg(i1). Expand to shifts.
589
590 // NOTE: EH_SJLJ_SETJMP/_LONGJMP supported here is NOT intended to support
591 // SjLj exception handling but a light-weight setjmp/longjmp replacement to
592// support continuation, user-level threading, etc. As a result, no
593// other SjLj exception interfaces are implemented, so please don't build
594 // your own exception handling based on them.
595 // LLVM/Clang supports zero-cost DWARF exception handling.
598
599 // We want to legalize GlobalAddress and ConstantPool nodes into the
600 // appropriate instructions to materialize the address.
611
612 // TRAP is legal.
613 setOperationAction(ISD::TRAP, MVT::Other, Legal);
614
615 // TRAMPOLINE is custom lowered.
618
619 // VASTART needs to be custom lowered to use the VarArgsFrameIndex
621
622 if (Subtarget.is64BitELFABI()) {
623 // VAARG always uses double-word chunks, so promote anything smaller.
625 AddPromotedToType(ISD::VAARG, MVT::i1, MVT::i64);
627 AddPromotedToType(ISD::VAARG, MVT::i8, MVT::i64);
629 AddPromotedToType(ISD::VAARG, MVT::i16, MVT::i64);
631 AddPromotedToType(ISD::VAARG, MVT::i32, MVT::i64);
633 } else if (Subtarget.is32BitELFABI()) {
634 // VAARG is custom lowered with the 32-bit SVR4 ABI.
637 } else
639
640 // VACOPY is custom lowered with the 32-bit SVR4 ABI.
641 if (Subtarget.is32BitELFABI())
643 else
645
646 // Use the default implementation.
647 setOperationAction(ISD::VAEND , MVT::Other, Expand);
656
657 // We want to custom lower some of our intrinsics.
663
664 // To handle counter-based loop conditions.
666
671
672 // Comparisons that require checking two conditions.
673 if (Subtarget.hasSPE()) {
678 }
691
694
695 if (Subtarget.has64BitSupport()) {
696 // They also have instructions for converting between i64 and fp.
705 // This is just the low 32 bits of a (signed) fp->i64 conversion.
706 // We cannot do this with Promote because i64 is not a legal type.
709
710 if (Subtarget.hasLFIWAX() || Subtarget.isPPC64()) {
713 }
714 } else {
715 // PowerPC does not have FP_TO_UINT on 32-bit implementations.
716 if (Subtarget.hasSPE()) {
719 } else {
722 }
723 }
724
725 // With the instructions enabled under FPCVT, we can do everything.
726 if (Subtarget.hasFPCVT()) {
727 if (Subtarget.has64BitSupport()) {
736 }
737
746 }
747
748 if (Subtarget.use64BitRegs()) {
749 // 64-bit PowerPC implementations can support i64 types directly
750 addRegisterClass(MVT::i64, &PPC::G8RCRegClass);
751 // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
753 // 64-bit PowerPC wants to expand i128 shifts itself.
757 } else {
758 // 32-bit PowerPC wants to expand i64 shifts itself.
762 }
763
764 // PowerPC has better expansions for funnel shifts than the generic
765 // TargetLowering::expandFunnelShift.
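// As a reminder of the semantics being expanded (per the LLVM LangRef, not
// PPC-specific): fshl(a, b, n) conceptually concatenates a (high half) and b
// (low half), shifts the double-width value left by n modulo the bit width,
// and returns the high half; fshr is the analogous right shift returning the
// low half.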
766 if (Subtarget.has64BitSupport()) {
769 }
772
773 if (Subtarget.hasVSX()) {
778 }
779
780 if (Subtarget.hasAltivec()) {
781 for (MVT VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32 }) {
786 }
787 // First set operation action for all vector types to expand. Then we
788 // will selectively turn on ones that can be effectively codegen'd.
790 // add/sub are legal for all supported vector VT's.
793
794 // For v2i64, these are only valid with P8Vector. This is corrected after
795 // the loop.
796 if (VT.getSizeInBits() <= 128 && VT.getScalarSizeInBits() <= 64) {
801 }
802 else {
807 }
808
809 if (Subtarget.hasVSX()) {
812 }
813
814 // Vector instructions introduced in P8
815 if (Subtarget.hasP8Altivec() && (VT.SimpleTy != MVT::v1i128)) {
818 }
819 else {
822 }
823
824 // Vector instructions introduced in P9
825 if (Subtarget.hasP9Altivec() && (VT.SimpleTy != MVT::v1i128))
827 else
829
830 // We promote all shuffles to v16i8.
832 AddPromotedToType (ISD::VECTOR_SHUFFLE, VT, MVT::v16i8);
833
834 // We promote all non-typed operations to v4i32.
836 AddPromotedToType (ISD::AND , VT, MVT::v4i32);
838 AddPromotedToType (ISD::OR , VT, MVT::v4i32);
840 AddPromotedToType (ISD::XOR , VT, MVT::v4i32);
842 AddPromotedToType (ISD::LOAD , VT, MVT::v4i32);
844 AddPromotedToType (ISD::SELECT, VT, MVT::v4i32);
847 AddPromotedToType (ISD::SELECT_CC, VT, MVT::v4i32);
849 AddPromotedToType (ISD::STORE, VT, MVT::v4i32);
850
851 // No other operations are legal.
890
891 for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
892 setTruncStoreAction(VT, InnerVT, Expand);
895 setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
896 }
897 }
899 if (!Subtarget.hasP8Vector()) {
900 setOperationAction(ISD::SMAX, MVT::v2i64, Expand);
901 setOperationAction(ISD::SMIN, MVT::v2i64, Expand);
902 setOperationAction(ISD::UMAX, MVT::v2i64, Expand);
903 setOperationAction(ISD::UMIN, MVT::v2i64, Expand);
904 }
905
906 // We can custom expand all VECTOR_SHUFFLEs to VPERM, others we can handle
907 // with merges, splats, etc.
909
910 // Vector truncates to sub-word integers that fit in an Altivec/VSX register
911 // are cheap, so handle them before they get expanded to scalar.
917
918 setOperationAction(ISD::AND , MVT::v4i32, Legal);
919 setOperationAction(ISD::OR , MVT::v4i32, Legal);
920 setOperationAction(ISD::XOR , MVT::v4i32, Legal);
921 setOperationAction(ISD::LOAD , MVT::v4i32, Legal);
923 Subtarget.useCRBits() ? Legal : Expand);
924 setOperationAction(ISD::STORE , MVT::v4i32, Legal);
934 setOperationAction(ISD::FCEIL, MVT::v4f32, Legal);
937
938 // Custom lowering ROTL v1i128 to VECTOR_SHUFFLE v16i8.
939 setOperationAction(ISD::ROTL, MVT::v1i128, Custom);
940 // With hasAltivec set, we can lower ISD::ROTL to vrl(b|h|w).
941 if (Subtarget.hasAltivec())
942 for (auto VT : {MVT::v4i32, MVT::v8i16, MVT::v16i8})
944 // With hasP8Altivec set, we can lower ISD::ROTL to vrld.
945 if (Subtarget.hasP8Altivec())
946 setOperationAction(ISD::ROTL, MVT::v2i64, Legal);
947
948 addRegisterClass(MVT::v4f32, &PPC::VRRCRegClass);
949 addRegisterClass(MVT::v4i32, &PPC::VRRCRegClass);
950 addRegisterClass(MVT::v8i16, &PPC::VRRCRegClass);
951 addRegisterClass(MVT::v16i8, &PPC::VRRCRegClass);
952
953 setOperationAction(ISD::MUL, MVT::v4f32, Legal);
954 setOperationAction(ISD::FMA, MVT::v4f32, Legal);
955
956 if (Subtarget.hasVSX()) {
957 setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
958 setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
960 }
961
962 if (Subtarget.hasP8Altivec())
963 setOperationAction(ISD::MUL, MVT::v4i32, Legal);
964 else
965 setOperationAction(ISD::MUL, MVT::v4i32, Custom);
966
967 if (Subtarget.isISA3_1()) {
968 setOperationAction(ISD::MUL, MVT::v2i64, Legal);
969 setOperationAction(ISD::MULHS, MVT::v2i64, Legal);
970 setOperationAction(ISD::MULHU, MVT::v2i64, Legal);
971 setOperationAction(ISD::MULHS, MVT::v4i32, Legal);
972 setOperationAction(ISD::MULHU, MVT::v4i32, Legal);
973 setOperationAction(ISD::UDIV, MVT::v2i64, Legal);
974 setOperationAction(ISD::SDIV, MVT::v2i64, Legal);
975 setOperationAction(ISD::UDIV, MVT::v4i32, Legal);
976 setOperationAction(ISD::SDIV, MVT::v4i32, Legal);
977 setOperationAction(ISD::UREM, MVT::v2i64, Legal);
978 setOperationAction(ISD::SREM, MVT::v2i64, Legal);
979 setOperationAction(ISD::UREM, MVT::v4i32, Legal);
980 setOperationAction(ISD::SREM, MVT::v4i32, Legal);
981 setOperationAction(ISD::UREM, MVT::v1i128, Legal);
982 setOperationAction(ISD::SREM, MVT::v1i128, Legal);
983 setOperationAction(ISD::UDIV, MVT::v1i128, Legal);
984 setOperationAction(ISD::SDIV, MVT::v1i128, Legal);
985 setOperationAction(ISD::ROTL, MVT::v1i128, Legal);
986 }
987
988 setOperationAction(ISD::MUL, MVT::v8i16, Legal);
989 setOperationAction(ISD::MUL, MVT::v16i8, Custom);
990
993 // LE is P8+/64-bit so direct moves are supported and these operations
994 // are legal. The custom transformation requires 64-bit since we need a
995 // pair of stores that will cover a 128-bit load for P10.
996 if (!DisableP10StoreForward && isPPC64 && !Subtarget.isLittleEndian()) {
1000 }
1001
1006
1007 // Altivec does not contain unordered floating-point compare instructions
1008 setCondCodeAction(ISD::SETUO, MVT::v4f32, Expand);
1009 setCondCodeAction(ISD::SETUEQ, MVT::v4f32, Expand);
1010 setCondCodeAction(ISD::SETO, MVT::v4f32, Expand);
1011 setCondCodeAction(ISD::SETONE, MVT::v4f32, Expand);
1012
1013 if (Subtarget.hasVSX()) {
1016 if (Subtarget.hasP8Vector()) {
1019 }
1020 if (Subtarget.hasDirectMove() && isPPC64) {
1029 }
1031
1032 // The nearbyint variants are not allowed to raise the inexact exception
1033 // so we can only code-gen them with unsafe math.
1034 if (TM.Options.UnsafeFPMath) {
1037 }
1038
1039 setOperationAction(ISD::FFLOOR, MVT::v2f64, Legal);
1040 setOperationAction(ISD::FCEIL, MVT::v2f64, Legal);
1041 setOperationAction(ISD::FTRUNC, MVT::v2f64, Legal);
1043 setOperationAction(ISD::FRINT, MVT::v2f64, Legal);
1044 setOperationAction(ISD::FROUND, MVT::v2f64, Legal);
1047
1049 setOperationAction(ISD::FRINT, MVT::v4f32, Legal);
1050 setOperationAction(ISD::FROUND, MVT::v4f32, Legal);
1053
1054 setOperationAction(ISD::MUL, MVT::v2f64, Legal);
1055 setOperationAction(ISD::FMA, MVT::v2f64, Legal);
1056
1057 setOperationAction(ISD::FDIV, MVT::v2f64, Legal);
1058 setOperationAction(ISD::FSQRT, MVT::v2f64, Legal);
1059
1060 // Share the Altivec comparison restrictions.
1061 setCondCodeAction(ISD::SETUO, MVT::v2f64, Expand);
1062 setCondCodeAction(ISD::SETUEQ, MVT::v2f64, Expand);
1063 setCondCodeAction(ISD::SETO, MVT::v2f64, Expand);
1064 setCondCodeAction(ISD::SETONE, MVT::v2f64, Expand);
1065
1066 setOperationAction(ISD::LOAD, MVT::v2f64, Legal);
1067 setOperationAction(ISD::STORE, MVT::v2f64, Legal);
1068
1070
1071 if (Subtarget.hasP8Vector())
1072 addRegisterClass(MVT::f32, &PPC::VSSRCRegClass);
1073
1074 addRegisterClass(MVT::f64, &PPC::VSFRCRegClass);
1075
1076 addRegisterClass(MVT::v4i32, &PPC::VSRCRegClass);
1077 addRegisterClass(MVT::v4f32, &PPC::VSRCRegClass);
1078 addRegisterClass(MVT::v2f64, &PPC::VSRCRegClass);
1079
1080 if (Subtarget.hasP8Altivec()) {
1081 setOperationAction(ISD::SHL, MVT::v2i64, Legal);
1082 setOperationAction(ISD::SRA, MVT::v2i64, Legal);
1083 setOperationAction(ISD::SRL, MVT::v2i64, Legal);
1084
1085 // 128 bit shifts can be accomplished via 3 instructions for SHL and
1086 // SRL, but not for SRA because of the instructions available:
1087 // VS{RL} and VS{RL}O. However due to direct move costs, it's not worth
1088 // doing
1089 setOperationAction(ISD::SHL, MVT::v1i128, Expand);
1090 setOperationAction(ISD::SRL, MVT::v1i128, Expand);
1091 setOperationAction(ISD::SRA, MVT::v1i128, Expand);
1092
1093 setOperationAction(ISD::SETCC, MVT::v2i64, Legal);
1094 }
1095 else {
1096 setOperationAction(ISD::SHL, MVT::v2i64, Expand);
1097 setOperationAction(ISD::SRA, MVT::v2i64, Expand);
1098 setOperationAction(ISD::SRL, MVT::v2i64, Expand);
1099
1100 setOperationAction(ISD::SETCC, MVT::v2i64, Custom);
1101
1102 // VSX v2i64 only supports non-arithmetic operations.
1103 setOperationAction(ISD::ADD, MVT::v2i64, Expand);
1104 setOperationAction(ISD::SUB, MVT::v2i64, Expand);
1105 }
1106
1107 if (Subtarget.isISA3_1())
1108 setOperationAction(ISD::SETCC, MVT::v1i128, Legal);
1109 else
1110 setOperationAction(ISD::SETCC, MVT::v1i128, Expand);
1111
1112 setOperationAction(ISD::LOAD, MVT::v2i64, Promote);
1113 AddPromotedToType (ISD::LOAD, MVT::v2i64, MVT::v2f64);
1115 AddPromotedToType (ISD::STORE, MVT::v2i64, MVT::v2f64);
1116
1118
1127
1128 // Custom handling for partial vectors of integers converted to
1129 // floating point. We already have optimal handling for v2i32 through
1130 // the DAG combine, so those aren't necessary.
1147
1148 setOperationAction(ISD::FNEG, MVT::v4f32, Legal);
1149 setOperationAction(ISD::FNEG, MVT::v2f64, Legal);
1150 setOperationAction(ISD::FABS, MVT::v4f32, Legal);
1151 setOperationAction(ISD::FABS, MVT::v2f64, Legal);
1154
1157
1158 // Handle constrained floating-point operations of vector.
1159 // The predicate is `hasVSX` because Altivec instructions raise no
1160 // exceptions but VSX vector instructions do.
1174
1188
1189 addRegisterClass(MVT::v2i64, &PPC::VSRCRegClass);
1190 addRegisterClass(MVT::f128, &PPC::VRRCRegClass);
1191
1192 for (MVT FPT : MVT::fp_valuetypes())
1193 setLoadExtAction(ISD::EXTLOAD, MVT::f128, FPT, Expand);
1194
1195 // Expand the SELECT to SELECT_CC
1197
1198 setTruncStoreAction(MVT::f128, MVT::f64, Expand);
1199 setTruncStoreAction(MVT::f128, MVT::f32, Expand);
1200
1201 // No implementation for these ops for PowerPC.
1203 setOperationAction(ISD::FSIN, MVT::f128, Expand);
1204 setOperationAction(ISD::FCOS, MVT::f128, Expand);
1205 setOperationAction(ISD::FPOW, MVT::f128, Expand);
1207 setOperationAction(ISD::FREM, MVT::f128, Expand);
1208 }
1209
1210 if (Subtarget.hasP8Altivec()) {
1211 addRegisterClass(MVT::v2i64, &PPC::VRRCRegClass);
1212 addRegisterClass(MVT::v1i128, &PPC::VRRCRegClass);
1213 }
1214
1215 if (Subtarget.hasP9Vector()) {
1218
1219 // Test data class instructions store results in CR bits.
1220 if (Subtarget.useCRBits()) {
1224 }
1225
1226 // 128 bit shifts can be accomplished via 3 instructions for SHL and
1227 // SRL, but not for SRA because of the instructions available:
1228 // VS{RL} and VS{RL}O.
1229 setOperationAction(ISD::SHL, MVT::v1i128, Legal);
1230 setOperationAction(ISD::SRL, MVT::v1i128, Legal);
1231 setOperationAction(ISD::SRA, MVT::v1i128, Expand);
1232
1233 setOperationAction(ISD::FADD, MVT::f128, Legal);
1234 setOperationAction(ISD::FSUB, MVT::f128, Legal);
1235 setOperationAction(ISD::FDIV, MVT::f128, Legal);
1236 setOperationAction(ISD::FMUL, MVT::f128, Legal);
1238
1239 setOperationAction(ISD::FMA, MVT::f128, Legal);
1246
1248 setOperationAction(ISD::FRINT, MVT::f128, Legal);
1250 setOperationAction(ISD::FCEIL, MVT::f128, Legal);
1253
1257
1258 // Handle constrained floating-point operations of fp128
1275 setOperationAction(ISD::BSWAP, MVT::v8i16, Legal);
1276 setOperationAction(ISD::BSWAP, MVT::v4i32, Legal);
1277 setOperationAction(ISD::BSWAP, MVT::v2i64, Legal);
1278 setOperationAction(ISD::BSWAP, MVT::v1i128, Legal);
1279 } else if (Subtarget.hasVSX()) {
1282
1283 AddPromotedToType(ISD::LOAD, MVT::f128, MVT::v4i32);
1284 AddPromotedToType(ISD::STORE, MVT::f128, MVT::v4i32);
1285
1286 // Set FADD/FSUB as libcalls to keep the legalizer from expanding
1287 // fp_to_uint and int_to_fp.
1290
1291 setOperationAction(ISD::FMUL, MVT::f128, Expand);
1292 setOperationAction(ISD::FDIV, MVT::f128, Expand);
1293 setOperationAction(ISD::FNEG, MVT::f128, Expand);
1294 setOperationAction(ISD::FABS, MVT::f128, Expand);
1296 setOperationAction(ISD::FMA, MVT::f128, Expand);
1298
1299 // Expand the fp_extend if the target type is fp128.
1302
1303 // Expand the fp_round if the source type is fp128.
1304 for (MVT VT : {MVT::f32, MVT::f64}) {
1307 }
1308
1313
1314 // Lower following f128 select_cc pattern:
1315 // select_cc x, y, tv, fv, cc -> select_cc (setcc x, y, cc), 0, tv, fv, NE
1317
1318 // We need to handle f128 SELECT_CC with integer result type.
1320 setOperationAction(ISD::SELECT_CC, MVT::i64, isPPC64 ? Custom : Expand);
1321 }
1322
1323 if (Subtarget.hasP9Altivec()) {
1324 if (Subtarget.isISA3_1()) {
1329 } else {
1332 }
1340
1341 setOperationAction(ISD::ABDU, MVT::v16i8, Legal);
1342 setOperationAction(ISD::ABDU, MVT::v8i16, Legal);
1343 setOperationAction(ISD::ABDU, MVT::v4i32, Legal);
1344 setOperationAction(ISD::ABDS, MVT::v4i32, Legal);
1345 }
1346
1347 if (Subtarget.hasP10Vector()) {
1349 }
1350 }
1351
1352 if (Subtarget.pairedVectorMemops()) {
1353 addRegisterClass(MVT::v256i1, &PPC::VSRpRCRegClass);
1354 setOperationAction(ISD::LOAD, MVT::v256i1, Custom);
1355 setOperationAction(ISD::STORE, MVT::v256i1, Custom);
1356 }
1357 if (Subtarget.hasMMA()) {
1358 if (Subtarget.isISAFuture())
1359 addRegisterClass(MVT::v512i1, &PPC::WACCRCRegClass);
1360 else
1361 addRegisterClass(MVT::v512i1, &PPC::UACCRCRegClass);
1362 setOperationAction(ISD::LOAD, MVT::v512i1, Custom);
1363 setOperationAction(ISD::STORE, MVT::v512i1, Custom);
1365 }
1366
1367 if (Subtarget.has64BitSupport())
1369
1370 if (Subtarget.isISA3_1())
1371 setOperationAction(ISD::SRA, MVT::v1i128, Legal);
1372
1373 setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, isPPC64 ? Legal : Custom);
1374
1375 if (!isPPC64) {
1378 }
1379
1384 }
1385
1387
1388 if (Subtarget.hasAltivec()) {
1389 // Altivec instructions set fields to all zeros or all ones.
1391 }
1392
1395 else if (isPPC64)
1397 else
1399
1400 setStackPointerRegisterToSaveRestore(isPPC64 ? PPC::X1 : PPC::R1);
1401
1402 // We have target-specific dag combine patterns for the following nodes:
1405 if (Subtarget.hasFPCVT())
1408 if (Subtarget.useCRBits())
1412
1414
1416
1417 if (Subtarget.useCRBits()) {
1419 }
1420
1421 setLibcallName(RTLIB::LOG_F128, "logf128");
1422 setLibcallName(RTLIB::LOG2_F128, "log2f128");
1423 setLibcallName(RTLIB::LOG10_F128, "log10f128");
1424 setLibcallName(RTLIB::EXP_F128, "expf128");
1425 setLibcallName(RTLIB::EXP2_F128, "exp2f128");
1426 setLibcallName(RTLIB::SIN_F128, "sinf128");
1427 setLibcallName(RTLIB::COS_F128, "cosf128");
1428 setLibcallName(RTLIB::SINCOS_F128, "sincosf128");
1429 setLibcallName(RTLIB::POW_F128, "powf128");
1430 setLibcallName(RTLIB::FMIN_F128, "fminf128");
1431 setLibcallName(RTLIB::FMAX_F128, "fmaxf128");
1432 setLibcallName(RTLIB::REM_F128, "fmodf128");
1433 setLibcallName(RTLIB::SQRT_F128, "sqrtf128");
1434 setLibcallName(RTLIB::CEIL_F128, "ceilf128");
1435 setLibcallName(RTLIB::FLOOR_F128, "floorf128");
1436 setLibcallName(RTLIB::TRUNC_F128, "truncf128");
1437 setLibcallName(RTLIB::ROUND_F128, "roundf128");
1438 setLibcallName(RTLIB::LROUND_F128, "lroundf128");
1439 setLibcallName(RTLIB::LLROUND_F128, "llroundf128");
1440 setLibcallName(RTLIB::RINT_F128, "rintf128");
1441 setLibcallName(RTLIB::LRINT_F128, "lrintf128");
1442 setLibcallName(RTLIB::LLRINT_F128, "llrintf128");
1443 setLibcallName(RTLIB::NEARBYINT_F128, "nearbyintf128");
1444 setLibcallName(RTLIB::FMA_F128, "fmaf128");
1445 setLibcallName(RTLIB::FREXP_F128, "frexpf128");
1446
1447 if (Subtarget.isAIXABI()) {
1448 setLibcallName(RTLIB::MEMCPY, isPPC64 ? "___memmove64" : "___memmove");
1449 setLibcallName(RTLIB::MEMMOVE, isPPC64 ? "___memmove64" : "___memmove");
1450 setLibcallName(RTLIB::MEMSET, isPPC64 ? "___memset64" : "___memset");
1451 setLibcallName(RTLIB::BZERO, isPPC64 ? "___bzero64" : "___bzero");
1452 }
1453
1454 // With 32 condition bits, we don't need to sink (and duplicate) compares
1455 // aggressively in CodeGenPrep.
1456 if (Subtarget.useCRBits()) {
1459 }
1460
1461 // TODO: The default entry number is set to 64. This stops most jump table
1462 // generation on PPC. But it is good for current PPC HWs because the indirect
1463// branch instruction mtctr to the jump table may lead to bad branch prediction.
1464 // Re-evaluate this value on future HWs that can do better with mtctr.
1466
1468
1469 switch (Subtarget.getCPUDirective()) {
1470 default: break;
1471 case PPC::DIR_970:
1472 case PPC::DIR_A2:
1473 case PPC::DIR_E500:
1474 case PPC::DIR_E500mc:
1475 case PPC::DIR_E5500:
1476 case PPC::DIR_PWR4:
1477 case PPC::DIR_PWR5:
1478 case PPC::DIR_PWR5X:
1479 case PPC::DIR_PWR6:
1480 case PPC::DIR_PWR6X:
1481 case PPC::DIR_PWR7:
1482 case PPC::DIR_PWR8:
1483 case PPC::DIR_PWR9:
1484 case PPC::DIR_PWR10:
1485 case PPC::DIR_PWR11:
1489 break;
1490 }
1491
1492 if (Subtarget.enableMachineScheduler())
1494 else
1496
1498
1499 // The Freescale cores do better with aggressive inlining of memcpy and
1500// friends. GCC uses the same threshold of 128 bytes (= 32 word stores).
1501 if (Subtarget.getCPUDirective() == PPC::DIR_E500mc ||
1502 Subtarget.getCPUDirective() == PPC::DIR_E5500) {
1503 MaxStoresPerMemset = 32;
1505 MaxStoresPerMemcpy = 32;
1509 } else if (Subtarget.getCPUDirective() == PPC::DIR_A2) {
1510 // The A2 also benefits from (very) aggressive inlining of memcpy and
1511// friends. The overhead of the function call, even when warm, can be
1512 // over one hundred cycles.
1513 MaxStoresPerMemset = 128;
1514 MaxStoresPerMemcpy = 128;
1515 MaxStoresPerMemmove = 128;
1516 MaxLoadsPerMemcmp = 128;
1517 } else {
1520 }
1521
1522 IsStrictFPEnabled = true;
1523
1524 // Let the subtarget (CPU) decide if a predictable select is more expensive
1525 // than the corresponding branch. This information is used in CGP to decide
1526 // when to convert selects into branches.
1528
1530}
1531
1532// *********************************** NOTE ************************************
1533// For selecting load and store instructions, the addressing modes are defined
1534// as ComplexPatterns in PPCInstrInfo.td, which are then utilized in the TD
1535// patterns to match the load and store instructions.
1536//
1537// The TD definitions for the addressing modes correspond to their respective
1538// Select<AddrMode>Form() function in PPCISelDAGToDAG.cpp. These functions rely
1539// on SelectOptimalAddrMode(), which calls computeMOFlags() to compute the
1540// address mode flags of a particular node. Afterwards, the computed address
1541// flags are passed into getAddrModeForFlags() in order to retrieve the optimal
1542// addressing mode. SelectOptimalAddrMode() then sets the Base and Displacement
1543// accordingly, based on the preferred addressing mode.
1544//
1545// Within PPCISelLowering.h, there are two enums: MemOpFlags and AddrMode.
1546// MemOpFlags contains all the possible flags that can be used to compute the
1547// optimal addressing mode for load and store instructions.
1548// AddrMode contains all the possible load and store addressing modes available
1549// on Power (such as DForm, DSForm, DQForm, XForm, etc.)
1550//
1551// When adding new load and store instructions, it is possible that new address
1552// flags may need to be added into MemOpFlags, and a new addressing mode will
1553// need to be added to AddrMode. An entry of the new addressing mode (consisting
1554// of the minimal and main distinguishing address flags for the new load/store
1555// instructions) will need to be added into initializeAddrModeMap() below.
1556// Finally, when adding new addressing modes, the getAddrModeForFlags() will
1557// need to be updated to account for selecting the optimal addressing mode.
1558// *****************************************************************************
1559/// Initialize the map that relates the different addressing modes of the load
1560/// and store instructions to a set of flags. This ensures the load/store
1561/// instruction is correctly matched during instruction selection.
1562void PPCTargetLowering::initializeAddrModeMap() {
1563 AddrModesMap[PPC::AM_DForm] = {
1564 // LWZ, STW
1569 // LBZ, LHZ, STB, STH
1574 // LHA
1579 // LFS, LFD, STFS, STFD
1584 };
1585 AddrModesMap[PPC::AM_DSForm] = {
1586 // LWA
1590 // LD, STD
1594 // DFLOADf32, DFLOADf64, DSTOREf32, DSTOREf64
1598 };
1599 AddrModesMap[PPC::AM_DQForm] = {
1600 // LXV, STXV
1604 };
1605 AddrModesMap[PPC::AM_PrefixDForm] = {PPC::MOF_RPlusSImm34 |
1607 // TODO: Add mapping for quadword load/store.
1608}
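// Illustrative walk-through (flag and mode names are from MemOpFlags/AddrMode
// in PPCISelLowering.h; the exact flag combination shown is only an example):
// for a word-sized integer load such as "lwz r3, 12(r4)", computeMOFlags()
// would include flags along the lines of MOF_WordInt and MOF_RPlusSImm16,
// getAddrModeForFlags() would match that set against the entries above and
// return PPC::AM_DForm, and the corresponding SelectDForm()-style routine in
// PPCISelDAGToDAG.cpp would then produce Base = r4 and Disp = 12.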
1609
1610/// getMaxByValAlign - Helper for getByValTypeAlignment to determine
1611/// the desired ByVal argument alignment.
1612static void getMaxByValAlign(Type *Ty, Align &MaxAlign, Align MaxMaxAlign) {
1613 if (MaxAlign == MaxMaxAlign)
1614 return;
1615 if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
1616 if (MaxMaxAlign >= 32 &&
1617 VTy->getPrimitiveSizeInBits().getFixedValue() >= 256)
1618 MaxAlign = Align(32);
1619 else if (VTy->getPrimitiveSizeInBits().getFixedValue() >= 128 &&
1620 MaxAlign < 16)
1621 MaxAlign = Align(16);
1622 } else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
1623 Align EltAlign;
1624 getMaxByValAlign(ATy->getElementType(), EltAlign, MaxMaxAlign);
1625 if (EltAlign > MaxAlign)
1626 MaxAlign = EltAlign;
1627 } else if (StructType *STy = dyn_cast<StructType>(Ty)) {
1628 for (auto *EltTy : STy->elements()) {
1629 Align EltAlign;
1630 getMaxByValAlign(EltTy, EltAlign, MaxMaxAlign);
1631 if (EltAlign > MaxAlign)
1632 MaxAlign = EltAlign;
1633 if (MaxAlign == MaxMaxAlign)
1634 break;
1635 }
1636 }
1637}
1638
1639/// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
1640/// function arguments in the caller parameter area.
1641uint64_t PPCTargetLowering::getByValTypeAlignment(Type *Ty,
1642 const DataLayout &DL) const {
1643 // 16-byte and wider vectors are passed on a 16-byte boundary.
1644 // The rest uses an 8-byte boundary on PPC64 and a 4-byte boundary on PPC32.
1645 Align Alignment = Subtarget.isPPC64() ? Align(8) : Align(4);
1646 if (Subtarget.hasAltivec())
1647 getMaxByValAlign(Ty, Alignment, Align(16));
1648 return Alignment.value();
1649}
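// Illustrative example: for a by-value struct that contains a v4i32 member,
// getMaxByValAlign() raises the alignment to 16 when Altivec is available, so
// the aggregate is passed on a 16-byte boundary; without Altivec it keeps the
// default 8 (PPC64) or 4 (PPC32) bytes computed above.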
1650
1651bool PPCTargetLowering::useSoftFloat() const {
1652 return Subtarget.useSoftFloat();
1653}
1654
1655bool PPCTargetLowering::hasSPE() const {
1656 return Subtarget.hasSPE();
1657}
1658
1660 return VT.isScalarInteger();
1661}
1662
1663bool PPCTargetLowering::shallExtractConstSplatVectorElementToStore(
1664 Type *VectorTy, unsigned ElemSizeInBits, unsigned &Index) const {
1665 if (!Subtarget.isPPC64() || !Subtarget.hasVSX())
1666 return false;
1667
1668 if (auto *VTy = dyn_cast<VectorType>(VectorTy)) {
1669 if (VTy->getScalarType()->isIntegerTy()) {
1670 // An ElemSizeInBits of 8/16 fits in the immediate field, so it is not handled here.
1671 if (ElemSizeInBits == 32) {
1672 Index = Subtarget.isLittleEndian() ? 2 : 1;
1673 return true;
1674 }
1675 if (ElemSizeInBits == 64) {
1676 Index = Subtarget.isLittleEndian() ? 1 : 0;
1677 return true;
1678 }
1679 }
1680 }
1681 return false;
1682}
1683
1684const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
1685 switch ((PPCISD::NodeType)Opcode) {
1686 case PPCISD::FIRST_NUMBER: break;
1687 case PPCISD::FSEL: return "PPCISD::FSEL";
1688 case PPCISD::XSMAXC: return "PPCISD::XSMAXC";
1689 case PPCISD::XSMINC: return "PPCISD::XSMINC";
1690 case PPCISD::FCFID: return "PPCISD::FCFID";
1691 case PPCISD::FCFIDU: return "PPCISD::FCFIDU";
1692 case PPCISD::FCFIDS: return "PPCISD::FCFIDS";
1693 case PPCISD::FCFIDUS: return "PPCISD::FCFIDUS";
1694 case PPCISD::FCTIDZ: return "PPCISD::FCTIDZ";
1695 case PPCISD::FCTIWZ: return "PPCISD::FCTIWZ";
1696 case PPCISD::FCTIDUZ: return "PPCISD::FCTIDUZ";
1697 case PPCISD::FCTIWUZ: return "PPCISD::FCTIWUZ";
1698 case PPCISD::FRE: return "PPCISD::FRE";
1699 case PPCISD::FRSQRTE: return "PPCISD::FRSQRTE";
1700 case PPCISD::FTSQRT:
1701 return "PPCISD::FTSQRT";
1702 case PPCISD::FSQRT:
1703 return "PPCISD::FSQRT";
1704 case PPCISD::STFIWX: return "PPCISD::STFIWX";
1705 case PPCISD::VPERM: return "PPCISD::VPERM";
1706 case PPCISD::XXSPLT: return "PPCISD::XXSPLT";
1707 case PPCISD::XXSPLTI_SP_TO_DP:
1708 return "PPCISD::XXSPLTI_SP_TO_DP";
1709 case PPCISD::XXSPLTI32DX:
1710 return "PPCISD::XXSPLTI32DX";
1711 case PPCISD::VECINSERT: return "PPCISD::VECINSERT";
1712 case PPCISD::XXPERMDI: return "PPCISD::XXPERMDI";
1713 case PPCISD::XXPERM:
1714 return "PPCISD::XXPERM";
1715 case PPCISD::VECSHL: return "PPCISD::VECSHL";
1716 case PPCISD::CMPB: return "PPCISD::CMPB";
1717 case PPCISD::Hi: return "PPCISD::Hi";
1718 case PPCISD::Lo: return "PPCISD::Lo";
1719 case PPCISD::TOC_ENTRY: return "PPCISD::TOC_ENTRY";
1720 case PPCISD::ATOMIC_CMP_SWAP_8: return "PPCISD::ATOMIC_CMP_SWAP_8";
1721 case PPCISD::ATOMIC_CMP_SWAP_16: return "PPCISD::ATOMIC_CMP_SWAP_16";
1722 case PPCISD::DYNALLOC: return "PPCISD::DYNALLOC";
1723 case PPCISD::DYNAREAOFFSET: return "PPCISD::DYNAREAOFFSET";
1724 case PPCISD::PROBED_ALLOCA: return "PPCISD::PROBED_ALLOCA";
1725 case PPCISD::GlobalBaseReg: return "PPCISD::GlobalBaseReg";
1726 case PPCISD::SRL: return "PPCISD::SRL";
1727 case PPCISD::SRA: return "PPCISD::SRA";
1728 case PPCISD::SHL: return "PPCISD::SHL";
1729 case PPCISD::SRA_ADDZE: return "PPCISD::SRA_ADDZE";
1730 case PPCISD::CALL: return "PPCISD::CALL";
1731 case PPCISD::CALL_NOP: return "PPCISD::CALL_NOP";
1732 case PPCISD::CALL_NOTOC: return "PPCISD::CALL_NOTOC";
1733 case PPCISD::CALL_RM:
1734 return "PPCISD::CALL_RM";
1735 case PPCISD::CALL_NOP_RM:
1736 return "PPCISD::CALL_NOP_RM";
1737 case PPCISD::CALL_NOTOC_RM:
1738 return "PPCISD::CALL_NOTOC_RM";
1739 case PPCISD::MTCTR: return "PPCISD::MTCTR";
1740 case PPCISD::BCTRL: return "PPCISD::BCTRL";
1741 case PPCISD::BCTRL_LOAD_TOC: return "PPCISD::BCTRL_LOAD_TOC";
1742 case PPCISD::BCTRL_RM:
1743 return "PPCISD::BCTRL_RM";
1744 case PPCISD::BCTRL_LOAD_TOC_RM:
1745 return "PPCISD::BCTRL_LOAD_TOC_RM";
1746 case PPCISD::RET_GLUE: return "PPCISD::RET_GLUE";
1747 case PPCISD::READ_TIME_BASE: return "PPCISD::READ_TIME_BASE";
1748 case PPCISD::EH_SJLJ_SETJMP: return "PPCISD::EH_SJLJ_SETJMP";
1749 case PPCISD::EH_SJLJ_LONGJMP: return "PPCISD::EH_SJLJ_LONGJMP";
1750 case PPCISD::MFOCRF: return "PPCISD::MFOCRF";
1751 case PPCISD::MFVSR: return "PPCISD::MFVSR";
1752 case PPCISD::MTVSRA: return "PPCISD::MTVSRA";
1753 case PPCISD::MTVSRZ: return "PPCISD::MTVSRZ";
1754 case PPCISD::SINT_VEC_TO_FP: return "PPCISD::SINT_VEC_TO_FP";
1755 case PPCISD::UINT_VEC_TO_FP: return "PPCISD::UINT_VEC_TO_FP";
1756 case PPCISD::SCALAR_TO_VECTOR_PERMUTED:
1757 return "PPCISD::SCALAR_TO_VECTOR_PERMUTED";
1758 case PPCISD::ANDI_rec_1_EQ_BIT:
1759 return "PPCISD::ANDI_rec_1_EQ_BIT";
1760 case PPCISD::ANDI_rec_1_GT_BIT:
1761 return "PPCISD::ANDI_rec_1_GT_BIT";
1762 case PPCISD::VCMP: return "PPCISD::VCMP";
1763 case PPCISD::VCMP_rec: return "PPCISD::VCMP_rec";
1764 case PPCISD::LBRX: return "PPCISD::LBRX";
1765 case PPCISD::STBRX: return "PPCISD::STBRX";
1766 case PPCISD::LFIWAX: return "PPCISD::LFIWAX";
1767 case PPCISD::LFIWZX: return "PPCISD::LFIWZX";
1768 case PPCISD::LXSIZX: return "PPCISD::LXSIZX";
1769 case PPCISD::STXSIX: return "PPCISD::STXSIX";
1770 case PPCISD::VEXTS: return "PPCISD::VEXTS";
1771 case PPCISD::LXVD2X: return "PPCISD::LXVD2X";
1772 case PPCISD::STXVD2X: return "PPCISD::STXVD2X";
1773 case PPCISD::LOAD_VEC_BE: return "PPCISD::LOAD_VEC_BE";
1774 case PPCISD::STORE_VEC_BE: return "PPCISD::STORE_VEC_BE";
1775 case PPCISD::ST_VSR_SCAL_INT:
1776 return "PPCISD::ST_VSR_SCAL_INT";
1777 case PPCISD::COND_BRANCH: return "PPCISD::COND_BRANCH";
1778 case PPCISD::BDNZ: return "PPCISD::BDNZ";
1779 case PPCISD::BDZ: return "PPCISD::BDZ";
1780 case PPCISD::MFFS: return "PPCISD::MFFS";
1781 case PPCISD::FADDRTZ: return "PPCISD::FADDRTZ";
1782 case PPCISD::TC_RETURN: return "PPCISD::TC_RETURN";
1783 case PPCISD::CR6SET: return "PPCISD::CR6SET";
1784 case PPCISD::CR6UNSET: return "PPCISD::CR6UNSET";
1785 case PPCISD::PPC32_GOT: return "PPCISD::PPC32_GOT";
1786 case PPCISD::PPC32_PICGOT: return "PPCISD::PPC32_PICGOT";
1787 case PPCISD::ADDIS_GOT_TPREL_HA: return "PPCISD::ADDIS_GOT_TPREL_HA";
1788 case PPCISD::LD_GOT_TPREL_L: return "PPCISD::LD_GOT_TPREL_L";
1789 case PPCISD::ADD_TLS: return "PPCISD::ADD_TLS";
1790 case PPCISD::ADDIS_TLSGD_HA: return "PPCISD::ADDIS_TLSGD_HA";
1791 case PPCISD::ADDI_TLSGD_L: return "PPCISD::ADDI_TLSGD_L";
1792 case PPCISD::GET_TLS_ADDR: return "PPCISD::GET_TLS_ADDR";
1793 case PPCISD::GET_TLS_MOD_AIX: return "PPCISD::GET_TLS_MOD_AIX";
1794 case PPCISD::GET_TPOINTER: return "PPCISD::GET_TPOINTER";
1795 case PPCISD::ADDI_TLSGD_L_ADDR: return "PPCISD::ADDI_TLSGD_L_ADDR";
1796 case PPCISD::TLSGD_AIX: return "PPCISD::TLSGD_AIX";
1797 case PPCISD::TLSLD_AIX: return "PPCISD::TLSLD_AIX";
1798 case PPCISD::ADDIS_TLSLD_HA: return "PPCISD::ADDIS_TLSLD_HA";
1799 case PPCISD::ADDI_TLSLD_L: return "PPCISD::ADDI_TLSLD_L";
1800 case PPCISD::GET_TLSLD_ADDR: return "PPCISD::GET_TLSLD_ADDR";
1801 case PPCISD::ADDI_TLSLD_L_ADDR: return "PPCISD::ADDI_TLSLD_L_ADDR";
1802 case PPCISD::ADDIS_DTPREL_HA: return "PPCISD::ADDIS_DTPREL_HA";
1803 case PPCISD::ADDI_DTPREL_L: return "PPCISD::ADDI_DTPREL_L";
1804 case PPCISD::PADDI_DTPREL:
1805 return "PPCISD::PADDI_DTPREL";
1806 case PPCISD::VADD_SPLAT: return "PPCISD::VADD_SPLAT";
1807 case PPCISD::SC: return "PPCISD::SC";
1808 case PPCISD::CLRBHRB: return "PPCISD::CLRBHRB";
1809 case PPCISD::MFBHRBE: return "PPCISD::MFBHRBE";
1810 case PPCISD::RFEBB: return "PPCISD::RFEBB";
1811 case PPCISD::XXSWAPD: return "PPCISD::XXSWAPD";
1812 case PPCISD::SWAP_NO_CHAIN: return "PPCISD::SWAP_NO_CHAIN";
1813 case PPCISD::BUILD_FP128: return "PPCISD::BUILD_FP128";
1814 case PPCISD::BUILD_SPE64: return "PPCISD::BUILD_SPE64";
1815 case PPCISD::EXTRACT_SPE: return "PPCISD::EXTRACT_SPE";
1816 case PPCISD::EXTSWSLI: return "PPCISD::EXTSWSLI";
1817 case PPCISD::LD_VSX_LH: return "PPCISD::LD_VSX_LH";
1818 case PPCISD::FP_EXTEND_HALF: return "PPCISD::FP_EXTEND_HALF";
1819 case PPCISD::MAT_PCREL_ADDR: return "PPCISD::MAT_PCREL_ADDR";
1820 case PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR:
1821 return "PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR";
1822 case PPCISD::TLS_LOCAL_EXEC_MAT_ADDR:
1823 return "PPCISD::TLS_LOCAL_EXEC_MAT_ADDR";
1824 case PPCISD::ACC_BUILD: return "PPCISD::ACC_BUILD";
1825 case PPCISD::PAIR_BUILD: return "PPCISD::PAIR_BUILD";
1826 case PPCISD::EXTRACT_VSX_REG: return "PPCISD::EXTRACT_VSX_REG";
1827 case PPCISD::XXMFACC: return "PPCISD::XXMFACC";
1828 case PPCISD::LD_SPLAT: return "PPCISD::LD_SPLAT";
1829 case PPCISD::ZEXT_LD_SPLAT: return "PPCISD::ZEXT_LD_SPLAT";
1830 case PPCISD::SEXT_LD_SPLAT: return "PPCISD::SEXT_LD_SPLAT";
1831 case PPCISD::FNMSUB: return "PPCISD::FNMSUB";
1832 case PPCISD::STRICT_FADDRTZ:
1833 return "PPCISD::STRICT_FADDRTZ";
1834 case PPCISD::STRICT_FCTIDZ:
1835 return "PPCISD::STRICT_FCTIDZ";
1836 case PPCISD::STRICT_FCTIWZ:
1837 return "PPCISD::STRICT_FCTIWZ";
1838 case PPCISD::STRICT_FCTIDUZ:
1839 return "PPCISD::STRICT_FCTIDUZ";
1840 case PPCISD::STRICT_FCTIWUZ:
1841 return "PPCISD::STRICT_FCTIWUZ";
1842 case PPCISD::STRICT_FCFID:
1843 return "PPCISD::STRICT_FCFID";
1844 case PPCISD::STRICT_FCFIDU:
1845 return "PPCISD::STRICT_FCFIDU";
1846 case PPCISD::STRICT_FCFIDS:
1847 return "PPCISD::STRICT_FCFIDS";
1848 case PPCISD::STRICT_FCFIDUS:
1849 return "PPCISD::STRICT_FCFIDUS";
1850 case PPCISD::LXVRZX: return "PPCISD::LXVRZX";
1851 case PPCISD::STORE_COND:
1852 return "PPCISD::STORE_COND";
1853 }
1854 return nullptr;
1855}
1856
1857EVT PPCTargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &C,
1858 EVT VT) const {
1859 if (!VT.isVector())
1860 return Subtarget.useCRBits() ? MVT::i1 : MVT::i32;
1861
1862 return VT.changeVectorElementTypeToInteger();
1863}
1864
1866 assert(VT.isFloatingPoint() && "Non-floating-point FMA?");
1867 return true;
1868}
1869
1870//===----------------------------------------------------------------------===//
1871// Node matching predicates, for use by the tblgen matching code.
1872//===----------------------------------------------------------------------===//
1873
1874/// isFloatingPointZero - Return true if this is 0.0 or -0.0.
1875static bool isFloatingPointZero(SDValue Op) {
1876 if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op))
1877 return CFP->getValueAPF().isZero();
1878 else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) {
1879 // Maybe this has already been legalized into the constant pool?
1880 if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op.getOperand(1)))
1881 if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
1882 return CFP->getValueAPF().isZero();
1883 }
1884 return false;
1885}
1886
1887/// isConstantOrUndef - Op is either an undef node or a ConstantSDNode. Return
1888/// true if Op is undef or if it matches the specified value.
1889static bool isConstantOrUndef(int Op, int Val) {
1890 return Op < 0 || Op == Val;
1891}
1892
1893/// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a
1894/// VPKUHUM instruction.
1895/// The ShuffleKind distinguishes between big-endian operations with
1896/// two different inputs (0), either-endian operations with two identical
1897/// inputs (1), and little-endian operations with two different inputs (2).
1898/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
1899bool PPC::isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
1900 SelectionDAG &DAG) {
1901 bool IsLE = DAG.getDataLayout().isLittleEndian();
1902 if (ShuffleKind == 0) {
1903 if (IsLE)
1904 return false;
1905 for (unsigned i = 0; i != 16; ++i)
1906 if (!isConstantOrUndef(N->getMaskElt(i), i*2+1))
1907 return false;
1908 } else if (ShuffleKind == 2) {
1909 if (!IsLE)
1910 return false;
1911 for (unsigned i = 0; i != 16; ++i)
1912 if (!isConstantOrUndef(N->getMaskElt(i), i*2))
1913 return false;
1914 } else if (ShuffleKind == 1) {
1915 unsigned j = IsLE ? 0 : 1;
1916 for (unsigned i = 0; i != 8; ++i)
1917 if (!isConstantOrUndef(N->getMaskElt(i), i*2+j) ||
1918 !isConstantOrUndef(N->getMaskElt(i+8), i*2+j))
1919 return false;
1920 }
1921 return true;
1922}
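// Illustrative mask (big-endian, two distinct inputs, ShuffleKind == 0): the
// check above accepts <1,3,5,...,31>, i.e. the low-order byte of each halfword
// taken from both input vectors, which is the byte selection vpkuhum performs.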
1923
1924/// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a
1925/// VPKUWUM instruction.
1926/// The ShuffleKind distinguishes between big-endian operations with
1927/// two different inputs (0), either-endian operations with two identical
1928/// inputs (1), and little-endian operations with two different inputs (2).
1929/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
1930bool PPC::isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
1931 SelectionDAG &DAG) {
1932 bool IsLE = DAG.getDataLayout().isLittleEndian();
1933 if (ShuffleKind == 0) {
1934 if (IsLE)
1935 return false;
1936 for (unsigned i = 0; i != 16; i += 2)
1937 if (!isConstantOrUndef(N->getMaskElt(i ), i*2+2) ||
1938 !isConstantOrUndef(N->getMaskElt(i+1), i*2+3))
1939 return false;
1940 } else if (ShuffleKind == 2) {
1941 if (!IsLE)
1942 return false;
1943 for (unsigned i = 0; i != 16; i += 2)
1944 if (!isConstantOrUndef(N->getMaskElt(i ), i*2) ||
1945 !isConstantOrUndef(N->getMaskElt(i+1), i*2+1))
1946 return false;
1947 } else if (ShuffleKind == 1) {
1948 unsigned j = IsLE ? 0 : 2;
1949 for (unsigned i = 0; i != 8; i += 2)
1950 if (!isConstantOrUndef(N->getMaskElt(i ), i*2+j) ||
1951 !isConstantOrUndef(N->getMaskElt(i+1), i*2+j+1) ||
1952 !isConstantOrUndef(N->getMaskElt(i+8), i*2+j) ||
1953 !isConstantOrUndef(N->getMaskElt(i+9), i*2+j+1))
1954 return false;
1955 }
1956 return true;
1957}
1958
1959/// isVPKUDUMShuffleMask - Return true if this is the shuffle mask for a
1960/// VPKUDUM instruction, AND the VPKUDUM instruction exists for the
1961/// current subtarget.
1962///
1963/// The ShuffleKind distinguishes between big-endian operations with
1964/// two different inputs (0), either-endian operations with two identical
1965/// inputs (1), and little-endian operations with two different inputs (2).
1966/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
1967bool PPC::isVPKUDUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
1968 SelectionDAG &DAG) {
1969 const PPCSubtarget &Subtarget = DAG.getSubtarget<PPCSubtarget>();
1970 if (!Subtarget.hasP8Vector())
1971 return false;
1972
1973 bool IsLE = DAG.getDataLayout().isLittleEndian();
1974 if (ShuffleKind == 0) {
1975 if (IsLE)
1976 return false;
1977 for (unsigned i = 0; i != 16; i += 4)
1978 if (!isConstantOrUndef(N->getMaskElt(i ), i*2+4) ||
1979 !isConstantOrUndef(N->getMaskElt(i+1), i*2+5) ||
1980 !isConstantOrUndef(N->getMaskElt(i+2), i*2+6) ||
1981 !isConstantOrUndef(N->getMaskElt(i+3), i*2+7))
1982 return false;
1983 } else if (ShuffleKind == 2) {
1984 if (!IsLE)
1985 return false;
1986 for (unsigned i = 0; i != 16; i += 4)
1987 if (!isConstantOrUndef(N->getMaskElt(i ), i*2) ||
1988 !isConstantOrUndef(N->getMaskElt(i+1), i*2+1) ||
1989 !isConstantOrUndef(N->getMaskElt(i+2), i*2+2) ||
1990 !isConstantOrUndef(N->getMaskElt(i+3), i*2+3))
1991 return false;
1992 } else if (ShuffleKind == 1) {
1993 unsigned j = IsLE ? 0 : 4;
1994 for (unsigned i = 0; i != 8; i += 4)
1995 if (!isConstantOrUndef(N->getMaskElt(i ), i*2+j) ||
1996 !isConstantOrUndef(N->getMaskElt(i+1), i*2+j+1) ||
1997 !isConstantOrUndef(N->getMaskElt(i+2), i*2+j+2) ||
1998 !isConstantOrUndef(N->getMaskElt(i+3), i*2+j+3) ||
1999 !isConstantOrUndef(N->getMaskElt(i+8), i*2+j) ||
2000 !isConstantOrUndef(N->getMaskElt(i+9), i*2+j+1) ||
2001 !isConstantOrUndef(N->getMaskElt(i+10), i*2+j+2) ||
2002 !isConstantOrUndef(N->getMaskElt(i+11), i*2+j+3))
2003 return false;
2004 }
2005 return true;
2006}
2007
2008/// isVMerge - Common function, used to match vmrg* shuffles.
2009///
2010static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize,
2011 unsigned LHSStart, unsigned RHSStart) {
2012 if (N->getValueType(0) != MVT::v16i8)
2013 return false;
2014 assert((UnitSize == 1 || UnitSize == 2 || UnitSize == 4) &&
2015 "Unsupported merge size!");
2016
2017 for (unsigned i = 0; i != 8/UnitSize; ++i) // Step over units
2018 for (unsigned j = 0; j != UnitSize; ++j) { // Step over bytes within unit
2019 if (!isConstantOrUndef(N->getMaskElt(i*UnitSize*2+j),
2020 LHSStart+j+i*UnitSize) ||
2021 !isConstantOrUndef(N->getMaskElt(i*UnitSize*2+UnitSize+j),
2022 RHSStart+j+i*UnitSize))
2023 return false;
2024 }
2025 return true;
2026}
2027
2028/// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for
2029/// a VMRGL* instruction with the specified unit size (1,2 or 4 bytes).
2030/// The ShuffleKind distinguishes between big-endian merges with two
2031/// different inputs (0), either-endian merges with two identical inputs (1),
2032/// and little-endian merges with two different inputs (2). For the latter,
2033/// the input operands are swapped (see PPCInstrAltivec.td).
2034bool PPC::isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
2035 unsigned ShuffleKind, SelectionDAG &DAG) {
2036 if (DAG.getDataLayout().isLittleEndian()) {
2037 if (ShuffleKind == 1) // unary
2038 return isVMerge(N, UnitSize, 0, 0);
2039 else if (ShuffleKind == 2) // swapped
2040 return isVMerge(N, UnitSize, 0, 16);
2041 else
2042 return false;
2043 } else {
2044 if (ShuffleKind == 1) // unary
2045 return isVMerge(N, UnitSize, 8, 8);
2046 else if (ShuffleKind == 0) // normal
2047 return isVMerge(N, UnitSize, 8, 24);
2048 else
2049 return false;
2050 }
2051}
2052
2053/// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for
2054/// a VMRGH* instruction with the specified unit size (1,2 or 4 bytes).
2055/// The ShuffleKind distinguishes between big-endian merges with two
2056/// different inputs (0), either-endian merges with two identical inputs (1),
2057/// and little-endian merges with two different inputs (2). For the latter,
2058/// the input operands are swapped (see PPCInstrAltivec.td).
2059bool PPC::isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
2060 unsigned ShuffleKind, SelectionDAG &DAG) {
2061 if (DAG.getDataLayout().isLittleEndian()) {
2062 if (ShuffleKind == 1) // unary
2063 return isVMerge(N, UnitSize, 8, 8);
2064 else if (ShuffleKind == 2) // swapped
2065 return isVMerge(N, UnitSize, 8, 24);
2066 else
2067 return false;
2068 } else {
2069 if (ShuffleKind == 1) // unary
2070 return isVMerge(N, UnitSize, 0, 0);
2071 else if (ShuffleKind == 0) // normal
2072 return isVMerge(N, UnitSize, 0, 16);
2073 else
2074 return false;
2075 }
2076}
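// Illustrative mask: with UnitSize == 4 on big-endian and two distinct inputs
// (ShuffleKind == 0), isVMerge(N, 4, 0, 16) accepts
// <0,1,2,3, 16,17,18,19, 4,5,6,7, 20,21,22,23>, i.e. the high-word (vmrghw)
// merge of the two input vectors.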
2077
2078/**
2079 * Common function used to match vmrgew and vmrgow shuffles
2080 *
2081 * The indexOffset determines whether to look for even or odd words in
2082 * the shuffle mask. This is based on the endianness of the target
2083 * machine.
2084 * - Little Endian:
2085 * - Use offset of 0 to check for odd elements
2086 * - Use offset of 4 to check for even elements
2087 * - Big Endian:
2088 * - Use offset of 0 to check for even elements
2089 * - Use offset of 4 to check for odd elements
2090 * A detailed description of the vector element ordering for little endian and
2091 * big endian can be found at
2092 * http://www.ibm.com/developerworks/library/l-ibm-xl-c-cpp-compiler/index.html
2093 * Targeting your applications - what little endian and big endian IBM XL C/C++
2094 * compiler differences mean to you
2095 *
2096 * The mask to the shuffle vector instruction specifies the indices of the
2097 * elements from the two input vectors to place in the result. The elements are
2098 * numbered in array-access order, starting with the first vector. These vectors
2099 * are always of type v16i8, thus each vector will contain 16 elements of size
2100 * 8 bits. More info on the shuffle vector can be found in the
2101 * http://llvm.org/docs/LangRef.html#shufflevector-instruction
2102 * Language Reference.
2103 *
2104 * The RHSStartValue indicates whether the same input vectors are used (unary)
2105 * or two different input vectors are used, based on the following:
2106 * - If the instruction uses the same vector for both inputs, the range of the
2107 * indices will be 0 to 15. In this case, the RHSStart value passed should
2108 * be 0.
2109 * - If the instruction has two different vectors then the range of the
2110 * indices will be 0 to 31. In this case, the RHSStart value passed should
2111 * be 16 (indices 0-15 specify elements in the first vector while indices 16
2112 * to 31 specify elements in the second vector).
2113 *
2114 * \param[in] N The shuffle vector SD Node to analyze
2115 * \param[in] IndexOffset Specifies whether to look for even or odd elements
2116 * \param[in] RHSStartValue Specifies the starting index for the righthand input
2117 * vector to the shuffle_vector instruction
2118 * \return true iff this shuffle vector represents an even or odd word merge
2119 */
2120static bool isVMerge(ShuffleVectorSDNode *N, unsigned IndexOffset,
2121 unsigned RHSStartValue) {
2122 if (N->getValueType(0) != MVT::v16i8)
2123 return false;
2124
2125 for (unsigned i = 0; i < 2; ++i)
2126 for (unsigned j = 0; j < 4; ++j)
2127 if (!isConstantOrUndef(N->getMaskElt(i*4+j),
2128 i*RHSStartValue+j+IndexOffset) ||
2129 !isConstantOrUndef(N->getMaskElt(i*4+j+8),
2130 i*RHSStartValue+j+IndexOffset+8))
2131 return false;
2132 return true;
2133}
2134
2135/**
2136 * Determine if the specified shuffle mask is suitable for the vmrgew or
2137 * vmrgow instructions.
2138 *
2139 * \param[in] N The shuffle vector SD Node to analyze
2140 * \param[in] CheckEven Check for an even merge (true) or an odd merge (false)
2141 * \param[in] ShuffleKind Identify the type of merge:
2142 * - 0 = big-endian merge with two different inputs;
2143 * - 1 = either-endian merge with two identical inputs;
2144 * - 2 = little-endian merge with two different inputs (inputs are swapped for
2145 * little-endian merges).
2146 * \param[in] DAG The current SelectionDAG
2147 * \return true iff this shuffle mask represents an even or odd word merge
2148 */
2149bool PPC::isVMRGEOShuffleMask(ShuffleVectorSDNode *N, bool CheckEven,
2150 unsigned ShuffleKind, SelectionDAG &DAG) {
2151 if (DAG.getDataLayout().isLittleEndian()) {
2152 unsigned indexOffset = CheckEven ? 4 : 0;
2153 if (ShuffleKind == 1) // Unary
2154 return isVMerge(N, indexOffset, 0);
2155 else if (ShuffleKind == 2) // swapped
2156 return isVMerge(N, indexOffset, 16);
2157 else
2158 return false;
2159 }
2160 else {
2161 unsigned indexOffset = CheckEven ? 0 : 4;
2162 if (ShuffleKind == 1) // Unary
2163 return isVMerge(N, indexOffset, 0);
2164 else if (ShuffleKind == 0) // Normal
2165 return isVMerge(N, indexOffset, 16);
2166 else
2167 return false;
2168 }
2169 return false;
2170}
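// Illustrative mask: on big-endian with two distinct inputs (ShuffleKind == 0)
// and CheckEven == true, isVMerge(N, 0, 16) accepts
// <0,1,2,3, 16,17,18,19, 8,9,10,11, 24,25,26,27>, i.e. the even words of both
// inputs interleaved, as vmrgew produces.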
2171
2172/// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift
2173/// amount, otherwise return -1.
2174/// The ShuffleKind distinguishes between big-endian operations with two
2175/// different inputs (0), either-endian operations with two identical inputs
2176/// (1), and little-endian operations with two different inputs (2). For the
2177/// latter, the input operands are swapped (see PPCInstrAltivec.td).
2178int PPC::isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind,
2179 SelectionDAG &DAG) {
2180 if (N->getValueType(0) != MVT::v16i8)
2181 return -1;
2182
2183 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
2184
2185 // Find the first non-undef value in the shuffle mask.
2186 unsigned i;
2187 for (i = 0; i != 16 && SVOp->getMaskElt(i) < 0; ++i)
2188 /*search*/;
2189
2190 if (i == 16) return -1; // all undef.
2191
2192 // Otherwise, check to see if the rest of the elements are consecutively
2193 // numbered from this value.
2194 unsigned ShiftAmt = SVOp->getMaskElt(i);
2195 if (ShiftAmt < i) return -1;
2196
2197 ShiftAmt -= i;
2198 bool isLE = DAG.getDataLayout().isLittleEndian();
2199
2200 if ((ShuffleKind == 0 && !isLE) || (ShuffleKind == 2 && isLE)) {
2201 // Check the rest of the elements to see if they are consecutive.
2202 for (++i; i != 16; ++i)
2203 if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt+i))
2204 return -1;
2205 } else if (ShuffleKind == 1) {
2206 // Check the rest of the elements to see if they are consecutive.
2207 for (++i; i != 16; ++i)
2208 if (!isConstantOrUndef(SVOp->getMaskElt(i), (ShiftAmt+i) & 15))
2209 return -1;
2210 } else
2211 return -1;
2212
2213 if (isLE)
2214 ShiftAmt = 16 - ShiftAmt;
2215
2216 return ShiftAmt;
2217}
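// Editorial illustration (not part of the upstream source): for a mask that is
// simply the consecutive run {3,4,...,18}, the first non-undef element gives
// ShiftAmt = 3, so a big-endian two-input shuffle (ShuffleKind 0) returns 3,
// while a little-endian swapped-input shuffle (ShuffleKind 2) returns
// 16 - 3 = 13 after the final adjustment above.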
2218
2219/// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand
2220/// specifies a splat of a single element that is suitable for input to
2221/// one of the splat operations (VSPLTB/VSPLTH/VSPLTW/XXSPLTW/LXVDSX/etc.).
2222bool PPC::isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize) {
2223 EVT VT = N->getValueType(0);
2224 if (VT == MVT::v2i64 || VT == MVT::v2f64)
2225 return EltSize == 8 && N->getMaskElt(0) == N->getMaskElt(1);
2226
2227 assert(VT == MVT::v16i8 && isPowerOf2_32(EltSize) &&
2228 EltSize <= 8 && "Can only handle 1,2,4,8 byte element sizes");
2229
2230 // The consecutive indices need to specify an element, not part of two
2231 // different elements. So abandon ship early if this isn't the case.
2232 if (N->getMaskElt(0) % EltSize != 0)
2233 return false;
2234
2235 // This is a splat operation if each element of the permute is the same, and
2236 // if the value doesn't reference the second vector.
2237 unsigned ElementBase = N->getMaskElt(0);
2238
2239 // FIXME: Handle UNDEF elements too!
2240 if (ElementBase >= 16)
2241 return false;
2242
2243 // Check that the indices are consecutive, in the case of a multi-byte element
2244 // splatted with a v16i8 mask.
2245 for (unsigned i = 1; i != EltSize; ++i)
2246 if (N->getMaskElt(i) < 0 || N->getMaskElt(i) != (int)(i+ElementBase))
2247 return false;
2248
2249 for (unsigned i = EltSize, e = 16; i != e; i += EltSize) {
2250 if (N->getMaskElt(i) < 0) continue;
2251 for (unsigned j = 0; j != EltSize; ++j)
2252 if (N->getMaskElt(i+j) != N->getMaskElt(j))
2253 return false;
2254 }
2255 return true;
2256}
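// Editorial illustration (not part of the upstream source): with EltSize = 4,
// the mask {4,5,6,7, 4,5,6,7, 4,5,6,7, 4,5,6,7} passes all of the checks above
// (ElementBase = 4, indices consecutive within the element, every later group
// equal to the first), i.e. it is a splat of word 1 of the first input. A mask
// whose first index is not a multiple of EltSize, e.g. {5,6,7,8, ...}, is
// rejected by the very first check.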
2257
2258/// Check that the mask is shuffling N byte elements. Within each N byte
2259/// element of the mask, the indices could be either in increasing or
2260/// decreasing order as long as they are consecutive.
2261/// \param[in] N the shuffle vector SD Node to analyze
2262/// \param[in] Width the element width in bytes, could be 2/4/8/16 (HalfWord/
2263/// Word/DoubleWord/QuadWord).
2264 * \param[in] StepLen the step between consecutive indices within an N byte
2265 * element: 1 if the mask is in increasing order, -1 if decreasing.
2266/// \return true iff the mask is shuffling N byte elements.
2267static bool isNByteElemShuffleMask(ShuffleVectorSDNode *N, unsigned Width,
2268 int StepLen) {
2269 assert((Width == 2 || Width == 4 || Width == 8 || Width == 16) &&
2270 "Unexpected element width.");
2271 assert((StepLen == 1 || StepLen == -1) && "Unexpected step length.");
2272
2273 unsigned NumOfElem = 16 / Width;
2274 unsigned MaskVal[16]; // Width is never greater than 16
2275 for (unsigned i = 0; i < NumOfElem; ++i) {
2276 MaskVal[0] = N->getMaskElt(i * Width);
2277 if ((StepLen == 1) && (MaskVal[0] % Width)) {
2278 return false;
2279 } else if ((StepLen == -1) && ((MaskVal[0] + 1) % Width)) {
2280 return false;
2281 }
2282
2283 for (unsigned int j = 1; j < Width; ++j) {
2284 MaskVal[j] = N->getMaskElt(i * Width + j);
2285 if (MaskVal[j] != MaskVal[j-1] + StepLen) {
2286 return false;
2287 }
2288 }
2289 }
2290
2291 return true;
2292}
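// Editorial illustration (not part of the upstream source): with Width = 4 and
// StepLen = 1, the mask {8,9,10,11, 0,1,2,3, 20,21,22,23, 4,5,6,7} is accepted
// -- each word starts at a multiple of 4 and its bytes increase by 1. With
// StepLen = -1 each word must instead start at an index that is 3 (mod 4) and
// decrease, e.g. {3,2,1,0, 7,6,5,4, 11,10,9,8, 15,14,13,12}.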
2293
2294bool PPC::isXXINSERTWMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
2295 unsigned &InsertAtByte, bool &Swap, bool IsLE) {
2296 if (!isNByteElemShuffleMask(N, 4, 1))
2297 return false;
2298
2299 // Now we look at mask elements 0,4,8,12
2300 unsigned M0 = N->getMaskElt(0) / 4;
2301 unsigned M1 = N->getMaskElt(4) / 4;
2302 unsigned M2 = N->getMaskElt(8) / 4;
2303 unsigned M3 = N->getMaskElt(12) / 4;
2304 unsigned LittleEndianShifts[] = { 2, 1, 0, 3 };
2305 unsigned BigEndianShifts[] = { 3, 0, 1, 2 };
2306
2307 // Below, let H and L be arbitrary elements of the shuffle mask
2308 // where H is in the range [4,7] and L is in the range [0,3].
2309 // H, 1, 2, 3 or L, 5, 6, 7
2310 if ((M0 > 3 && M1 == 1 && M2 == 2 && M3 == 3) ||
2311 (M0 < 4 && M1 == 5 && M2 == 6 && M3 == 7)) {
2312 ShiftElts = IsLE ? LittleEndianShifts[M0 & 0x3] : BigEndianShifts[M0 & 0x3];
2313 InsertAtByte = IsLE ? 12 : 0;
2314 Swap = M0 < 4;
2315 return true;
2316 }
2317 // 0, H, 2, 3 or 4, L, 6, 7
2318 if ((M1 > 3 && M0 == 0 && M2 == 2 && M3 == 3) ||
2319 (M1 < 4 && M0 == 4 && M2 == 6 && M3 == 7)) {
2320 ShiftElts = IsLE ? LittleEndianShifts[M1 & 0x3] : BigEndianShifts[M1 & 0x3];
2321 InsertAtByte = IsLE ? 8 : 4;
2322 Swap = M1 < 4;
2323 return true;
2324 }
2325 // 0, 1, H, 3 or 4, 5, L, 7
2326 if ((M2 > 3 && M0 == 0 && M1 == 1 && M3 == 3) ||
2327 (M2 < 4 && M0 == 4 && M1 == 5 && M3 == 7)) {
2328 ShiftElts = IsLE ? LittleEndianShifts[M2 & 0x3] : BigEndianShifts[M2 & 0x3];
2329 InsertAtByte = IsLE ? 4 : 8;
2330 Swap = M2 < 4;
2331 return true;
2332 }
2333 // 0, 1, 2, H or 4, 5, 6, L
2334 if ((M3 > 3 && M0 == 0 && M1 == 1 && M2 == 2) ||
2335 (M3 < 4 && M0 == 4 && M1 == 5 && M2 == 6)) {
2336 ShiftElts = IsLE ? LittleEndianShifts[M3 & 0x3] : BigEndianShifts[M3 & 0x3];
2337 InsertAtByte = IsLE ? 0 : 12;
2338 Swap = M3 < 4;
2339 return true;
2340 }
2341
2342 // If both vector operands for the shuffle are the same vector, the mask will
2343 // contain only elements from the first one and the second one will be undef.
2344 if (N->getOperand(1).isUndef()) {
2345 ShiftElts = 0;
2346 Swap = true;
2347 unsigned XXINSERTWSrcElem = IsLE ? 2 : 1;
2348 if (M0 == XXINSERTWSrcElem && M1 == 1 && M2 == 2 && M3 == 3) {
2349 InsertAtByte = IsLE ? 12 : 0;
2350 return true;
2351 }
2352 if (M0 == 0 && M1 == XXINSERTWSrcElem && M2 == 2 && M3 == 3) {
2353 InsertAtByte = IsLE ? 8 : 4;
2354 return true;
2355 }
2356 if (M0 == 0 && M1 == 1 && M2 == XXINSERTWSrcElem && M3 == 3) {
2357 InsertAtByte = IsLE ? 4 : 8;
2358 return true;
2359 }
2360 if (M0 == 0 && M1 == 1 && M2 == 2 && M3 == XXINSERTWSrcElem) {
2361 InsertAtByte = IsLE ? 0 : 12;
2362 return true;
2363 }
2364 }
2365
2366 return false;
2367}
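// Editorial illustration (not part of the upstream source): on little-endian,
// the word-level mask {4,1,2,3} (bytes {16..19, 4..7, 8..11, 12..15}) takes the
// first branch above with M0 = 4, giving ShiftElts = LittleEndianShifts[0] = 2,
// InsertAtByte = 12 and Swap = false, i.e. word 0 of the second input is
// inserted into the first input.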
2368
2369bool PPC::isXXSLDWIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
2370 bool &Swap, bool IsLE) {
2371 assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
2372 // Ensure each byte index of the word is consecutive.
2373 if (!isNByteElemShuffleMask(N, 4, 1))
2374 return false;
2375
2376 // Now we look at mask elements 0,4,8,12, which are the beginning of words.
2377 unsigned M0 = N->getMaskElt(0) / 4;
2378 unsigned M1 = N->getMaskElt(4) / 4;
2379 unsigned M2 = N->getMaskElt(8) / 4;
2380 unsigned M3 = N->getMaskElt(12) / 4;
2381
2382 // If both vector operands for the shuffle are the same vector, the mask will
2383 // contain only elements from the first one and the second one will be undef.
2384 if (N->getOperand(1).isUndef()) {
2385 assert(M0 < 4 && "Indexing into an undef vector?");
2386 if (M1 != (M0 + 1) % 4 || M2 != (M1 + 1) % 4 || M3 != (M2 + 1) % 4)
2387 return false;
2388
2389 ShiftElts = IsLE ? (4 - M0) % 4 : M0;
2390 Swap = false;
2391 return true;
2392 }
2393
2394 // Ensure each word index of the ShuffleVector Mask is consecutive.
2395 if (M1 != (M0 + 1) % 8 || M2 != (M1 + 1) % 8 || M3 != (M2 + 1) % 8)
2396 return false;
2397
2398 if (IsLE) {
2399 if (M0 == 0 || M0 == 7 || M0 == 6 || M0 == 5) {
2400 // Input vectors don't need to be swapped if the leading element
2401 // of the result is one of the 3 left elements of the second vector
2402 // (or if there is no shift to be done at all).
2403 Swap = false;
2404 ShiftElts = (8 - M0) % 8;
2405 } else if (M0 == 4 || M0 == 3 || M0 == 2 || M0 == 1) {
2406 // Input vectors need to be swapped if the leading element
2407 // of the result is one of the 3 left elements of the first vector
2408 // (or if we're shifting by 4 - thereby simply swapping the vectors).
2409 Swap = true;
2410 ShiftElts = (4 - M0) % 4;
2411 }
2412
2413 return true;
2414 } else { // BE
2415 if (M0 == 0 || M0 == 1 || M0 == 2 || M0 == 3) {
2416 // Input vectors don't need to be swapped if the leading element
2417 // of the result is one of the 4 elements of the first vector.
2418 Swap = false;
2419 ShiftElts = M0;
2420 } else if (M0 == 4 || M0 == 5 || M0 == 6 || M0 == 7) {
2421 // Input vectors need to be swapped if the leading element
2422 // of the result is one of the 4 elements of the right vector.
2423 Swap = true;
2424 ShiftElts = M0 - 4;
2425 }
2426
2427 return true;
2428 }
2429}
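// Editorial illustration (not part of the upstream source): on little-endian, a
// two-input shuffle whose word-level mask is {6,7,0,1} passes the
// consecutive-mod-8 check and, with M0 = 6, falls into the first branch above:
// Swap = false and ShiftElts = (8 - 6) % 8 = 2.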
2430
2431static bool isXXBRShuffleMaskHelper(ShuffleVectorSDNode *N, unsigned Width) {
2432 assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
2433
2434 if (!isNByteElemShuffleMask(N, Width, -1))
2435 return false;
2436
2437 for (int i = 0; i < 16; i += Width)
2438 if (N->getMaskElt(i) != i + Width - 1)
2439 return false;
2440
2441 return true;
2442}
2443
2444bool PPC::isXXBRHShuffleMask(ShuffleVectorSDNode *N) {
2445 return isXXBRShuffleMaskHelper(N, 2);
2446}
2447
2448bool PPC::isXXBRWShuffleMask(ShuffleVectorSDNode *N) {
2449 return isXXBRShuffleMaskHelper(N, 4);
2450}
2451
2452bool PPC::isXXBRDShuffleMask(ShuffleVectorSDNode *N) {
2453 return isXXBRShuffleMaskHelper(N, 8);
2454}
2455
2456bool PPC::isXXBRQShuffleMask(ShuffleVectorSDNode *N) {
2457 return isXXBRShuffleMaskHelper(N, 16);
2458}
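// Editorial illustration (not part of the upstream source): for the Width = 4
// (word byte-reverse) case, the only fully-defined mask accepted by the helper
// above is
//   {3,2,1,0, 7,6,5,4, 11,10,9,8, 15,14,13,12}
// since each word's bytes must decrease (StepLen = -1) and the element at
// i = 0,4,8,12 must equal i + Width - 1.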
2459
2460/// Can node \p N be lowered to an XXPERMDI instruction? If so, set \p Swap
2461/// if the inputs to the instruction should be swapped and set \p DM to the
2462/// value for the immediate.
2463/// Specifically, set \p Swap to true only if \p N can be lowered to XXPERMDI
2464/// AND element 0 of the result comes from the first input (LE) or second input
2465/// (BE). Set \p DM to the calculated result (0-3) only if \p N can be lowered.
2466/// \return true iff the given mask of shuffle node \p N is a XXPERMDI shuffle
2467/// mask.
2468bool PPC::isXXPERMDIShuffleMask(ShuffleVectorSDNode *N, unsigned &DM,
2469 bool &Swap, bool IsLE) {
2470 assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
2471
2472 // Ensure each byte index of the double word is consecutive.
2473 if (!isNByteElemShuffleMask(N, 8, 1))
2474 return false;
2475
2476 unsigned M0 = N->getMaskElt(0) / 8;
2477 unsigned M1 = N->getMaskElt(8) / 8;
2478 assert(((M0 | M1) < 4) && "A mask element out of bounds?");
2479
2480 // If both vector operands for the shuffle are the same vector, the mask will
2481 // contain only elements from the first one and the second one will be undef.
2482 if (N->getOperand(1).isUndef()) {
2483 if ((M0 | M1) < 2) {
2484 DM = IsLE ? (((~M1) & 1) << 1) + ((~M0) & 1) : (M0 << 1) + (M1 & 1);
2485 Swap = false;
2486 return true;
2487 } else
2488 return false;
2489 }
2490
2491 if (IsLE) {
2492 if (M0 > 1 && M1 < 2) {
2493 Swap = false;
2494 } else if (M0 < 2 && M1 > 1) {
2495 M0 = (M0 + 2) % 4;
2496 M1 = (M1 + 2) % 4;
2497 Swap = true;
2498 } else
2499 return false;
2500
2501 // Note: if control flow comes here that means Swap is already set above
2502 DM = (((~M1) & 1) << 1) + ((~M0) & 1);
2503 return true;
2504 } else { // BE
2505 if (M0 < 2 && M1 > 1) {
2506 Swap = false;
2507 } else if (M0 > 1 && M1 < 2) {
2508 M0 = (M0 + 2) % 4;
2509 M1 = (M1 + 2) % 4;
2510 Swap = true;
2511 } else
2512 return false;
2513
2514 // Note: if control flow comes here that means Swap is already set above
2515 DM = (M0 << 1) + (M1 & 1);
2516 return true;
2517 }
2518}
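// Editorial illustration (not part of the upstream source): on big-endian, a
// two-input shuffle whose doubleword mask is {0,3} (bytes 0..7 followed by
// bytes 24..31) takes the M0 < 2 && M1 > 1 branch above, giving Swap = false
// and DM = (0 << 1) + (3 & 1) = 1.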
2519
2520
2521/// getSplatIdxForPPCMnemonics - Return the splat index as a value that is
2522/// appropriate for PPC mnemonics (which have a big endian bias - namely
2523/// elements are counted from the left of the vector register).
2524unsigned PPC::getSplatIdxForPPCMnemonics(SDNode *N, unsigned EltSize,
2525 SelectionDAG &DAG) {
2526 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
2527 assert(isSplatShuffleMask(SVOp, EltSize));
2528 EVT VT = SVOp->getValueType(0);
2529
2530 if (VT == MVT::v2i64 || VT == MVT::v2f64)
2531 return DAG.getDataLayout().isLittleEndian() ? 1 - SVOp->getMaskElt(0)
2532 : SVOp->getMaskElt(0);
2533
2534 if (DAG.getDataLayout().isLittleEndian())
2535 return (16 / EltSize) - 1 - (SVOp->getMaskElt(0) / EltSize);
2536 else
2537 return SVOp->getMaskElt(0) / EltSize;
2538}
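// Editorial illustration (not part of the upstream source): with EltSize = 4
// and a mask that splats word 1 (mask element 0 == 4), this returns 1 on
// big-endian but (16/4) - 1 - 1 = 2 on little-endian, matching the
// left-to-right element numbering used by the vspltw/xxspltw mnemonics.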
2539
2540/// get_VSPLTI_elt - If this is a build_vector of constants which can be formed
2541/// by using a vspltis[bhw] instruction of the specified element size, return
2542/// the constant being splatted. The ByteSize field indicates the number of
2543/// bytes of each element [124] -> [bhw].
2544SDValue PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) {
2545 SDValue OpVal;
2546
2547 // If ByteSize of the splat is bigger than the element size of the
2548 // build_vector, then we have a case where we are checking for a splat where
2549 // multiple elements of the buildvector are folded together into a single
2550 // logical element of the splat (e.g. "vspltish 1" to splat {0,1}*8).
2551 unsigned EltSize = 16/N->getNumOperands();
2552 if (EltSize < ByteSize) {
2553 unsigned Multiple = ByteSize/EltSize; // Number of BV entries per spltval.
2554 SDValue UniquedVals[4];
2555 assert(Multiple > 1 && Multiple <= 4 && "How can this happen?");
2556
2557 // See if all of the elements in the buildvector agree across.
2558 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
2559 if (N->getOperand(i).isUndef()) continue;
2560 // If the element isn't a constant, bail fully out.
2561 if (!isa<ConstantSDNode>(N->getOperand(i))) return SDValue();
2562
2563 if (!UniquedVals[i&(Multiple-1)].getNode())
2564 UniquedVals[i&(Multiple-1)] = N->getOperand(i);
2565 else if (UniquedVals[i&(Multiple-1)] != N->getOperand(i))
2566 return SDValue(); // no match.
2567 }
2568
2569 // Okay, if we reached this point, UniquedVals[0..Multiple-1] contains
2570 // either constant or undef values that are identical for each chunk. See
2571 // if these chunks can form into a larger vspltis*.
2572
2573 // Check to see if all of the leading entries are either 0 or -1. If
2574 // neither, then this won't fit into the immediate field.
2575 bool LeadingZero = true;
2576 bool LeadingOnes = true;
2577 for (unsigned i = 0; i != Multiple-1; ++i) {
2578 if (!UniquedVals[i].getNode()) continue; // Must have been undefs.
2579
2580 LeadingZero &= isNullConstant(UniquedVals[i]);
2581 LeadingOnes &= isAllOnesConstant(UniquedVals[i]);
2582 }
2583 // Finally, check the least significant entry.
2584 if (LeadingZero) {
2585 if (!UniquedVals[Multiple-1].getNode())
2586 return DAG.getTargetConstant(0, SDLoc(N), MVT::i32); // 0,0,0,undef
2587 int Val = UniquedVals[Multiple - 1]->getAsZExtVal();
2588 if (Val < 16) // 0,0,0,4 -> vspltisw(4)
2589 return DAG.getTargetConstant(Val, SDLoc(N), MVT::i32);
2590 }
2591 if (LeadingOnes) {
2592 if (!UniquedVals[Multiple-1].getNode())
2593 return DAG.getTargetConstant(~0U, SDLoc(N), MVT::i32); // -1,-1,-1,undef
2594 int Val =cast<ConstantSDNode>(UniquedVals[Multiple-1])->getSExtValue();
2595 if (Val >= -16) // -1,-1,-1,-2 -> vspltisw(-2)
2596 return DAG.getTargetConstant(Val, SDLoc(N), MVT::i32);
2597 }
2598
2599 return SDValue();
2600 }
2601
2602 // Check to see if this buildvec has a single non-undef value in its elements.
2603 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
2604 if (N->getOperand(i).isUndef()) continue;
2605 if (!OpVal.getNode())
2606 OpVal = N->getOperand(i);
2607 else if (OpVal != N->getOperand(i))
2608 return SDValue();
2609 }
2610
2611 if (!OpVal.getNode()) return SDValue(); // All UNDEF: use implicit def.
2612
2613 unsigned ValSizeInBytes = EltSize;
2614 uint64_t Value = 0;
2615 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
2616 Value = CN->getZExtValue();
2617 } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
2618 assert(CN->getValueType(0) == MVT::f32 && "Only one legal FP vector type!");
2619 Value = llvm::bit_cast<uint32_t>(CN->getValueAPF().convertToFloat());
2620 }
2621
2622 // If the splat value is larger than the element value, then we can never do
2623 // this splat. The only case that we could fit the replicated bits into our
2624 // immediate field for would be zero, and we prefer to use vxor for it.
2625 if (ValSizeInBytes < ByteSize) return SDValue();
2626
2627 // If the element value is larger than the splat value, check if it consists
2628 // of a repeated bit pattern of size ByteSize.
2629 if (!APInt(ValSizeInBytes * 8, Value).isSplat(ByteSize * 8))
2630 return SDValue();
2631
2632 // Properly sign extend the value.
2633 int MaskVal = SignExtend32(Value, ByteSize * 8);
2634
2635 // If this is zero, don't match, zero matches ISD::isBuildVectorAllZeros.
2636 if (MaskVal == 0) return SDValue();
2637
2638 // Finally, if this value fits in a 5 bit sext field, return it
2639 if (SignExtend32<5>(MaskVal) == MaskVal)
2640 return DAG.getTargetConstant(MaskVal, SDLoc(N), MVT::i32);
2641 return SDValue();
2642}
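// Editorial illustration (not part of the upstream source): a v16i8
// build_vector of sixteen i8 5s queried with ByteSize = 1 returns the target
// constant 5 (lowerable to vspltisb 5), while an all-zeros build_vector returns
// SDValue() because MaskVal == 0 is deliberately left to
// ISD::isBuildVectorAllZeros.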
2643
2644//===----------------------------------------------------------------------===//
2645// Addressing Mode Selection
2646//===----------------------------------------------------------------------===//
2647
2648/// isIntS16Immediate - This method tests to see if the node is either a 32-bit
2649/// or 64-bit immediate, and if the value can be accurately represented as a
2650/// sign extension from a 16-bit value. If so, this returns true and the
2651/// immediate.
2652bool llvm::isIntS16Immediate(SDNode *N, int16_t &Imm) {
2653 if (!isa<ConstantSDNode>(N))
2654 return false;
2655
2656 Imm = (int16_t)N->getAsZExtVal();
2657 if (N->getValueType(0) == MVT::i32)
2658 return Imm == (int32_t)N->getAsZExtVal();
2659 else
2660 return Imm == (int64_t)N->getAsZExtVal();
2661}
2662bool llvm::isIntS16Immediate(SDValue Op, int16_t &Imm) {
2663 return isIntS16Immediate(Op.getNode(), Imm);
2664}
2665
2666/// Used when computing address flags for selecting loads and stores.
2667/// If we have an OR, check if the LHS and RHS are provably disjoint.
2668/// An OR of two provably disjoint values is equivalent to an ADD.
2669/// Most PPC load/store instructions compute the effective address as a sum,
2670/// so doing this conversion is useful.
2671static bool provablyDisjointOr(SelectionDAG &DAG, const SDValue &N) {
2672 if (N.getOpcode() != ISD::OR)
2673 return false;
2674 KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));
2675 if (!LHSKnown.Zero.getBoolValue())
2676 return false;
2677 KnownBits RHSKnown = DAG.computeKnownBits(N.getOperand(1));
2678 return (~(LHSKnown.Zero | RHSKnown.Zero) == 0);
2679}
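// Editorial illustration (not part of the upstream source; %x is a placeholder
// operand): for (or (shl %x, 4), 3) the low 4 bits of the LHS are known zero
// and only bits 0-1 of the RHS can be set, so every bit is known zero on at
// least one side and the OR can safely be treated as an ADD when forming
// addresses.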
2680
2681/// SelectAddressEVXRegReg - Given the specified address, check to see if it can
2682/// be represented as an indexed [r+r] operation.
2683bool PPCTargetLowering::SelectAddressEVXRegReg(SDValue N, SDValue &Base,
2684 SDValue &Index,
2685 SelectionDAG &DAG) const {
2686 for (SDNode *U : N->uses()) {
2687 if (MemSDNode *Memop = dyn_cast<MemSDNode>(U)) {
2688 if (Memop->getMemoryVT() == MVT::f64) {
2689 Base = N.getOperand(0);
2690 Index = N.getOperand(1);
2691 return true;
2692 }
2693 }
2694 }
2695 return false;
2696}
2697
2698/// isIntS34Immediate - This method tests if value of node given can be
2699/// accurately represented as a sign extension from a 34-bit value. If so,
2700/// this returns true and the immediate.
2701bool llvm::isIntS34Immediate(SDNode *N, int64_t &Imm) {
2702 if (!isa<ConstantSDNode>(N))
2703 return false;
2704
2705 Imm = (int64_t)N->getAsZExtVal();
2706 return isInt<34>(Imm);
2707}
2708bool llvm::isIntS34Immediate(SDValue Op, int64_t &Imm) {
2709 return isIntS34Immediate(Op.getNode(), Imm);
2710}
2711
2712/// SelectAddressRegReg - Given the specified address, check to see if it
2713/// can be represented as an indexed [r+r] operation. Returns false if it
2714/// can be more efficiently represented as [r+imm]. If \p EncodingAlignment is
2715/// non-zero and N can be represented by a base register plus a signed 16-bit
2716/// displacement, make a more precise judgement by checking (displacement % \p
2717/// EncodingAlignment).
2718bool PPCTargetLowering::SelectAddressRegReg(SDValue N, SDValue &Base,
2719 SDValue &Index, SelectionDAG &DAG,
2720 MaybeAlign EncodingAlignment) const {
2721 // If we have a PC Relative target flag don't select as [reg+reg]. It will be
2722 // a [pc+imm].
2723 if (SelectAddressPCRel(N, Base))
2724 return false;
2725
2726 int16_t Imm = 0;
2727 if (N.getOpcode() == ISD::ADD) {
2728 // Is there an SPE load/store (f64) that can't handle a 16-bit offset?
2729 // SPE load/store can only handle 8-bit offsets.
2730 if (hasSPE() && SelectAddressEVXRegReg(N, Base, Index, DAG))
2731 return true;
2732 if (isIntS16Immediate(N.getOperand(1), Imm) &&
2733 (!EncodingAlignment || isAligned(*EncodingAlignment, Imm)))
2734 return false; // r+i
2735 if (N.getOperand(1).getOpcode() == PPCISD::Lo)
2736 return false; // r+i
2737
2738 Base = N.getOperand(0);
2739 Index = N.getOperand(1);
2740 return true;
2741 } else if (N.getOpcode() == ISD::OR) {
2742 if (isIntS16Immediate(N.getOperand(1), Imm) &&
2743 (!EncodingAlignment || isAligned(*EncodingAlignment, Imm)))
2744 return false; // r+i can fold it if we can.
2745
2746 // If this is an or of disjoint bitfields, we can codegen this as an add
2747 // (for better address arithmetic) if the LHS and RHS of the OR are provably
2748 // disjoint.
2749 KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));
2750
2751 if (LHSKnown.Zero.getBoolValue()) {
2752 KnownBits RHSKnown = DAG.computeKnownBits(N.getOperand(1));
2753 // If all of the bits are known zero on the LHS or RHS, the add won't
2754 // carry.
2755 if (~(LHSKnown.Zero | RHSKnown.Zero) == 0) {
2756 Base = N.getOperand(0);
2757 Index = N.getOperand(1);
2758 return true;
2759 }
2760 }
2761 }
2762
2763 return false;
2764}
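// Editorial illustration (not part of the upstream source; %r and %s are
// placeholder operands): (add %r, %s) is selected as Base = %r, Index = %s;
// (add %r, 20) with EncodingAlignment = 4 returns false because the aligned
// 16-bit immediate is better handled as [r+imm]; (add %r, 22) with the same
// alignment is accepted as [r+r], since 22 is not a multiple of 4 and cannot
// be encoded in the DS-form displacement.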
2765
2766// If we happen to be doing an i64 load or store into a stack slot that has
2767// less than a 4-byte alignment, then the frame-index elimination may need to
2768// use an indexed load or store instruction (because the offset may not be a
2769// multiple of 4). The extra register needed to hold the offset comes from the
2770// register scavenger, and it is possible that the scavenger will need to use
2771// an emergency spill slot. As a result, we need to make sure that a spill slot
2772// is allocated when doing an i64 load/store into a less-than-4-byte-aligned
2773// stack slot.
2774static void fixupFuncForFI(SelectionDAG &DAG, int FrameIdx, EVT VT) {
2775 // FIXME: This does not handle the LWA case.
2776 if (VT != MVT::i64)
2777 return;
2778
2779 // NOTE: We'll exclude negative FIs here, which come from argument
2780 // lowering, because there are no known test cases triggering this problem
2781 // using packed structures (or similar). We can remove this exclusion if
2782 // we find such a test case. The reason why this is so test-case driven is
2783 // because this entire 'fixup' is only to prevent crashes (from the
2784 // register scavenger) on not-really-valid inputs. For example, if we have:
2785 // %a = alloca i1
2786 // %b = bitcast i1* %a to i64*
2787 // store i64* a, i64 b
2788 // then the store should really be marked as 'align 1', but is not. If it
2789 // were marked as 'align 1' then the indexed form would have been
2790 // instruction-selected initially, and the problem this 'fixup' is preventing
2791 // won't happen regardless.
2792 if (FrameIdx < 0)
2793 return;
2794
2795 MachineFunction &MF = DAG.getMachineFunction();
2796 MachineFrameInfo &MFI = MF.getFrameInfo();
2797
2798 if (MFI.getObjectAlign(FrameIdx) >= Align(4))
2799 return;
2800
2801 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
2802 FuncInfo->setHasNonRISpills();
2803}
2804
2805/// Returns true if the address N can be represented by a base register plus
2806/// a signed 16-bit displacement [r+imm], and if it is not better
2807/// represented as reg+reg. If \p EncodingAlignment is non-zero, only accept
2808/// displacements that are multiples of that value.
2809bool PPCTargetLowering::SelectAddressRegImm(
2810 SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG,
2811 MaybeAlign EncodingAlignment) const {
2812 // FIXME dl should come from parent load or store, not from address
2813 SDLoc dl(N);
2814
2815 // If we have a PC Relative target flag don't select as [reg+imm]. It will be
2816 // a [pc+imm].
2817 if (SelectAddressPCRel(N, Base))
2818 return false;
2819
2820 // If this can be more profitably realized as r+r, fail.
2821 if (SelectAddressRegReg(N, Disp, Base, DAG, EncodingAlignment))
2822 return false;
2823
2824 if (N.getOpcode() == ISD::ADD) {
2825 int16_t imm = 0;
2826 if (isIntS16Immediate(N.getOperand(1), imm) &&
2827 (!EncodingAlignment || isAligned(*EncodingAlignment, imm))) {
2828 Disp = DAG.getTargetConstant(imm, dl, N.getValueType());
2829 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
2830 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2831 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
2832 } else {
2833 Base = N.getOperand(0);
2834 }
2835 return true; // [r+i]
2836 } else if (N.getOperand(1).getOpcode() == PPCISD::Lo) {
2837 // Match LOAD (ADD (X, Lo(G))).
2838 assert(!N.getOperand(1).getConstantOperandVal(1) &&
2839 "Cannot handle constant offsets yet!");
2840 Disp = N.getOperand(1).getOperand(0); // The global address.
2841 assert(Disp.getOpcode() == ISD::TargetGlobalAddress ||
2842 Disp.getOpcode() == ISD::TargetGlobalTLSAddress ||
2843 Disp.getOpcode() == ISD::TargetConstantPool ||
2844 Disp.getOpcode() == ISD::TargetJumpTable);
2845 Base = N.getOperand(0);
2846 return true; // [&g+r]
2847 }
2848 } else if (N.getOpcode() == ISD::OR) {
2849 int16_t imm = 0;
2850 if (isIntS16Immediate(N.getOperand(1), imm) &&
2851 (!EncodingAlignment || isAligned(*EncodingAlignment, imm))) {
2852 // If this is an or of disjoint bitfields, we can codegen this as an add
2853 // (for better address arithmetic) if the LHS and RHS of the OR are
2854 // provably disjoint.
2855 KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));
2856
2857 if ((LHSKnown.Zero.getZExtValue()|~(uint64_t)imm) == ~0ULL) {
2858 // If all of the bits are known zero on the LHS or RHS, the add won't
2859 // carry.
2860 if (FrameIndexSDNode *FI =
2861 dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
2862 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2863 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
2864 } else {
2865 Base = N.getOperand(0);
2866 }
2867 Disp = DAG.getTargetConstant(imm, dl, N.getValueType());
2868 return true;
2869 }
2870 }
2871 } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) {
2872 // Loading from a constant address.
2873
2874 // If this address fits entirely in a 16-bit sext immediate field, codegen
2875 // this as "d, 0"
2876 int16_t Imm;
2877 if (isIntS16Immediate(CN, Imm) &&
2878 (!EncodingAlignment || isAligned(*EncodingAlignment, Imm))) {
2879 Disp = DAG.getTargetConstant(Imm, dl, CN->getValueType(0));
2880 Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
2881 CN->getValueType(0));
2882 return true;
2883 }
2884
2885 // Handle 32-bit sext immediates with LIS + addr mode.
2886 if ((CN->getValueType(0) == MVT::i32 ||
2887 (int64_t)CN->getZExtValue() == (int)CN->getZExtValue()) &&
2888 (!EncodingAlignment ||
2889 isAligned(*EncodingAlignment, CN->getZExtValue()))) {
2890 int Addr = (int)CN->getZExtValue();
2891
2892 // Otherwise, break this down into an LIS + disp.
2893 Disp = DAG.getTargetConstant((short)Addr, dl, MVT::i32);
2894
2895 Base = DAG.getTargetConstant((Addr - (signed short)Addr) >> 16, dl,
2896 MVT::i32);
2897 unsigned Opc = CN->getValueType(0) == MVT::i32 ? PPC::LIS : PPC::LIS8;
2898 Base = SDValue(DAG.getMachineNode(Opc, dl, CN->getValueType(0), Base), 0);
2899 return true;
2900 }
2901 }
2902
2903 Disp = DAG.getTargetConstant(0, dl, getPointerTy(DAG.getDataLayout()));
2904 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N)) {
2905 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2906 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
2907 } else
2908 Base = N;
2909 return true; // [r+0]
2910}
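// Editorial illustration (not part of the upstream source; %r is a placeholder
// operand): (add %r, 20) is selected as Disp = 20, Base = %r; a bare constant
// address such as 24572 (which fits in a sign-extended 16-bit field and is a
// multiple of 4) becomes Disp = 24572 with Base = the zero register, and
// anything else falls through to the final [r+0] form.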
2911
2912/// Similar to the 16-bit case but for instructions that take a 34-bit
2913/// displacement field (prefixed loads/stores).
2914bool PPCTargetLowering::SelectAddressRegImm34(SDValue N, SDValue &Disp,
2915 SDValue &Base,
2916 SelectionDAG &DAG) const {
2917 // Only on 64-bit targets.
2918 if (N.getValueType() != MVT::i64)
2919 return false;
2920
2921 SDLoc dl(N);
2922 int64_t Imm = 0;
2923
2924 if (N.getOpcode() == ISD::ADD) {
2925 if (!isIntS34Immediate(N.getOperand(1), Imm))
2926 return false;
2927 Disp = DAG.getTargetConstant(Imm, dl, N.getValueType());
2928 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0)))
2929 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2930 else
2931 Base = N.getOperand(0);
2932 return true;
2933 }
2934
2935 if (N.getOpcode() == ISD::OR) {
2936 if (!isIntS34Immediate(N.getOperand(1), Imm))
2937 return false;
2938 // If this is an or of disjoint bitfields, we can codegen this as an add
2939 // (for better address arithmetic) if the LHS and RHS of the OR are
2940 // provably disjoint.
2941 KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));
2942 if ((LHSKnown.Zero.getZExtValue() | ~(uint64_t)Imm) != ~0ULL)
2943 return false;
2944 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0)))
2945 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2946 else
2947 Base = N.getOperand(0);
2948 Disp = DAG.getTargetConstant(Imm, dl, N.getValueType());
2949 return true;
2950 }
2951
2952 if (isIntS34Immediate(N, Imm)) { // If the address is a 34-bit const.
2953 Disp = DAG.getTargetConstant(Imm, dl, N.getValueType());
2954 Base = DAG.getRegister(PPC::ZERO8, N.getValueType());
2955 return true;
2956 }
2957
2958 return false;
2959}
2960
2961/// SelectAddressRegRegOnly - Given the specified address, force it to be
2962/// represented as an indexed [r+r] operation.
2963bool PPCTargetLowering::SelectAddressRegRegOnly(SDValue N, SDValue &Base,
2964 SDValue &Index,
2965 SelectionDAG &DAG) const {
2966 // Check to see if we can easily represent this as an [r+r] address. This
2967 // will fail if it thinks that the address is more profitably represented as
2968 // reg+imm, e.g. where imm = 0.
2969 if (SelectAddressRegReg(N, Base, Index, DAG))
2970 return true;
2971
2972 // If the address is the result of an add, we will utilize the fact that the
2973 // address calculation includes an implicit add. However, we can reduce
2974 // register pressure if we do not materialize a constant just for use as the
2975 // index register. We only get rid of the add if it is not an add of a
2976 // value and a 16-bit signed constant and both have a single use.
2977 int16_t imm = 0;
2978 if (N.getOpcode() == ISD::ADD &&
2979 (!isIntS16Immediate(N.getOperand(1), imm) ||
2980 !N.getOperand(1).hasOneUse() || !N.getOperand(0).hasOneUse())) {
2981 Base = N.getOperand(0);
2982 Index = N.getOperand(1);
2983 return true;
2984 }
2985
2986 // Otherwise, do it the hard way, using R0 as the base register.
2987 Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
2988 N.getValueType());
2989 Index = N;
2990 return true;
2991}
2992
2993template <typename Ty> static bool isValidPCRelNode(SDValue N) {
2994 Ty *PCRelCand = dyn_cast<Ty>(N);
2995 return PCRelCand && (PPCInstrInfo::hasPCRelFlag(PCRelCand->getTargetFlags()));
2996}
2997
2998/// Returns true if this address is a PC Relative address.
2999/// PC Relative addresses are marked with the flag PPCII::MO_PCREL_FLAG
3000/// or if the node opcode is PPCISD::MAT_PCREL_ADDR.
3001bool PPCTargetLowering::SelectAddressPCRel(SDValue N, SDValue &Base) const {
3002 // This is a materialize PC Relative node. Always select this as PC Relative.
3003 Base = N;
3004 if (N.getOpcode() == PPCISD::MAT_PCREL_ADDR)
3005 return true;
3006 if (isValidPCRelNode<ConstantPoolSDNode>(N) ||
3007 isValidPCRelNode<GlobalAddressSDNode>(N) ||
3008 isValidPCRelNode<JumpTableSDNode>(N) ||
3009 isValidPCRelNode<BlockAddressSDNode>(N))
3010 return true;
3011 return false;
3012}
3013
3014/// Returns true if we should use a direct load into vector instruction
3015/// (such as lxsd or lfd), instead of a load into gpr + direct move sequence.
3016static bool usePartialVectorLoads(SDNode *N, const PPCSubtarget& ST) {
3017
3018 // If there are any other uses other than scalar to vector, then we should
3019 // keep it as a scalar load -> direct move pattern to prevent multiple
3020 // loads.
3021 LoadSDNode *LD = dyn_cast<LoadSDNode>(N);
3022 if (!LD)
3023 return false;
3024
3025 EVT MemVT = LD->getMemoryVT();
3026 if (!MemVT.isSimple())
3027 return false;
3028 switch(MemVT.getSimpleVT().SimpleTy) {
3029 case MVT::i64:
3030 break;
3031 case MVT::i32:
3032 if (!ST.hasP8Vector())
3033 return false;
3034 break;
3035 case MVT::i16:
3036 case MVT::i8:
3037 if (!ST.hasP9Vector())
3038 return false;
3039 break;
3040 default:
3041 return false;
3042 }
3043
3044 SDValue LoadedVal(N, 0);
3045 if (!LoadedVal.hasOneUse())
3046 return false;
3047
3048 for (SDNode::use_iterator UI = LD->use_begin(), UE = LD->use_end();
3049 UI != UE; ++UI)
3050 if (UI.getUse().get().getResNo() == 0 &&
3051 UI->getOpcode() != ISD::SCALAR_TO_VECTOR &&
3052 UI->getOpcode() != PPCISD::SCALAR_TO_VECTOR_PERMUTED)
3053 return false;
3054
3055 return true;
3056}
3057
3058/// getPreIndexedAddressParts - returns true by value, base pointer and
3059/// offset pointer and addressing mode by reference if the node's address
3060/// can be legally represented as pre-indexed load / store address.
3061bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
3062 SDValue &Offset,
3063 ISD::MemIndexedMode &AM,
3064 SelectionDAG &DAG) const {
3065 if (DisablePPCPreinc) return false;
3066
3067 bool isLoad = true;
3068 SDValue Ptr;
3069 EVT VT;
3070 Align Alignment;
3071 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
3072 Ptr = LD->getBasePtr();
3073 VT = LD->getMemoryVT();
3074 Alignment = LD->getAlign();
3075 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
3076 Ptr = ST->getBasePtr();
3077 VT = ST->getMemoryVT();
3078 Alignment = ST->getAlign();
3079 isLoad = false;
3080 } else
3081 return false;
3082
3083 // Do not generate pre-inc forms for specific loads that feed scalar_to_vector
3084 // instructions because we can fold these into a more efficient instruction
3085 // instead, (such as LXSD).
3086 if (isLoad && usePartialVectorLoads(N, Subtarget)) {
3087 return false;
3088 }
3089
3090 // PowerPC doesn't have preinc load/store instructions for vectors
3091 if (VT.isVector())
3092 return false;
3093
3094 if (SelectAddressRegReg(Ptr, Base, Offset, DAG)) {
3095 // Common code will reject creating a pre-inc form if the base pointer
3096 // is a frame index, or if N is a store and the base pointer is either
3097 // the same as or a predecessor of the value being stored. Check for
3098 // those situations here, and try with swapped Base/Offset instead.
3099 bool Swap = false;
3100
3101 if (isa<FrameIndexSDNode>(Base) || isa<RegisterSDNode>(Base))
3102 Swap = true;
3103 else if (!isLoad) {
3104 SDValue Val = cast<StoreSDNode>(N)->getValue();
3105 if (Val == Base || Base.getNode()->isPredecessorOf(Val.getNode()))
3106 Swap = true;
3107 }
3108
3109 if (Swap)
3110 std::swap(Base, Offset);
3111
3112 AM = ISD::PRE_INC;
3113 return true;
3114 }
3115
3116 // LDU/STU can only handle immediates that are a multiple of 4.
3117 if (VT != MVT::i64) {
3118 if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, std::nullopt))
3119 return false;
3120 } else {
3121 // LDU/STU need an address with at least 4-byte alignment.
3122 if (Alignment < Align(4))
3123 return false;
3124
3125 if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, Align(4)))
3126 return false;
3127 }
3128
3129 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
3130 // PPC64 doesn't have lwau, but it does have lwaux. Reject preinc load of
3131 // sext i32 to i64 when addr mode is r+i.
3132 if (LD->getValueType(0) == MVT::i64 && LD->getMemoryVT() == MVT::i32 &&
3133 LD->getExtensionType() == ISD::SEXTLOAD &&
3134 isa<ConstantSDNode>(Offset))
3135 return false;
3136 }
3137
3138 AM = ISD::PRE_INC;
3139 return true;
3140}
3141
3142//===----------------------------------------------------------------------===//
3143// LowerOperation implementation
3144//===----------------------------------------------------------------------===//
3145
3146/// Return true if we should reference labels using a PICBase, set the HiOpFlags
3147/// and LoOpFlags to the target MO flags.
3148static void getLabelAccessInfo(bool IsPIC, const PPCSubtarget &Subtarget,
3149 unsigned &HiOpFlags, unsigned &LoOpFlags,
3150 const GlobalValue *GV = nullptr) {
3151 HiOpFlags = PPCII::MO_HA;
3152 LoOpFlags = PPCII::MO_LO;
3153
3154 // Don't use the pic base if not in PIC relocation model.
3155 if (IsPIC) {
3156 HiOpFlags = PPCII::MO_PIC_HA_FLAG;
3157 LoOpFlags = PPCII::MO_PIC_LO_FLAG;
3158 }
3159}
3160
3161static SDValue LowerLabelRef(SDValue HiPart, SDValue LoPart, bool isPIC,
3162 SelectionDAG &DAG) {
3163 SDLoc DL(HiPart);
3164 EVT PtrVT = HiPart.getValueType();
3165 SDValue Zero = DAG.getConstant(0, DL, PtrVT);
3166
3167 SDValue Hi = DAG.getNode(PPCISD::Hi, DL, PtrVT, HiPart, Zero);
3168 SDValue Lo = DAG.getNode(PPCISD::Lo, DL, PtrVT, LoPart, Zero);
3169
3170 // With PIC, the first instruction is actually "GR+hi(&G)".
3171 if (isPIC)
3172 Hi = DAG.getNode(ISD::ADD, DL, PtrVT,
3173 DAG.getNode(PPCISD::GlobalBaseReg, DL, PtrVT), Hi);
3174
3175 // Generate non-pic code that has direct accesses to the constant pool.
3176 // The address of the global is just (hi(&g)+lo(&g)).
3177 return DAG.getNode(ISD::ADD, DL, PtrVT, Hi, Lo);
3178}
3179
3180static void setUsesTOCBasePtr(MachineFunction &MF) {
3181 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
3182 FuncInfo->setUsesTOCBasePtr();
3183}
3184
3185static void setUsesTOCBasePtr(SelectionDAG &DAG) {
3186 setUsesTOCBasePtr(DAG.getMachineFunction());
3187}
3188
3189SDValue PPCTargetLowering::getTOCEntry(SelectionDAG &DAG, const SDLoc &dl,
3190 SDValue GA) const {
3191 const bool Is64Bit = Subtarget.isPPC64();
3192 EVT VT = Is64Bit ? MVT::i64 : MVT::i32;
3193 SDValue Reg = Is64Bit ? DAG.getRegister(PPC::X2, VT)
3194 : Subtarget.isAIXABI()
3195 ? DAG.getRegister(PPC::R2, VT)
3196 : DAG.getNode(PPCISD::GlobalBaseReg, dl, VT);
3197 SDValue Ops[] = { GA, Reg };
3198 return DAG.getMemIntrinsicNode(
3199 PPCISD::TOC_ENTRY, dl, DAG.getVTList(VT, MVT::Other), Ops, VT,
3200 MachinePointerInfo::getGOT(DAG.getMachineFunction()), std::nullopt,
3201 MachineMemOperand::MOLoad);
3202}
3203
3204SDValue PPCTargetLowering::LowerConstantPool(SDValue Op,
3205 SelectionDAG &DAG) const {
3206 EVT PtrVT = Op.getValueType();
3207 ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
3208 const Constant *C = CP->getConstVal();
3209
3210 // 64-bit SVR4 ABI and AIX ABI code are always position-independent.
3211 // The actual address of the GlobalValue is stored in the TOC.
3212 if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
3213 if (Subtarget.isUsingPCRelativeCalls()) {
3214 SDLoc DL(CP);
3215 EVT Ty = getPointerTy(DAG.getDataLayout());
3216 SDValue ConstPool = DAG.getTargetConstantPool(
3217 C, Ty, CP->getAlign(), CP->getOffset(), PPCII::MO_PCREL_FLAG);
3218 return DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, ConstPool);
3219 }
3220 setUsesTOCBasePtr(DAG);
3221 SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), 0);
3222 return getTOCEntry(DAG, SDLoc(CP), GA);
3223 }
3224
3225 unsigned MOHiFlag, MOLoFlag;
3226 bool IsPIC = isPositionIndependent();
3227 getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);
3228
3229 if (IsPIC && Subtarget.isSVR4ABI()) {
3230 SDValue GA =
3231 DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), PPCII::MO_PIC_FLAG);
3232 return getTOCEntry(DAG, SDLoc(CP), GA);
3233 }
3234
3235 SDValue CPIHi =
3236 DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), 0, MOHiFlag);
3237 SDValue CPILo =
3238 DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), 0, MOLoFlag);
3239 return LowerLabelRef(CPIHi, CPILo, IsPIC, DAG);
3240}
3241
3242// For 64-bit PowerPC, prefer the more compact relative encodings.
3243// This trades 32 bits per jump table entry for one or two instructions
3244// on the jump site.
3245unsigned PPCTargetLowering::getJumpTableEncoding() const {
3246 if (isJumpTableRelative())
3247 return MachineJumpTableInfo::EK_LabelDifference32;
3248
3249 return TargetLowering::getJumpTableEncoding();
3250}
3251
3252bool PPCTargetLowering::isJumpTableRelative() const {
3253 if (UseAbsoluteJumpTables)
3254 return false;
3255 if (Subtarget.isPPC64() || Subtarget.isAIXABI())
3256 return true;
3257 return TargetLowering::isJumpTableRelative();
3258}
3259
3261 SelectionDAG &DAG) const {
3262 if (!Subtarget.isPPC64() || Subtarget.isAIXABI())
3264
3265 switch (getTargetMachine().getCodeModel()) {
3266 case CodeModel::Small:
3267 case CodeModel::Medium:
3269 default:
3270 return DAG.getNode(PPCISD::GlobalBaseReg, SDLoc(),
3272 }
3273}
3274
3275const MCExpr *
3277 unsigned JTI,
3278 MCContext &Ctx) const {
3279 if (!Subtarget.isPPC64() || Subtarget.isAIXABI())
3281
3282 switch (getTargetMachine().getCodeModel()) {
3283 case CodeModel::Small:
3284 case CodeModel::Medium:
3286 default:
3287 return MCSymbolRefExpr::create(MF->getPICBaseSymbol(), Ctx);
3288 }
3289}
3290
3291SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
3292 EVT PtrVT = Op.getValueType();
3293 JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
3294
3295 // isUsingPCRelativeCalls() returns true when PCRelative is enabled
3296 if (Subtarget.isUsingPCRelativeCalls()) {
3297 SDLoc DL(JT);
3298 EVT Ty = getPointerTy(DAG.getDataLayout());
3299 SDValue GA =
3300 DAG.getTargetJumpTable(JT->getIndex(), Ty, PPCII::MO_PCREL_FLAG);
3301 SDValue MatAddr = DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
3302 return MatAddr;
3303 }
3304
3305 // 64-bit SVR4 ABI and AIX ABI code are always position-independent.
3306 // The actual address of the GlobalValue is stored in the TOC.
3307 if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
3308 setUsesTOCBasePtr(DAG);
3309 SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
3310 return getTOCEntry(DAG, SDLoc(JT), GA);
3311 }
3312
3313 unsigned MOHiFlag, MOLoFlag;
3314 bool IsPIC = isPositionIndependent();
3315 getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);
3316
3317 if (IsPIC && Subtarget.isSVR4ABI()) {
3318 SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT,
3319 PPCII::MO_PIC_FLAG);
3320 return getTOCEntry(DAG, SDLoc(GA), GA);
3321 }
3322
3323 SDValue JTIHi = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOHiFlag);
3324 SDValue JTILo = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOLoFlag);
3325 return LowerLabelRef(JTIHi, JTILo, IsPIC, DAG);
3326}
3327
3328SDValue PPCTargetLowering::LowerBlockAddress(SDValue Op,
3329 SelectionDAG &DAG) const {
3330 EVT PtrVT = Op.getValueType();
3331 BlockAddressSDNode *BASDN = cast<BlockAddressSDNode>(Op);
3332 const BlockAddress *BA = BASDN->getBlockAddress();
3333
3334 // isUsingPCRelativeCalls() returns true when PCRelative is enabled
3335 if (Subtarget.isUsingPCRelativeCalls()) {
3336 SDLoc DL(BASDN);
3337 EVT Ty = getPointerTy(DAG.getDataLayout());
3338 SDValue GA = DAG.getTargetBlockAddress(BA, Ty, BASDN->getOffset(),
3339 PPCII::MO_PCREL_FLAG);
3340 SDValue MatAddr = DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
3341 return MatAddr;
3342 }
3343
3344 // 64-bit SVR4 ABI and AIX ABI code are always position-independent.
3345 // The actual BlockAddress is stored in the TOC.
3346 if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
3347 setUsesTOCBasePtr(DAG);
3348 SDValue GA = DAG.getTargetBlockAddress(BA, PtrVT, BASDN->getOffset());
3349 return getTOCEntry(DAG, SDLoc(BASDN), GA);
3350 }
3351
3352 // 32-bit position-independent ELF stores the BlockAddress in the .got.
3353 if (Subtarget.is32BitELFABI() && isPositionIndependent())
3354 return getTOCEntry(
3355 DAG, SDLoc(BASDN),
3356 DAG.getTargetBlockAddress(BA, PtrVT, BASDN->getOffset()));
3357
3358 unsigned MOHiFlag, MOLoFlag;
3359 bool IsPIC = isPositionIndependent();
3360 getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);
3361 SDValue TgtBAHi = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOHiFlag);
3362 SDValue TgtBALo = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOLoFlag);
3363 return LowerLabelRef(TgtBAHi, TgtBALo, IsPIC, DAG);
3364}
3365
3366SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
3367 SelectionDAG &DAG) const {
3368 if (Subtarget.isAIXABI())
3369 return LowerGlobalTLSAddressAIX(Op, DAG);
3370
3371 return LowerGlobalTLSAddressLinux(Op, DAG);
3372}
3373
3374/// updateForAIXShLibTLSModelOpt - Helper to initialize TLS model opt settings,
3375/// and then apply the update.
3376static void updateForAIXShLibTLSModelOpt(TLSModel::Model &Model,
3377 SelectionDAG &DAG,
3378 const TargetMachine &TM) {
3379 // Initialize TLS model opt setting lazily:
3380 // (1) Use initial-exec for single TLS var references within current function.
3381 // (2) Use local-dynamic for multiple TLS var references within current
3382 // function.
3383 PPCFunctionInfo *FuncInfo =
3384 DAG.getMachineFunction().getInfo<PPCFunctionInfo>();
3385 if (!FuncInfo->isAIXFuncTLSModelOptInitDone()) {
3386 SmallPtrSet<const GlobalValue *, 4> TLSGV;
3387 // Iterate over all instructions within current function, collect all TLS
3388 // global variables (global variables taken as the first parameter to
3389 // Intrinsic::threadlocal_address).
3390 const Function &Func = DAG.getMachineFunction().getFunction();
3391 for (const BasicBlock &BB : Func)
3392 for (const Instruction &I : BB)
3393 if (I.getOpcode() == Instruction::Call)
3394 if (const CallInst *CI = dyn_cast<const CallInst>(&I))
3395 if (Function *CF = CI->getCalledFunction())
3396 if (CF->isDeclaration() &&
3397 CF->getIntrinsicID() == Intrinsic::threadlocal_address)
3398 if (const GlobalValue *GV =
3399 dyn_cast<GlobalValue>(I.getOperand(0))) {
3400 TLSModel::Model GVModel = TM.getTLSModel(GV);
3401 if (GVModel == TLSModel::LocalDynamic)
3402 TLSGV.insert(GV);
3403 }
3404
3405 unsigned TLSGVCnt = TLSGV.size();
3406 LLVM_DEBUG(dbgs() << format("LocalDynamic TLSGV count:%d\n", TLSGVCnt));
3407 if (TLSGVCnt <= PPCAIXTLSModelOptUseIEForLDLimit)
3408 FuncInfo->setAIXFuncUseTLSIEForLD();
3409 FuncInfo->setAIXFuncTLSModelOptInitDone();
3410 }
3411
3412 if (FuncInfo->isAIXFuncUseTLSIEForLD()) {
3413 LLVM_DEBUG(
3414 dbgs() << DAG.getMachineFunction().getName()
3415 << " function is using the TLS-IE model for TLS-LD access.\n");
3416 Model = TLSModel::InitialExec;
3417 }
3418}
3419
3420SDValue PPCTargetLowering::LowerGlobalTLSAddressAIX(SDValue Op,
3421 SelectionDAG &DAG) const {
3422 GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
3423
3424 if (DAG.getTarget().useEmulatedTLS())
3425 report_fatal_error("Emulated TLS is not yet supported on AIX");
3426
3427 SDLoc dl(GA);
3428 const GlobalValue *GV = GA->getGlobal();
3429 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3430 bool Is64Bit = Subtarget.isPPC64();
3432 TLSModel::Model Model = getTargetMachine().getTLSModel(GV);
3433 // Apply update to the TLS model.
3434 if (Subtarget.hasAIXShLibTLSModelOpt())
3435 updateForAIXShLibTLSModelOpt(Model, DAG, getTargetMachine());
3436
3437 bool IsTLSLocalExecModel = Model == TLSModel::LocalExec;
3438
3439 if (IsTLSLocalExecModel || Model == TLSModel::InitialExec) {
3440 bool HasAIXSmallLocalExecTLS = Subtarget.hasAIXSmallLocalExecTLS();
3441 bool HasAIXSmallTLSGlobalAttr = false;
3442 SDValue VariableOffsetTGA =
3443 DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, PPCII::MO_TPREL_FLAG);
3444 SDValue VariableOffset = getTOCEntry(DAG, dl, VariableOffsetTGA);
3445 SDValue TLSReg;
3446
3447 if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV))
3448 if (GVar->hasAttribute("aix-small-tls"))
3449 HasAIXSmallTLSGlobalAttr = true;
3450
3451 if (Is64Bit) {
3452 // For local-exec and initial-exec on AIX (64-bit), the sequence generated
3453 // involves a load of the variable offset (from the TOC), followed by an
3454 // add of the loaded variable offset to R13 (the thread pointer).
3455 // This code sequence looks like:
3456 // ld reg1,var[TC](2)
3457 // add reg2, reg1, r13 // r13 contains the thread pointer
3458 TLSReg = DAG.getRegister(PPC::X13, MVT::i64);
3459
3460 // With the -maix-small-local-exec-tls option, or with the "aix-small-tls"
3461 // global variable attribute, produce a faster access sequence for
3462 // local-exec TLS variables where the offset from the TLS base is encoded
3463 // as an immediate operand.
3464 //
3465 // We only utilize the faster local-exec access sequence when the TLS
3466 // variable has a size within the policy limit. We treat types that are
3467 // not sized or are empty as being over the policy size limit.
3468 if ((HasAIXSmallLocalExecTLS || HasAIXSmallTLSGlobalAttr) &&
3469 IsTLSLocalExecModel) {
3470 Type *GVType = GV->getValueType();
3471 if (GVType->isSized() && !GVType->isEmptyTy() &&
3472 GV->getDataLayout().getTypeAllocSize(GVType) <=
3473 AIXSmallTlsPolicySizeLimit)
3474 return DAG.getNode(PPCISD::Lo, dl, PtrVT, VariableOffsetTGA, TLSReg);
3475 }
3476 } else {
3477 // For local-exec and initial-exec on AIX (32-bit), the sequence generated
3478 // involves loading the variable offset from the TOC, generating a call to
3479 // .__get_tpointer to get the thread pointer (which will be in R3), and
3480 // adding the two together:
3481 // lwz reg1,var[TC](2)
3482 // bla .__get_tpointer
3483 // add reg2, reg1, r3
3484 TLSReg = DAG.getNode(PPCISD::GET_TPOINTER, dl, PtrVT);
3485
3486 // We do not implement the 32-bit version of the faster access sequence
3487 // for local-exec that is controlled by the -maix-small-local-exec-tls
3488 // option, or the "aix-small-tls" global variable attribute.
3489 if (HasAIXSmallLocalExecTLS || HasAIXSmallTLSGlobalAttr)
3490 report_fatal_error("The small-local-exec TLS access sequence is "
3491 "currently only supported on AIX (64-bit mode).");
3492 }
3493 return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TLSReg, VariableOffset);
3494 }
3495
3496 if (Model == TLSModel::LocalDynamic) {
3497 bool HasAIXSmallLocalDynamicTLS = Subtarget.hasAIXSmallLocalDynamicTLS();
3498
3499 // We do not implement the 32-bit version of the faster access sequence
3500 // for local-dynamic that is controlled by -maix-small-local-dynamic-tls.
3501 if (!Is64Bit && HasAIXSmallLocalDynamicTLS)
3502 report_fatal_error("The small-local-dynamic TLS access sequence is "
3503 "currently only supported on AIX (64-bit mode).");
3504
3505 // For local-dynamic on AIX, we need to generate one TOC entry for each
3506 // variable offset, and a single module-handle TOC entry for the entire
3507 // file.
3508
3509 SDValue VariableOffsetTGA =
3510 DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, PPCII::MO_TLSLD_FLAG);
3511 SDValue VariableOffset = getTOCEntry(DAG, dl, VariableOffsetTGA);
3512
3513 Module *M = DAG.getMachineFunction().getFunction().getParent();
3514 GlobalVariable *TLSGV =
3515 dyn_cast_or_null<GlobalVariable>(M->getOrInsertGlobal(
3516 StringRef("_$TLSML"), PointerType::getUnqual(*DAG.getContext())));
3517 TLSGV->setThreadLocal(true);
3518 assert(TLSGV && "Not able to create GV for _$TLSML.");
3519 SDValue ModuleHandleTGA =
3520 DAG.getTargetGlobalAddress(TLSGV, dl, PtrVT, 0, PPCII::MO_TLSLDM_FLAG);
3521 SDValue ModuleHandleTOC = getTOCEntry(DAG, dl, ModuleHandleTGA);
3522 SDValue ModuleHandle =
3523 DAG.getNode(PPCISD::TLSLD_AIX, dl, PtrVT, ModuleHandleTOC);
3524
3525 // With the -maix-small-local-dynamic-tls option, produce a faster access
3526 // sequence for local-dynamic TLS variables where the offset from the
3527 // module-handle is encoded as an immediate operand.
3528 //
3529 // We only utilize the faster local-dynamic access sequence when the TLS
3530 // variable has a size within the policy limit. We treat types that are
3531 // not sized or are empty as being over the policy size limit.
3532 if (HasAIXSmallLocalDynamicTLS) {
3533 Type *GVType = GV->getValueType();
3534 if (GVType->isSized() && !GVType->isEmptyTy() &&
3535 GV->getDataLayout().getTypeAllocSize(GVType) <=
3536 AIXSmallTlsPolicySizeLimit)
3537 return DAG.getNode(PPCISD::Lo, dl, PtrVT, VariableOffsetTGA,
3538 ModuleHandle);
3539 }
3540
3541 return DAG.getNode(ISD::ADD, dl, PtrVT, ModuleHandle, VariableOffset);
3542 }
3543
3544 // If Local- or Initial-exec or Local-dynamic is not possible or specified,
3545 // all GlobalTLSAddress nodes are lowered using the general-dynamic model. We
3546 // need to generate two TOC entries, one for the variable offset, one for the
3547 // region handle. The global address for the TOC entry of the region handle is
3548 // created with the MO_TLSGDM_FLAG flag and the global address for the TOC
3549 // entry of the variable offset is created with MO_TLSGD_FLAG.
3550 SDValue VariableOffsetTGA =
3551 DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, PPCII::MO_TLSGD_FLAG);
3552 SDValue RegionHandleTGA =
3553 DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, PPCII::MO_TLSGDM_FLAG);
3554 SDValue VariableOffset = getTOCEntry(DAG, dl, VariableOffsetTGA);
3555 SDValue RegionHandle = getTOCEntry(DAG, dl, RegionHandleTGA);
3556 return DAG.getNode(PPCISD::TLSGD_AIX, dl, PtrVT, VariableOffset,
3557 RegionHandle);
3558}
3559
3560SDValue PPCTargetLowering::LowerGlobalTLSAddressLinux(SDValue Op,
3561 SelectionDAG &DAG) const {
3562 // FIXME: TLS addresses currently use medium model code sequences,
3563 // which is the most useful form. Eventually support for small and
3564 // large models could be added if users need it, at the cost of
3565 // additional complexity.
3566 GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
3567 if (DAG.getTarget().useEmulatedTLS())
3568 return LowerToTLSEmulatedModel(GA, DAG);
3569
3570 SDLoc dl(GA);
3571 const GlobalValue *GV = GA->getGlobal();
3572 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3573 bool is64bit = Subtarget.isPPC64();
3574 const Module *M = DAG.getMachineFunction().getFunction().getParent();
3575 PICLevel::Level picLevel = M->getPICLevel();
3576
3577 const TargetMachine &TM = getTargetMachine();
3578 TLSModel::Model Model = TM.getTLSModel(GV);
3579
3580 if (Model == TLSModel::LocalExec) {
3581 if (Subtarget.isUsingPCRelativeCalls()) {
3582 SDValue TLSReg = DAG.getRegister(PPC::X13, MVT::i64);
3583 SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3584 PPCII::MO_TPREL_PCREL_FLAG);
3585 SDValue MatAddr =
3586 DAG.getNode(PPCISD::TLS_LOCAL_EXEC_MAT_ADDR, dl, PtrVT, TGA);
3587 return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TLSReg, MatAddr);
3588 }
3589
3590 SDValue TGAHi = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3591 PPCII::MO_TPREL_HA);
3592 SDValue TGALo = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3593 PPCII::MO_TPREL_LO);
3594 SDValue TLSReg = is64bit ? DAG.getRegister(PPC::X13, MVT::i64)
3595 : DAG.getRegister(PPC::R2, MVT::i32);
3596
3597 SDValue Hi = DAG.getNode(PPCISD::Hi, dl, PtrVT, TGAHi, TLSReg);
3598 return DAG.getNode(PPCISD::Lo, dl, PtrVT, TGALo, Hi);
3599 }
3600
3601 if (Model == TLSModel::InitialExec) {
3602 bool IsPCRel = Subtarget.isUsingPCRelativeCalls();
3603 SDValue TGA = DAG.getTargetGlobalAddress(
3604 GV, dl, PtrVT, 0, IsPCRel ? PPCII::MO_GOT_TPREL_PCREL_FLAG : 0);
3605 SDValue TGATLS = DAG.getTargetGlobalAddress(
3606 GV, dl, PtrVT, 0, IsPCRel ? PPCII::MO_TLS_PCREL_FLAG : PPCII::MO_TLS);
3607 SDValue TPOffset;
3608 if (IsPCRel) {
3609 SDValue MatPCRel = DAG.getNode(PPCISD::MAT_PCREL_ADDR, dl, PtrVT, TGA);
3610 TPOffset = DAG.getLoad(MVT::i64, dl, DAG.getEntryNode(), MatPCRel,
3611 MachinePointerInfo());
3612 } else {
3613 SDValue GOTPtr;
3614 if (is64bit) {
3615 setUsesTOCBasePtr(DAG);
3616 SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
3617 GOTPtr =
3618 DAG.getNode(PPCISD::ADDIS_GOT_TPREL_HA, dl, PtrVT, GOTReg, TGA);
3619 } else {
3620 if (!TM.isPositionIndependent())
3621 GOTPtr = DAG.getNode(PPCISD::PPC32_GOT, dl, PtrVT);
3622 else if (picLevel == PICLevel::SmallPIC)
3623 GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
3624 else
3625 GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
3626 }
3627 TPOffset = DAG.getNode(PPCISD::LD_GOT_TPREL_L, dl, PtrVT, TGA, GOTPtr);
3628 }
3629 return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TPOffset, TGATLS);
3630 }
3631
3632 if (Model == TLSModel::GeneralDynamic) {
3633 if (Subtarget.isUsingPCRelativeCalls()) {
3634 SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3635 PPCII::MO_GOT_TLSGD_PCREL_FLAG);
3636 return DAG.getNode(PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR, dl, PtrVT, TGA);
3637 }
3638
3639 SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
3640 SDValue GOTPtr;
3641 if (is64bit) {
3642 setUsesTOCBasePtr(DAG);
3643 SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
3644 GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSGD_HA, dl, PtrVT,
3645 GOTReg, TGA);
3646 } else {
3647 if (picLevel == PICLevel::SmallPIC)
3648 GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
3649 else
3650 GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
3651 }
3652 return DAG.getNode(PPCISD::ADDI_TLSGD_L_ADDR, dl, PtrVT,
3653 GOTPtr, TGA, TGA);
3654 }
3655
3656 if (Model == TLSModel::LocalDynamic) {
3657 if (Subtarget.isUsingPCRelativeCalls()) {
3658 SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3659 PPCII::MO_GOT_TLSLD_PCREL_FLAG);
3660 SDValue MatPCRel =
3661 DAG.getNode(PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR, dl, PtrVT, TGA);
3662 return DAG.getNode(PPCISD::PADDI_DTPREL, dl, PtrVT, MatPCRel, TGA);
3663 }
3664
3665 SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
3666 SDValue GOTPtr;
3667 if (is64bit) {
3668 setUsesTOCBasePtr(DAG);
3669 SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
3670 GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSLD_HA, dl, PtrVT,
3671 GOTReg, TGA);
3672 } else {
3673 if (picLevel == PICLevel::SmallPIC)
3674 GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
3675 else
3676 GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
3677 }
3678 SDValue TLSAddr = DAG.getNode(PPCISD::ADDI_TLSLD_L_ADDR, dl,
3679 PtrVT, GOTPtr, TGA, TGA);
3680 SDValue DtvOffsetHi = DAG.getNode(PPCISD::ADDIS_DTPREL_HA, dl,
3681 PtrVT, TLSAddr, TGA);
3682 return DAG.getNode(PPCISD::ADDI_DTPREL_L, dl, PtrVT, DtvOffsetHi, TGA);
3683 }
3684
3685 llvm_unreachable("Unknown TLS model!");
3686}
3687
3688SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op,
3689 SelectionDAG &DAG) const {
3690 EVT PtrVT = Op.getValueType();
3691 GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
3692 SDLoc DL(GSDN);
3693 const GlobalValue *GV = GSDN->getGlobal();
3694
3695 // 64-bit SVR4 ABI & AIX ABI code is always position-independent.
3696 // The actual address of the GlobalValue is stored in the TOC.
3697 if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
3698 if (Subtarget.isUsingPCRelativeCalls()) {
3699 EVT Ty = getPointerTy(DAG.getDataLayout());
3700 if (isAccessedAsGotIndirect(Op)) {
3701 SDValue GA = DAG.getTargetGlobalAddress(GV, DL, Ty, GSDN->getOffset(),
3702 PPCII::MO_GOT_PCREL_FLAG);
3703 SDValue MatPCRel = DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
3704 SDValue Load = DAG.getLoad(MVT::i64, DL, DAG.getEntryNode(), MatPCRel,
3705 MachinePointerInfo());
3706 return Load;
3707 } else {
3708 SDValue GA = DAG.getTargetGlobalAddress(GV, DL, Ty, GSDN->getOffset(),
3709 PPCII::MO_PCREL_FLAG);
3710 return DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
3711 }
3712 }
3713 setUsesTOCBasePtr(DAG);
3714 SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset());
3715 return getTOCEntry(DAG, DL, GA);
3716 }
3717
3718 unsigned MOHiFlag, MOLoFlag;
3719 bool IsPIC = isPositionIndependent();
3720 getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag, GV);
3721
3722 if (IsPIC && Subtarget.isSVR4ABI()) {
3723 SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT,
3724 GSDN->getOffset(),
3725 PPCII::MO_PIC_FLAG);
3726 return getTOCEntry(DAG, DL, GA);
3727 }
3728
3729 SDValue GAHi =
3730 DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOHiFlag);
3731 SDValue GALo =
3732 DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOLoFlag);
3733
3734 return LowerLabelRef(GAHi, GALo, IsPIC, DAG);
3735}
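// Illustrative sketch of the PC-relative paths above (register choice assumed):
//   paddi r3, 0, sym@pcrel, 1        - MAT_PCREL_ADDR, direct address materialization
//   pld   r3, sym@got@pcrel(0), 1    - GOT-indirect form, loads the address from the GOT
// Non-PC-relative code instead goes through a TOC entry via getTOCEntry().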
3736
3737SDValue PPCTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
3738 bool IsStrict = Op->isStrictFPOpcode();
3739 ISD::CondCode CC =
3740 cast<CondCodeSDNode>(Op.getOperand(IsStrict ? 3 : 2))->get();
3741 SDValue LHS = Op.getOperand(IsStrict ? 1 : 0);
3742 SDValue RHS = Op.getOperand(IsStrict ? 2 : 1);
3743 SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
3744 EVT LHSVT = LHS.getValueType();
3745 SDLoc dl(Op);
3746
3747 // Soften the setcc with libcall if it is fp128.
3748 if (LHSVT == MVT::f128) {
3749 assert(!Subtarget.hasP9Vector() &&
3750 "SETCC for f128 is already legal under Power9!");
3751 softenSetCCOperands(DAG, LHSVT, LHS, RHS, CC, dl, LHS, RHS, Chain,
3752 Op->getOpcode() == ISD::STRICT_FSETCCS);
3753 if (RHS.getNode())
3754 LHS = DAG.getNode(ISD::SETCC, dl, Op.getValueType(), LHS, RHS,
3755 DAG.getCondCode(CC));
3756 if (IsStrict)
3757 return DAG.getMergeValues({LHS, Chain}, dl);
3758 return LHS;
3759 }
3760
3761 assert(!IsStrict && "Don't know how to handle STRICT_FSETCC!");
3762
3763 if (Op.getValueType() == MVT::v2i64) {
3764 // When the operands themselves are v2i64 values, we need to do something
3765 // special because VSX has no underlying comparison operations for these.
3766 if (LHS.getValueType() == MVT::v2i64) {
3767 // Equality can be handled by casting to the legal type for Altivec
3768 // comparisons, everything else needs to be expanded.
3769 if (CC != ISD::SETEQ && CC != ISD::SETNE)
3770 return SDValue();
3771 SDValue SetCC32 = DAG.getSetCC(
3772 dl, MVT::v4i32, DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, LHS),
3773 DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, RHS), CC);
3774 int ShuffV[] = {1, 0, 3, 2};
3775 SDValue Shuff =
3776 DAG.getVectorShuffle(MVT::v4i32, dl, SetCC32, SetCC32, ShuffV);
3777 return DAG.getBitcast(MVT::v2i64,
3778 DAG.getNode(CC == ISD::SETEQ ? ISD::AND : ISD::OR,
3779 dl, MVT::v4i32, Shuff, SetCC32));
3780 }
3781
3782 // We handle most of these in the usual way.
3783 return Op;
3784 }
3785
3786 // If we're comparing for equality to zero, expose the fact that this is
3787 // implemented as a ctlz/srl pair on ppc, so that the dag combiner can
3788 // fold the new nodes.
3789 if (SDValue V = lowerCmpEqZeroToCtlzSrl(Op, DAG))
3790 return V;
3791
3792 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS)) {
3793 // Leave comparisons against 0 and -1 alone for now, since they're usually
3794 // optimized. FIXME: revisit this when we can custom lower all setcc
3795 // optimizations.
3796 if (C->isAllOnes() || C->isZero())
3797 return SDValue();
3798 }
3799
3800 // If we have an integer seteq/setne, turn it into a compare against zero
3801 // by xor'ing the rhs with the lhs, which is faster than setting a
3802 // condition register, reading it back out, and masking the correct bit. The
3803 // normal approach here uses sub to do this instead of xor. Using xor exposes
3804 // the result to other bit-twiddling opportunities.
3805 if (LHSVT.isInteger() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
3806 EVT VT = Op.getValueType();
3807 SDValue Sub = DAG.getNode(ISD::XOR, dl, LHSVT, LHS, RHS);
3808 return DAG.getSetCC(dl, VT, Sub, DAG.getConstant(0, dl, LHSVT), CC);
3809 }
3810 return SDValue();
3811}
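// Worked example of the integer equality rewrite above, assuming i32 operands:
//   setcc eq (a, b)  ==>  setcc eq (xor a, b), 0
// The zero-equality form can then be matched by lowerCmpEqZeroToCtlzSrl into a
// cntlzw of the xor followed by a shift right by 5 (32 leading zeros iff equal).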
3812
3813SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
3814 SDNode *Node = Op.getNode();
3815 EVT VT = Node->getValueType(0);
3816 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3817 SDValue InChain = Node->getOperand(0);
3818 SDValue VAListPtr = Node->getOperand(1);
3819 const Value *SV = cast<SrcValueSDNode>(Node->getOperand(2))->getValue();
3820 SDLoc dl(Node);
3821
3822 assert(!Subtarget.isPPC64() && "LowerVAARG is PPC32 only");
3823
3824 // gpr_index
3825 SDValue GprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain,
3826 VAListPtr, MachinePointerInfo(SV), MVT::i8);
3827 InChain = GprIndex.getValue(1);
3828
3829 if (VT == MVT::i64) {
3830 // Check if GprIndex is even
3831 SDValue GprAnd = DAG.getNode(ISD::AND, dl, MVT::i32, GprIndex,
3832 DAG.getConstant(1, dl, MVT::i32));
3833 SDValue CC64 = DAG.getSetCC(dl, MVT::i32, GprAnd,
3834 DAG.getConstant(0, dl, MVT::i32), ISD::SETNE);
3835 SDValue GprIndexPlusOne = DAG.getNode(ISD::ADD, dl, MVT::i32, GprIndex,
3836 DAG.getConstant(1, dl, MVT::i32));
3837 // Align GprIndex to be even if it isn't
3838 GprIndex = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC64, GprIndexPlusOne,
3839 GprIndex);
3840 }
3841
3842 // fpr index is 1 byte after gpr
3843 SDValue FprPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
3844 DAG.getConstant(1, dl, MVT::i32));
3845
3846 // fpr
3847 SDValue FprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain,
3848 FprPtr, MachinePointerInfo(SV), MVT::i8);
3849 InChain = FprIndex.getValue(1);
3850
3851 SDValue RegSaveAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
3852 DAG.getConstant(8, dl, MVT::i32));
3853
3854 SDValue OverflowAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
3855 DAG.getConstant(4, dl, MVT::i32));
3856
3857 // areas
3858 SDValue OverflowArea =
3859 DAG.getLoad(MVT::i32, dl, InChain, OverflowAreaPtr, MachinePointerInfo());
3860 InChain = OverflowArea.getValue(1);
3861
3862 SDValue RegSaveArea =
3863 DAG.getLoad(MVT::i32, dl, InChain, RegSaveAreaPtr, MachinePointerInfo());
3864 InChain = RegSaveArea.getValue(1);
3865
3866 // select overflow_area if index > 8
3867 SDValue CC = DAG.getSetCC(dl, MVT::i32, VT.isInteger() ? GprIndex : FprIndex,
3868 DAG.getConstant(8, dl, MVT::i32), ISD::SETLT);
3869
3870 // adjustment constant gpr_index * 4/8
3871 SDValue RegConstant = DAG.getNode(ISD::MUL, dl, MVT::i32,
3872 VT.isInteger() ? GprIndex : FprIndex,
3873 DAG.getConstant(VT.isInteger() ? 4 : 8, dl,
3874 MVT::i32));
3875
3876 // OurReg = RegSaveArea + RegConstant
3877 SDValue OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, RegSaveArea,
3878 RegConstant);
3879
3880 // Floating types are 32 bytes into RegSaveArea
3881 if (VT.isFloatingPoint())
3882 OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, OurReg,
3883 DAG.getConstant(32, dl, MVT::i32));
3884
3885 // increase {f,g}pr_index by 1 (or 2 if VT is i64)
3886 SDValue IndexPlus1 = DAG.getNode(ISD::ADD, dl, MVT::i32,
3887 VT.isInteger() ? GprIndex : FprIndex,
3888 DAG.getConstant(VT == MVT::i64 ? 2 : 1, dl,
3889 MVT::i32));
3890
3891 InChain = DAG.getTruncStore(InChain, dl, IndexPlus1,
3892 VT.isInteger() ? VAListPtr : FprPtr,
3893 MachinePointerInfo(SV), MVT::i8);
3894
3895 // determine if we should load from reg_save_area or overflow_area
3896 SDValue Result = DAG.getNode(ISD::SELECT, dl, PtrVT, CC, OurReg, OverflowArea);
3897
3898 // increase overflow_area by 4/8 if gpr/fpr > 8
3899 SDValue OverflowAreaPlusN = DAG.getNode(ISD::ADD, dl, PtrVT, OverflowArea,
3900 DAG.getConstant(VT.isInteger() ? 4 : 8,
3901 dl, MVT::i32));
3902
3903 OverflowArea = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC, OverflowArea,
3904 OverflowAreaPlusN);
3905
3906 InChain = DAG.getTruncStore(InChain, dl, OverflowArea, OverflowAreaPtr,
3907 MachinePointerInfo(), MVT::i32);
3908
3909 return DAG.getLoad(VT, dl, InChain, Result, MachinePointerInfo());
3910}
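// The byte offsets used above follow the 32-bit SVR4 va_list layout, summarized
// here for reference:
//   byte 0 : gpr index (next of r3..r10)
//   byte 1 : fpr index (next of f1..f8)
//   byte 4 : overflow_arg_area pointer (next stack-passed argument)
//   byte 8 : reg_save_area pointer (r3..r10 saved first, f1..f8 at offset 32)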
3911
3912SDValue PPCTargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG) const {
3913 assert(!Subtarget.isPPC64() && "LowerVACOPY is PPC32 only");
3914
3915 // We have to copy the entire va_list struct:
3916 // 2*sizeof(char) + 2 bytes of padding + 2*sizeof(char*) = 12 bytes
3917 return DAG.getMemcpy(Op.getOperand(0), Op, Op.getOperand(1), Op.getOperand(2),
3918 DAG.getConstant(12, SDLoc(Op), MVT::i32), Align(8),
3919 false, true, /*CI=*/nullptr, std::nullopt,
3920 MachinePointerInfo(), MachinePointerInfo());
3921}
3922
3923SDValue PPCTargetLowering::LowerADJUST_TRAMPOLINE(SDValue Op,
3924 SelectionDAG &DAG) const {
3925 if (Subtarget.isAIXABI())
3926 report_fatal_error("ADJUST_TRAMPOLINE operation is not supported on AIX.");
3927
3928 return Op.getOperand(0);
3929}
3930
3931SDValue PPCTargetLowering::LowerINLINEASM(SDValue Op, SelectionDAG &DAG) const {
3932 MachineFunction &MF = DAG.getMachineFunction();
3933 PPCFunctionInfo &MFI = *MF.getInfo<PPCFunctionInfo>();
3934
3935 assert((Op.getOpcode() == ISD::INLINEASM ||
3936 Op.getOpcode() == ISD::INLINEASM_BR) &&
3937 "Expecting Inline ASM node.");
3938
3939 // If an LR store is already known to be required then there is no point in
3940 // checking this ASM as well.
3941 if (MFI.isLRStoreRequired())
3942 return Op;
3943
3944 // Inline ASM nodes have an optional last operand that is an incoming Flag of
3945 // type MVT::Glue. We want to ignore this last operand if that is the case.
3946 unsigned NumOps = Op.getNumOperands();
3947 if (Op.getOperand(NumOps - 1).getValueType() == MVT::Glue)
3948 --NumOps;
3949
3950 // Check all operands that may contain the LR.
3951 for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
3952 const InlineAsm::Flag Flags(Op.getConstantOperandVal(i));
3953 unsigned NumVals = Flags.getNumOperandRegisters();
3954 ++i; // Skip the ID value.
3955
3956 switch (Flags.getKind()) {
3957 default:
3958 llvm_unreachable("Bad flags!");
3959 case InlineAsm::Kind::RegUse:
3960 case InlineAsm::Kind::Imm:
3961 case InlineAsm::Kind::Mem:
3962 i += NumVals;
3963 break;
3964 case InlineAsm::Kind::Clobber:
3965 case InlineAsm::Kind::RegDef:
3966 case InlineAsm::Kind::RegDefEarlyClobber: {
3967 for (; NumVals; --NumVals, ++i) {
3968 Register Reg = cast<RegisterSDNode>(Op.getOperand(i))->getReg();
3969 if (Reg != PPC::LR && Reg != PPC::LR8)
3970 continue;
3971 MFI.setLRStoreRequired();
3972 return Op;
3973 }
3974 break;
3975 }
3976 }
3977 }
3978
3979 return Op;
3980}
3981
3982SDValue PPCTargetLowering::LowerINIT_TRAMPOLINE(SDValue Op,
3983 SelectionDAG &DAG) const {
3984 if (Subtarget.isAIXABI())
3985 report_fatal_error("INIT_TRAMPOLINE operation is not supported on AIX.");
3986
3987 SDValue Chain = Op.getOperand(0);
3988 SDValue Trmp = Op.getOperand(1); // trampoline
3989 SDValue FPtr = Op.getOperand(2); // nested function
3990 SDValue Nest = Op.getOperand(3); // 'nest' parameter value
3991 SDLoc dl(Op);
3992
3993 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3994 bool isPPC64 = (PtrVT == MVT::i64);
3995 Type *IntPtrTy = DAG.getDataLayout().getIntPtrType(*DAG.getContext());
3996
3997 TargetLowering::ArgListTy Args;
3998 TargetLowering::ArgListEntry Entry;
3999
4000 Entry.Ty = IntPtrTy;
4001 Entry.Node = Trmp; Args.push_back(Entry);
4002
4003 // TrampSize == (isPPC64 ? 48 : 40);
4004 Entry.Node = DAG.getConstant(isPPC64 ? 48 : 40, dl,
4005 isPPC64 ? MVT::i64 : MVT::i32);
4006 Args.push_back(Entry);
4007
4008 Entry.Node = FPtr; Args.push_back(Entry);
4009 Entry.Node = Nest; Args.push_back(Entry);
4010
4011 // Lower to a call to __trampoline_setup(Trmp, TrampSize, FPtr, ctx_reg)
4012 TargetLowering::CallLoweringInfo CLI(DAG);
4013 CLI.setDebugLoc(dl).setChain(Chain).setLibCallee(
4014 CallingConv::C, Type::getVoidTy(*DAG.getContext()),
4015 DAG.getExternalSymbol("__trampoline_setup", PtrVT), std::move(Args));
4016
4017 std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
4018 return CallResult.second;
4019}
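// For context, a hypothetical source-level case that reaches this lowering is a
// GNU C nested function whose address escapes:
//   void outer(int x) {
//     int add_x(int y) { return x + y; }
//     apply(&add_x);   // 'apply' is a made-up callback consumer
//   }
// Taking &add_x requires a runtime trampoline, which __trampoline_setup builds.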
4020
4021SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
4022 MachineFunction &MF = DAG.getMachineFunction();
4023 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
4024 EVT PtrVT = getPointerTy(MF.getDataLayout());
4025
4026 SDLoc dl(Op);
4027
4028 if (Subtarget.isPPC64() || Subtarget.isAIXABI()) {
4029 // vastart just stores the address of the VarArgsFrameIndex slot into the
4030 // memory location argument.
4031 SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
4032 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
4033 return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1),
4034 MachinePointerInfo(SV));
4035 }
4036
4037 // For the 32-bit SVR4 ABI we follow the layout of the va_list struct.
4038 // We suppose the given va_list is already allocated.
4039 //
4040 // typedef struct {
4041 // char gpr; /* index into the array of 8 GPRs
4042 // * stored in the register save area
4043 // * gpr=0 corresponds to r3,
4044 // * gpr=1 to r4, etc.
4045 // */
4046 // char fpr; /* index into the array of 8 FPRs
4047 // * stored in the register save area
4048 // * fpr=0 corresponds to f1,
4049 // * fpr=1 to f2, etc.
4050 // */
4051 // char *overflow_arg_area;
4052 // /* location on stack that holds
4053 // * the next overflow argument
4054 // */
4055 // char *reg_save_area;
4056 // /* where r3:r10 and f1:f8 (if saved)
4057 // * are stored
4058 // */
4059 // } va_list[1];
4060
4061 SDValue ArgGPR = DAG.getConstant(FuncInfo->getVarArgsNumGPR(), dl, MVT::i32);
4062 SDValue ArgFPR = DAG.getConstant(FuncInfo->getVarArgsNumFPR(), dl, MVT::i32);
4063 SDValue StackOffsetFI = DAG.getFrameIndex(FuncInfo->getVarArgsStackOffset(),
4064 PtrVT);
4065 SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
4066 PtrVT);
4067
4068 uint64_t FrameOffset = PtrVT.getSizeInBits()/8;
4069 SDValue ConstFrameOffset = DAG.getConstant(FrameOffset, dl, PtrVT);
4070
4071 uint64_t StackOffset = PtrVT.getSizeInBits()/8 - 1;
4072 SDValue ConstStackOffset = DAG.getConstant(StackOffset, dl, PtrVT);
4073
4074 uint64_t FPROffset = 1;
4075 SDValue ConstFPROffset = DAG.getConstant(FPROffset, dl, PtrVT);
4076
4077 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
4078
4079 // Store first byte : number of int regs
4080 SDValue firstStore =
4081 DAG.getTruncStore(Op.getOperand(0), dl, ArgGPR, Op.getOperand(1),
4082 MachinePointerInfo(SV), MVT::i8);
4083 uint64_t nextOffset = FPROffset;
4084 SDValue nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, Op.getOperand(1),
4085 ConstFPROffset);
4086
4087 // Store second byte : number of float regs
4088 SDValue secondStore =
4089 DAG.getTruncStore(firstStore, dl, ArgFPR, nextPtr,
4090 MachinePointerInfo(SV, nextOffset), MVT::i8);
4091 nextOffset += StackOffset;
4092 nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstStackOffset);
4093
4094 // Store second word : arguments given on stack
4095 SDValue thirdStore = DAG.getStore(secondStore, dl, StackOffsetFI, nextPtr,
4096 MachinePointerInfo(SV, nextOffset));
4097 nextOffset += FrameOffset;
4098 nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstFrameOffset);
4099
4100 // Store third word : arguments given in registers
4101 return DAG.getStore(thirdStore, dl, FR, nextPtr,
4102 MachinePointerInfo(SV, nextOffset));
4103}
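// Worked example for 32-bit SVR4 (PtrVT = i32, so FrameOffset = 4, StackOffset = 3,
// FPROffset = 1): bytes 0 and 1 receive the GPR and FPR counts, the word at
// offset 4 receives the overflow-area address, and the word at offset 8 the
// register-save-area address; nextOffset advances 0 -> 1 -> 4 -> 8.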
4104
4105/// FPR - The set of FP registers that should be allocated for arguments
4106/// on Darwin and AIX.
4107static const MCPhysReg FPR[] = {PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5,
4108 PPC::F6, PPC::F7, PPC::F8, PPC::F9, PPC::F10,
4109 PPC::F11, PPC::F12, PPC::F13};
4110
4111/// CalculateStackSlotSize - Calculates the size reserved for this argument on
4112/// the stack.
4113static unsigned CalculateStackSlotSize(EVT ArgVT, ISD::ArgFlagsTy Flags,
4114 unsigned PtrByteSize) {
4115 unsigned ArgSize = ArgVT.getStoreSize();
4116 if (Flags.isByVal())
4117 ArgSize = Flags.getByValSize();
4118
4119 // Round up to multiples of the pointer size, except for array members,
4120 // which are always packed.
4121 if (!Flags.isInConsecutiveRegs())
4122 ArgSize = ((ArgSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
4123
4124 return ArgSize;
4125}
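// Illustrative values, assuming PtrByteSize == 8 (informal notation, not literal calls):
//   i32 scalar            -> 8   (rounded up to the pointer size)
//   byval of 20 bytes     -> 24
//   f32 array member      -> 4   (isInConsecutiveRegs members stay packed)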
4126
4127/// CalculateStackSlotAlignment - Calculates the alignment of this argument
4128/// on the stack.
4129static Align CalculateStackSlotAlignment(EVT ArgVT, EVT OrigVT,
4130 ISD::ArgFlagsTy Flags,
4131 unsigned PtrByteSize) {
4132 Align Alignment(PtrByteSize);
4133
4134 // Altivec parameters are padded to a 16 byte boundary.
4135 if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
4136 ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
4137 ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64 ||
4138 ArgVT == MVT::v1i128 || ArgVT == MVT::f128)
4139 Alignment = Align(16);
4140
4141 // ByVal parameters are aligned as requested.
4142 if (Flags.isByVal()) {
4143 auto BVAlign = Flags.getNonZeroByValAlign();
4144 if (BVAlign > PtrByteSize) {
4145 if (BVAlign.value() % PtrByteSize != 0)
4146 report_fatal_error(
4147 "ByVal alignment is not a multiple of the pointer size");
4148
4149 Alignment = BVAlign;
4150 }
4151 }
4152
4153 // Array members are always packed to their original alignment.
4154 if (Flags.isInConsecutiveRegs()) {
4155 // If the array member was split into multiple registers, the first
4156 // needs to be aligned to the size of the full type. (Except for
4157 // ppcf128, which is only aligned as its f64 components.)
4158 if (Flags.isSplit() && OrigVT != MVT::ppcf128)
4159 Alignment = Align(OrigVT.getStoreSize());
4160 else
4161 Alignment = Align(ArgVT.getStoreSize());
4162 }
4163
4164 return Alignment;
4165}
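// Illustrative outcomes of the rules above (informal):
//   Altivec-class types (v4i32, v2f64, f128, ...)      -> Align(16)
//   byval with a requested alignment above PtrByteSize -> that alignment
//   first piece of a split consecutive-register member -> store size of the full type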
4166
4167/// CalculateStackSlotUsed - Return whether this argument will use its
4168/// stack slot (instead of being passed in registers). ArgOffset,
4169/// AvailableFPRs, and AvailableVRs must hold the current argument
4170/// position, and will be updated to account for this argument.
4171static bool CalculateStackSlotUsed(EVT ArgVT, EVT OrigVT, ISD::ArgFlagsTy Flags,
4172 unsigned PtrByteSize, unsigned LinkageSize,
4173 unsigned ParamAreaSize, unsigned &ArgOffset,
4174 unsigned &AvailableFPRs,
4175 unsigned &AvailableVRs) {
4176 bool UseMemory = false;
4177
4178 // Respect alignment of argument on the stack.
4179 Align Alignment =
4180 CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
4181 ArgOffset = alignTo(ArgOffset, Alignment);
4182 // If there's no space left in the argument save area, we must
4183 // use memory (this check also catches zero-sized arguments).
4184 if (ArgOffset >= LinkageSize + ParamAreaSize)
4185 UseMemory = true;
4186
4187 // Allocate argument on the stack.
4188 ArgOffset += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
4189 if (Flags.isInConsecutiveRegsLast())
4190 ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
4191 // If we overran the argument save area, we must use memory
4192 // (this check catches arguments passed partially in memory)
4193 if (ArgOffset > LinkageSize + ParamAreaSize)
4194 UseMemory = true;
4195
4196 // However, if the argument is actually passed in an FPR or a VR,
4197 // we don't use memory after all.
4198 if (!Flags.isByVal()) {
4199 if (ArgVT == MVT::f32 || ArgVT == MVT::f64)
4200 if (AvailableFPRs > 0) {
4201 --AvailableFPRs;
4202 return false;
4203 }
4204 if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
4205 ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
4206 ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64 ||
4207 ArgVT == MVT::v1i128 || ArgVT == MVT::f128)
4208 if (AvailableVRs > 0) {
4209 --AvailableVRs;
4210 return false;
4211 }
4212 }
4213
4214 return UseMemory;
4215}
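// Worked ELFv2 example, assuming LinkageSize = 32 and ParamAreaSize = 64 (8 GPRs x 8):
//   i64 args 0..7 land at offsets 32, 40, ..., 88 and stay in r3..r10 (no memory);
//   a ninth i64 lands at offset 96 >= 32 + 64, so it is reported as using memory.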
4216
4217/// EnsureStackAlignment - Round stack frame size up from NumBytes to
4218/// ensure minimum alignment required for target.
4219static unsigned EnsureStackAlignment(const PPCFrameLowering *Lowering,
4220 unsigned NumBytes) {
4221 return alignTo(NumBytes, Lowering->getStackAlign());
4222}
4223
4224SDValue PPCTargetLowering::LowerFormalArguments(
4225 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
4226 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
4227 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
4228 if (Subtarget.isAIXABI())
4229 return LowerFormalArguments_AIX(Chain, CallConv, isVarArg, Ins, dl, DAG,
4230 InVals);
4231 if (Subtarget.is64BitELFABI())
4232 return LowerFormalArguments_64SVR4(Chain, CallConv, isVarArg, Ins, dl, DAG,
4233 InVals);
4234 assert(Subtarget.is32BitELFABI());
4235 return LowerFormalArguments_32SVR4(Chain, CallConv, isVarArg, Ins, dl, DAG,
4236 InVals);
4237}
4238
4239SDValue PPCTargetLowering::LowerFormalArguments_32SVR4(
4240 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
4241 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
4242 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
4243
4244 // 32-bit SVR4 ABI Stack Frame Layout:
4245 // +-----------------------------------+
4246 // +--> | Back chain |
4247 // | +-----------------------------------+
4248 // | | Floating-point register save area |
4249 // | +-----------------------------------+
4250 // | | General register save area |
4251 // | +-----------------------------------+
4252 // | | CR save word |
4253 // | +-----------------------------------+
4254 // | | VRSAVE save word |
4255 // | +-----------------------------------+
4256 // | | Alignment padding |
4257 // | +-----------------------------------+
4258 // | | Vector register save area |
4259 // | +-----------------------------------+
4260 // | | Local variable space |
4261 // | +-----------------------------------+
4262 // | | Parameter list area |
4263 // | +-----------------------------------+
4264 // | | LR save word |
4265 // | +-----------------------------------+
4266 // SP--> +--- | Back chain |
4267 // +-----------------------------------+
4268 //
4269 // Specifications:
4270 // System V Application Binary Interface PowerPC Processor Supplement
4271 // AltiVec Technology Programming Interface Manual
4272
4273 MachineFunction &MF = DAG.getMachineFunction();
4274 MachineFrameInfo &MFI = MF.getFrameInfo();
4275 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
4276
4277 EVT PtrVT = getPointerTy(MF.getDataLayout());
4278 // Potential tail calls could cause overwriting of argument stack slots.
4279 bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
4280 (CallConv == CallingConv::Fast));
4281 const Align PtrAlign(4);
4282
4283 // Assign locations to all of the incoming arguments.
4285 PPCCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
4286 *DAG.getContext());
4287
4288 // Reserve space for the linkage area on the stack.
4289 unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
4290 CCInfo.AllocateStack(LinkageSize, PtrAlign);
4291 if (useSoftFloat())
4292 CCInfo.PreAnalyzeFormalArguments(Ins);
4293
4294 CCInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4);
4295 CCInfo.clearWasPPCF128();
4296
4297 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
4298 CCValAssign &VA = ArgLocs[i];
4299
4300 // Arguments stored in registers.
4301 if (VA.isRegLoc()) {
4302 const TargetRegisterClass *RC;
4303 EVT ValVT = VA.getValVT();
4304
4305 switch (ValVT.getSimpleVT().SimpleTy) {
4306 default:
4307 llvm_unreachable("ValVT not supported by formal arguments Lowering");
4308 case MVT::i1:
4309 case MVT::i32:
4310 RC = &PPC::GPRCRegClass;
4311 break;
4312 case MVT::f32:
4313 if (Subtarget.hasP8Vector())
4314 RC = &PPC::VSSRCRegClass;
4315 else if (Subtarget.hasSPE())
4316 RC = &PPC::GPRCRegClass;
4317 else
4318 RC = &PPC::F4RCRegClass;
4319 break;
4320 case MVT::f64:
4321 if (Subtarget.hasVSX())
4322 RC = &PPC::VSFRCRegClass;
4323 else if (Subtarget.hasSPE())
4324 // SPE passes doubles in GPR pairs.
4325 RC = &PPC::GPRCRegClass;
4326 else
4327 RC = &PPC::F8RCRegClass;
4328 break;
4329 case MVT::v16i8:
4330 case MVT::v8i16:
4331 case MVT::v4i32:
4332 RC = &PPC::VRRCRegClass;
4333 break;
4334 case MVT::v4f32:
4335 RC = &PPC::VRRCRegClass;
4336 break;
4337 case MVT::v2f64:
4338 case MVT::v2i64:
4339 RC = &PPC::VRRCRegClass;
4340 break;
4341 }
4342
4343 SDValue ArgValue;
4344 // Transform the arguments stored in physical registers into
4345 // virtual ones.
4346 if (VA.getLocVT() == MVT::f64 && Subtarget.hasSPE()) {
4347 assert(i + 1 < e && "No second half of double precision argument");
4348 Register RegLo = MF.addLiveIn(VA.getLocReg(), RC);
4349 Register RegHi = MF.addLiveIn(ArgLocs[++i].getLocReg(), RC);
4350 SDValue ArgValueLo = DAG.getCopyFromReg(Chain, dl, RegLo, MVT::i32);
4351 SDValue ArgValueHi = DAG.getCopyFromReg(Chain, dl, RegHi, MVT::i32);
4352 if (!Subtarget.isLittleEndian())
4353 std::swap (ArgValueLo, ArgValueHi);
4354 ArgValue = DAG.getNode(PPCISD::BUILD_SPE64, dl, MVT::f64, ArgValueLo,
4355 ArgValueHi);
4356 } else {
4357 Register Reg = MF.addLiveIn(VA.getLocReg(), RC);
4358 ArgValue = DAG.getCopyFromReg(Chain, dl, Reg,
4359 ValVT == MVT::i1 ? MVT::i32 : ValVT);
4360 if (ValVT == MVT::i1)
4361 ArgValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, ArgValue);
4362 }
4363
4364 InVals.push_back(ArgValue);
4365 } else {
4366 // Argument stored in memory.
4367 assert(VA.isMemLoc());
4368
4369 // Get the extended size of the argument type on the stack
4370 unsigned ArgSize = VA.getLocVT().getStoreSize();
4371 // Get the actual size of the argument type
4372 unsigned ObjSize = VA.getValVT().getStoreSize();
4373 unsigned ArgOffset = VA.getLocMemOffset();
4374 // Stack objects in PPC32 are right justified.
4375 ArgOffset += ArgSize - ObjSize;
4376 int FI = MFI.CreateFixedObject(ArgSize, ArgOffset, isImmutable);
4377
4378 // Create load nodes to retrieve arguments from the stack.
4379 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4380 InVals.push_back(
4381 DAG.getLoad(VA.getValVT(), dl, Chain, FIN, MachinePointerInfo()));
4382 }
4383 }
4384
4385 // Assign locations to all of the incoming aggregate by value arguments.
4386 // Aggregates passed by value are stored in the local variable space of the
4387 // caller's stack frame, right above the parameter list area.
4388 SmallVector<CCValAssign, 16> ByValArgLocs;
4389 CCState CCByValInfo(CallConv, isVarArg, DAG.getMachineFunction(),
4390 ByValArgLocs, *DAG.getContext());
4391
4392 // Reserve stack space for the allocations in CCInfo.
4393 CCByValInfo.AllocateStack(CCInfo.getStackSize(), PtrAlign);
4394
4395 CCByValInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4_ByVal);
4396
4397 // Area that is at least reserved in the caller of this function.
4398 unsigned MinReservedArea = CCByValInfo.getStackSize();
4399 MinReservedArea = std::max(MinReservedArea, LinkageSize);
4400
4401 // Set the size that is at least reserved in caller of this function. Tail
4402 // call optimized function's reserved stack space needs to be aligned so that
4403 // taking the difference between two stack areas will result in an aligned
4404 // stack.
4405 MinReservedArea =
4406 EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
4407 FuncInfo->setMinReservedArea(MinReservedArea);
4408
4409 SmallVector<SDValue, 8> MemOps;
4410
4410
4411 // If the function takes variable number of arguments, make a frame index for
4412 // the start of the first vararg value... for expansion of llvm.va_start.
4413 if (isVarArg) {
4414 static const MCPhysReg GPArgRegs[] = {
4415 PPC::R3, PPC::R4, PPC::R5, PPC::R6,
4416 PPC::R7, PPC::R8, PPC::R9, PPC::R10,
4417 };
4418 const unsigned NumGPArgRegs = std::size(GPArgRegs);
4419
4420 static const MCPhysReg FPArgRegs[] = {
4421 PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
4422 PPC::F8
4423 };
4424 unsigned NumFPArgRegs = std::size(FPArgRegs);
4425
4426 if (useSoftFloat() || hasSPE())
4427 NumFPArgRegs = 0;
4428
4429 FuncInfo->setVarArgsNumGPR(CCInfo.getFirstUnallocated(GPArgRegs));
4430 FuncInfo->setVarArgsNumFPR(CCInfo.getFirstUnallocated(FPArgRegs));
4431
4432 // Make room for NumGPArgRegs and NumFPArgRegs.
4433 int Depth = NumGPArgRegs * PtrVT.getSizeInBits()/8 +
4434 NumFPArgRegs * MVT(MVT::f64).getSizeInBits()/8;
4435
4436 FuncInfo->setVarArgsStackOffset(MFI.CreateFixedObject(
4437 PtrVT.getSizeInBits() / 8, CCInfo.getStackSize(), true));
4438
4439 FuncInfo->setVarArgsFrameIndex(
4440 MFI.CreateStackObject(Depth, Align(8), false));
4441 SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
4442
4443 // The fixed integer arguments of a variadic function are stored to the
4444 // VarArgsFrameIndex on the stack so that they may be loaded by
4445 // dereferencing the result of va_next.
4446 for (unsigned GPRIndex = 0; GPRIndex != NumGPArgRegs; ++GPRIndex) {
4447 // Get an existing live-in vreg, or add a new one.
4448 Register VReg = MF.getRegInfo().getLiveInVirtReg(GPArgRegs[GPRIndex]);
4449 if (!VReg)
4450 VReg = MF.addLiveIn(GPArgRegs[GPRIndex], &PPC::GPRCRegClass);
4451
4452 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4453 SDValue Store =
4454 DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
4455 MemOps.push_back(Store);
4456 // Increment the address by four for the next argument to store
4457 SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, dl, PtrVT);
4458 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
4459 }
4460
4461 // FIXME 32-bit SVR4: We only need to save FP argument registers if CR bit 6
4462 // is set.
4463 // The double arguments are stored to the VarArgsFrameIndex
4464 // on the stack.
4465 for (unsigned FPRIndex = 0; FPRIndex != NumFPArgRegs; ++FPRIndex) {
4466 // Get an existing live-in vreg, or add a new one.
4467 Register VReg = MF.getRegInfo().getLiveInVirtReg(FPArgRegs[FPRIndex]);
4468 if (!VReg)
4469 VReg = MF.addLiveIn(FPArgRegs[FPRIndex], &PPC::F8RCRegClass);
4470
4471 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::f64);
4472 SDValue Store =
4473 DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
4474 MemOps.push_back(Store);
4475 // Increment the address by eight for the next argument to store
4476 SDValue PtrOff = DAG.getConstant(MVT(MVT::f64).getSizeInBits()/8, dl,
4477 PtrVT);
4478 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
4479 }
4480 }
4481
4482 if (!MemOps.empty())
4483 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
4484
4485 return Chain;
4486}
4487
4488// PPC64 passes i8, i16, and i32 values in i64 registers. Promote
4489// value to MVT::i64 and then truncate to the correct register size.
4490SDValue PPCTargetLowering::extendArgForPPC64(ISD::ArgFlagsTy Flags,
4491 EVT ObjectVT, SelectionDAG &DAG,
4492 SDValue ArgVal,
4493 const SDLoc &dl) const {
4494 if (Flags.isSExt())
4495 ArgVal = DAG.getNode(ISD::AssertSext, dl, MVT::i64, ArgVal,
4496 DAG.getValueType(ObjectVT));
4497 else if (Flags.isZExt())
4498 ArgVal = DAG.getNode(ISD::AssertZext, dl, MVT::i64, ArgVal,
4499 DAG.getValueType(ObjectVT));
4500
4501 return DAG.getNode(ISD::TRUNCATE, dl, ObjectVT, ArgVal);
4502}
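// For instance, an i32 argument marked signext arrives in a 64-bit GPR and this
// helper produces, in order:
//   t1 = CopyFromReg ...:i64
//   t2 = AssertSext t1, i32     (only when Flags.isSExt())
//   t3 = truncate t2 to i32     (the returned value)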
4503
4504SDValue PPCTargetLowering::LowerFormalArguments_64SVR4(
4505 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
4506 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
4507 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
4508 // TODO: add description of PPC stack frame format, or at least some docs.
4509 //
4510 bool isELFv2ABI = Subtarget.isELFv2ABI();
4511 bool isLittleEndian = Subtarget.isLittleEndian();
4512 MachineFunction &MF = DAG.getMachineFunction();
4513 MachineFrameInfo &MFI = MF.getFrameInfo();
4514 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
4515
4516 assert(!(CallConv == CallingConv::Fast && isVarArg) &&
4517 "fastcc not supported on varargs functions");
4518
4519 EVT PtrVT = getPointerTy(MF.getDataLayout());
4520 // Potential tail calls could cause overwriting of argument stack slots.
4521 bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
4522 (CallConv == CallingConv::Fast));
4523 unsigned PtrByteSize = 8;
4524 unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
4525
4526 static const MCPhysReg GPR[] = {
4527 PPC::X3, PPC::X4, PPC::X5, PPC::X6,
4528 PPC::X7, PPC::X8, PPC::X9, PPC::X10,
4529 };
4530 static const MCPhysReg VR[] = {
4531 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
4532 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
4533 };
4534
4535 const unsigned Num_GPR_Regs = std::size(GPR);
4536 const unsigned Num_FPR_Regs = useSoftFloat() ? 0 : 13;
4537 const unsigned Num_VR_Regs = std::size(VR);
4538
4539 // Do a first pass over the arguments to determine whether the ABI
4540 // guarantees that our caller has allocated the parameter save area
4541 // on its stack frame. In the ELFv1 ABI, this is always the case;
4542 // in the ELFv2 ABI, it is true if this is a vararg function or if
4543 // any parameter is located in a stack slot.
4544
4545 bool HasParameterArea = !isELFv2ABI || isVarArg;
4546 unsigned ParamAreaSize = Num_GPR_Regs * PtrByteSize;
4547 unsigned NumBytes = LinkageSize;
4548 unsigned AvailableFPRs = Num_FPR_Regs;
4549 unsigned AvailableVRs = Num_VR_Regs;
4550 for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
4551 if (Ins[i].Flags.isNest())
4552 continue;
4553
4554 if (CalculateStackSlotUsed(Ins[i].VT, Ins[i].ArgVT, Ins[i].Flags,
4555 PtrByteSize, LinkageSize, ParamAreaSize,
4556 NumBytes, AvailableFPRs, AvailableVRs))
4557 HasParameterArea = true;
4558 }
4559
4560 // Add DAG nodes to load the arguments or copy them out of registers. On
4561 // entry to a function on PPC, the arguments start after the linkage area,
4562 // although the first ones are often in registers.
4563
4564 unsigned ArgOffset = LinkageSize;
4565 unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
4566 SmallVector<SDValue, 8> MemOps;
4567 Function::const_arg_iterator FuncArg = MF.getFunction().arg_begin();
4568 unsigned CurArgIdx = 0;
4569 for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
4570 SDValue ArgVal;
4571 bool needsLoad = false;
4572 EVT ObjectVT = Ins[ArgNo].VT;
4573 EVT OrigVT = Ins[ArgNo].ArgVT;
4574 unsigned ObjSize = ObjectVT.getStoreSize();
4575 unsigned ArgSize = ObjSize;
4576 ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
4577 if (Ins[ArgNo].isOrigArg()) {
4578 std::advance(FuncArg, Ins[ArgNo].getOrigArgIndex() - CurArgIdx);
4579 CurArgIdx = Ins[ArgNo].getOrigArgIndex();
4580 }
4581 // We re-align the argument offset for each argument, except when using the
4582 // fast calling convention, when we need to make sure we do that only when
4583 // we'll actually use a stack slot.
4584 unsigned CurArgOffset;
4585 Align Alignment;
4586 auto ComputeArgOffset = [&]() {
4587 /* Respect alignment of argument on the stack. */
4588 Alignment =
4589 CalculateStackSlotAlignment(ObjectVT, OrigVT, Flags, PtrByteSize);
4590 ArgOffset = alignTo(ArgOffset, Alignment);
4591 CurArgOffset = ArgOffset;
4592 };
4593
4594 if (CallConv != CallingConv::Fast) {
4595 ComputeArgOffset();
4596
4597 /* Compute GPR index associated with argument offset. */
4598 GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
4599 GPR_idx = std::min(GPR_idx, Num_GPR_Regs);
4600 }
4601
4602 // FIXME the codegen can be much improved in some cases.
4603 // We do not have to keep everything in memory.
4604 if (Flags.isByVal()) {
4605 assert(Ins[ArgNo].isOrigArg() && "Byval arguments cannot be implicit");
4606
4607 if (CallConv == CallingConv::Fast)
4608 ComputeArgOffset();
4609
4610 // ObjSize is the true size, ArgSize rounded up to multiple of registers.
4611 ObjSize = Flags.getByValSize();
4612 ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
4613 // Empty aggregate parameters do not take up registers. Examples:
4614 // struct { } a;
4615 // union { } b;
4616 // int c[0];
4617 // etc. However, we have to provide a place-holder in InVals, so
4618 // pretend we have an 8-byte item at the current address for that
4619 // purpose.
4620 if (!ObjSize) {
4621 int FI = MFI.CreateFixedObject(PtrByteSize, ArgOffset, true);
4622 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4623 InVals.push_back(FIN);
4624 continue;
4625 }
4626
4627 // Create a stack object covering all stack doublewords occupied
4628 // by the argument. If the argument is (fully or partially) on
4629 // the stack, or if the argument is fully in registers but the
4630 // caller has allocated the parameter save anyway, we can refer
4631 // directly to the caller's stack frame. Otherwise, create a
4632 // local copy in our own frame.
4633 int FI;
4634 if (HasParameterArea ||
4635 ArgSize + ArgOffset > LinkageSize + Num_GPR_Regs * PtrByteSize)
4636 FI = MFI.CreateFixedObject(ArgSize, ArgOffset, false, true);
4637 else
4638 FI = MFI.CreateStackObject(ArgSize, Alignment, false);
4639 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4640
4641 // Handle aggregates smaller than 8 bytes.
4642 if (ObjSize < PtrByteSize) {
4643 // The value of the object is its address, which differs from the
4644 // address of the enclosing doubleword on big-endian systems.
4645 SDValue Arg = FIN;
4646 if (!isLittleEndian) {
4647 SDValue ArgOff = DAG.getConstant(PtrByteSize - ObjSize, dl, PtrVT);
4648 Arg = DAG.getNode(ISD::ADD, dl, ArgOff.getValueType(), Arg, ArgOff);
4649 }
4650 InVals.push_back(Arg);
4651
4652 if (GPR_idx != Num_GPR_Regs) {
4653 Register VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
4654 FuncInfo->addLiveInAttr(VReg, Flags);
4655 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4656 EVT ObjType = EVT::getIntegerVT(*DAG.getContext(), ObjSize * 8);
4657 SDValue Store =
4658 DAG.getTruncStore(Val.getValue(1), dl, Val, Arg,
4659 MachinePointerInfo(&*FuncArg), ObjType);
4660 MemOps.push_back(Store);
4661 }
4662 // Whether we copied from a register or not, advance the offset
4663 // into the parameter save area by a full doubleword.
4664 ArgOffset += PtrByteSize;
4665 continue;
4666 }
4667
4668 // The value of the object is its address, which is the address of
4669 // its first stack doubleword.
4670 InVals.push_back(FIN);
4671
4672 // Store whatever pieces of the object are in registers to memory.
4673 for (unsigned j = 0; j < ArgSize; j += PtrByteSize) {
4674 if (GPR_idx == Num_GPR_Regs)
4675 break;
4676
4677 Register VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
4678 FuncInfo->addLiveInAttr(VReg, Flags);
4679 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4680 SDValue Addr = FIN;
4681 if (j) {
4682 SDValue Off = DAG.getConstant(j, dl, PtrVT);
4683 Addr = DAG.getNode(ISD::ADD, dl, Off.getValueType(), Addr, Off);
4684 }
4685 unsigned StoreSizeInBits = std::min(PtrByteSize, (ObjSize - j)) * 8;
4686 EVT ObjType = EVT::getIntegerVT(*DAG.getContext(), StoreSizeInBits);
4687 SDValue Store =
4688 DAG.getTruncStore(Val.getValue(1), dl, Val, Addr,
4689 MachinePointerInfo(&*FuncArg, j), ObjType);
4690 MemOps.push_back(Store);
4691 ++GPR_idx;
4692 }
4693 ArgOffset += ArgSize;
4694 continue;
4695 }
4696
4697 switch (ObjectVT.getSimpleVT().SimpleTy) {
4698 default: llvm_unreachable("Unhandled argument type!");
4699 case MVT::i1:
4700 case MVT::i32:
4701 case MVT::i64:
4702 if (Flags.isNest()) {
4703 // The 'nest' parameter, if any, is passed in R11.
4704 Register VReg = MF.addLiveIn(PPC::X11, &PPC::G8RCRegClass);
4705 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
4706
4707 if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
4708 ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
4709
4710 break;
4711 }
4712
4713 // These can be scalar arguments or elements of an integer array type
4714 // passed directly. Clang may use those instead of "byval" aggregate
4715 // types to avoid forcing arguments to memory unnecessarily.
4716 if (GPR_idx != Num_GPR_Regs) {
4717 Register VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
4718 FuncInfo->addLiveInAttr(VReg, Flags);
4719 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
4720
4721 if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
4722 // PPC64 passes i8, i16, and i32 values in i64 registers. Promote
4723 // value to MVT::i64 and then truncate to the correct register size.
4724 ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
4725 } else {
4726 if (CallConv == CallingConv::Fast)
4727 ComputeArgOffset();
4728
4729 needsLoad = true;
4730 ArgSize = PtrByteSize;
4731 }
4732 if (CallConv != CallingConv::Fast || needsLoad)
4733 ArgOffset += 8;
4734 break;
4735
4736 case MVT::f32:
4737 case MVT::f64:
4738 // These can be scalar arguments or elements of a float array type
4739 // passed directly. The latter are used to implement ELFv2 homogenous
4740 // float aggregates.
4741 if (FPR_idx != Num_FPR_Regs) {
4742 unsigned VReg;
4743
4744 if (ObjectVT == MVT::f32)
4745 VReg = MF.addLiveIn(FPR[FPR_idx],
4746 Subtarget.hasP8Vector()
4747 ? &PPC::VSSRCRegClass
4748 : &PPC::F4RCRegClass);
4749 else
4750 VReg = MF.addLiveIn(FPR[FPR_idx], Subtarget.hasVSX()
4751 ? &PPC::VSFRCRegClass
4752 : &PPC::F8RCRegClass);
4753
4754 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
4755 ++FPR_idx;
4756 } else if (GPR_idx != Num_GPR_Regs && CallConv != CallingConv::Fast) {
4757 // FIXME: We may want to re-enable this for CallingConv::Fast on the P8
4758 // once we support fp <-> gpr moves.
4759
4760 // This can only ever happen in the presence of f32 array types,
4761 // since otherwise we never run out of FPRs before running out
4762 // of GPRs.
4763 Register VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
4764 FuncInfo->addLiveInAttr(VReg, Flags);
4765 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
4766
4767 if (ObjectVT == MVT::f32) {
4768 if ((ArgOffset % PtrByteSize) == (isLittleEndian ? 4 : 0))
4769 ArgVal = DAG.getNode(ISD::SRL, dl, MVT::i64, ArgVal,
4770 DAG.getConstant(32, dl, MVT::i32));
4771 ArgVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, ArgVal);
4772 }
4773
4774 ArgVal = DAG.getNode(ISD::BITCAST, dl, ObjectVT, ArgVal);
4775 } else {
4776 if (CallConv == CallingConv::Fast)
4777 ComputeArgOffset();
4778
4779 needsLoad = true;
4780 }
4781
4782 // When passing an array of floats, the array occupies consecutive
4783 // space in the argument area; only round up to the next doubleword
4784 // at the end of the array. Otherwise, each float takes 8 bytes.
4785 if (CallConv != CallingConv::Fast || needsLoad) {
4786 ArgSize = Flags.isInConsecutiveRegs() ? ObjSize : PtrByteSize;
4787 ArgOffset += ArgSize;
4788 if (Flags.isInConsecutiveRegsLast())
4789 ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
4790 }
4791 break;
4792 case MVT::v4f32:
4793 case MVT::v4i32:
4794 case MVT::v8i16:
4795 case MVT::v16i8:
4796 case MVT::v2f64:
4797 case MVT::v2i64:
4798 case MVT::v1i128:
4799 case MVT::f128:
4800 // These can be scalar arguments or elements of a vector array type
4801 // passed directly. The latter are used to implement ELFv2 homogenous
4802 // vector aggregates.
4803 if (VR_idx != Num_VR_Regs) {
4804 Register VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass);
4805 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
4806 ++VR_idx;
4807 } else {
4808 if (CallConv == CallingConv::Fast)
4809 ComputeArgOffset();
4810 needsLoad = true;
4811 }
4812 if (CallConv != CallingConv::Fast || needsLoad)
4813 ArgOffset += 16;
4814 break;
4815 }
4816
4817 // We need to load the argument to a virtual register if we determined
4818 // above that we ran out of physical registers of the appropriate type.
4819 if (needsLoad) {
4820 if (ObjSize < ArgSize && !isLittleEndian)
4821 CurArgOffset += ArgSize - ObjSize;
4822 int FI = MFI.CreateFixedObject(ObjSize, CurArgOffset, isImmutable);
4823 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4824 ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo());
4825 }
4826
4827 InVals.push_back(ArgVal);
4828 }
4829
4830 // Area that is at least reserved in the caller of this function.
4831 unsigned MinReservedArea;
4832 if (HasParameterArea)
4833 MinReservedArea = std::max(ArgOffset, LinkageSize + 8 * PtrByteSize);
4834 else
4835 MinReservedArea = LinkageSize;
4836
4837 // Set the size that is at least reserved in caller of this function. Tail
4838 // call optimized functions' reserved stack space needs to be aligned so that
4839 // taking the difference between two stack areas will result in an aligned
4840 // stack.
4841 MinReservedArea =
4842 EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
4843 FuncInfo->setMinReservedArea(MinReservedArea);
4844
4845 // If the function takes variable number of arguments, make a frame index for
4846 // the start of the first vararg value... for expansion of llvm.va_start.
4847 // As the ELFv2 ABI spec puts it:
4848 // C programs that are intended to be *portable* across different compilers
4849 // and architectures must use the header file <stdarg.h> to deal with variable
4850 // argument lists.
4851 if (isVarArg && MFI.hasVAStart()) {
4852 int Depth = ArgOffset;
4853
4854 FuncInfo->setVarArgsFrameIndex(
4855 MFI.CreateFixedObject(PtrByteSize, Depth, true));
4856 SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
4857
4858 // If this function is vararg, store any remaining integer argument regs
4859 // to their spots on the stack so that they may be loaded by dereferencing
4860 // the result of va_next.
4861 for (GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
4862 GPR_idx < Num_GPR_Regs; ++GPR_idx) {
4863 Register VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
4864 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4865 SDValue Store =
4866 DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
4867 MemOps.push_back(Store);
4868 // Increment the address by four for the next argument to store
4869 SDValue PtrOff = DAG.getConstant(PtrByteSize, dl, PtrVT);
4870 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
4871 }
4872 }
4873
4874 if (!MemOps.empty())
4875 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
4876
4877 return Chain;
4878}
4879
4880/// CalculateTailCallSPDiff - Get the amount the stack pointer has to be
4881/// adjusted to accommodate the arguments for the tailcall.
4882static int CalculateTailCallSPDiff(SelectionDAG& DAG, bool isTailCall,
4883 unsigned ParamSize) {
4884
4885 if (!isTailCall) return 0;
4886
4887 PPCFunctionInfo *FI = DAG.getMachineFunction().getInfo<PPCFunctionInfo>();
4888 unsigned CallerMinReservedArea = FI->getMinReservedArea();
4889 int SPDiff = (int)CallerMinReservedArea - (int)ParamSize;
4890 // Remember only if the new adjustment is bigger.
4891 if (SPDiff < FI->getTailCallSPDelta())
4892 FI->setTailCallSPDelta(SPDiff);
4893
4894 return SPDiff;
4895}
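// Illustrative numbers: a caller that reserved 112 bytes tail-calling a callee
// that needs 144 gives SPDiff = 112 - 144 = -32; the most negative delta seen so
// far is kept in TailCallSPDelta so the prologue can grow the stack accordingly.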
4896
4897static bool isFunctionGlobalAddress(const GlobalValue *CalleeGV);
4898
4899static bool callsShareTOCBase(const Function *Caller,
4900 const GlobalValue *CalleeGV,
4901 const TargetMachine &TM) {
4902 // It does not make sense to call callsShareTOCBase() with a caller that
4903 // is PC Relative since PC Relative callers do not have a TOC.
4904#ifndef NDEBUG
4905 const PPCSubtarget *STICaller = &TM.getSubtarget<PPCSubtarget>(*Caller);
4906 assert(!STICaller->isUsingPCRelativeCalls() &&
4907 "PC Relative callers do not have a TOC and cannot share a TOC Base");
4908#endif
4909
4910 // Callee is either a GlobalAddress or an ExternalSymbol. ExternalSymbols
4911 // don't have enough information to determine if the caller and callee share
4912 // the same TOC base, so we have to pessimistically assume they don't for
4913 // correctness.
4914 if (!CalleeGV)
4915 return false;
4916
4917 // If the callee is preemptable, then the static linker will use a plt-stub
4918 // which saves the toc to the stack, and needs a nop after the call
4919 // instruction to convert to a toc-restore.
4920 if (!TM.shouldAssumeDSOLocal(CalleeGV))
4921 return false;
4922
4923 // Functions with PC Relative enabled may clobber the TOC in the same DSO.
4924 // We may need a TOC restore in the situation where the caller requires a
4925 // valid TOC but the callee is PC Relative and does not.
4926 const Function *F = dyn_cast<Function>(CalleeGV);
4927 const GlobalAlias *Alias = dyn_cast<GlobalAlias>(CalleeGV);
4928
4929 // If we have an Alias we can try to get the function from there.
4930 if (Alias) {
4931 const GlobalObject *GlobalObj = Alias->getAliaseeObject();
4932 F = dyn_cast<Function>(GlobalObj);
4933 }
4934
4935 // If we still have no valid function pointer we do not have enough
4936 // information to determine if the callee uses PC Relative calls so we must
4937 // assume that it does.
4938 if (!F)
4939 return false;
4940
4941 // If the callee uses PC Relative we cannot guarantee that the callee won't
4942 // clobber the TOC of the caller and so we must assume that the two
4943 // functions do not share a TOC base.
4944 const PPCSubtarget *STICallee = &TM.getSubtarget<PPCSubtarget>(*F);
4945 if (STICallee->isUsingPCRelativeCalls())
4946 return false;
4947
4948 // If the GV is not a strong definition then we need to assume it can be
4949 // replaced by another function at link time. The function that replaces
4950 // it may not share the same TOC as the caller since the callee may be
4951 // replaced by a PC Relative version of the same function.
4952 if (!CalleeGV->isStrongDefinitionForLinker())
4953 return false;
4954
4955 // The medium and large code models are expected to provide a sufficiently
4956 // large TOC to provide all data addressing needs of a module with a
4957 // single TOC.
4958 if (CodeModel::Medium == TM.getCodeModel() ||
4959 CodeModel::Large == TM.getCodeModel())
4960 return true;
4961
4962 // Any explicitly-specified sections and section prefixes must also match.
4963 // Also, if we're using -ffunction-sections, then each function is always in
4964 // a different section (the same is true for COMDAT functions).
4965 if (TM.getFunctionSections() || CalleeGV->hasComdat() ||
4966 Caller->hasComdat() || CalleeGV->getSection() != Caller->getSection())
4967 return false;
4968 if (const auto *F = dyn_cast<Function>(CalleeGV)) {
4969 if (F->getSectionPrefix() != Caller->getSectionPrefix())
4970 return false;
4971 }
4972
4973 return true;
4974}
4975
4976static bool
4977needStackSlotPassParameters(const PPCSubtarget &Subtarget,
4978 const SmallVectorImpl<ISD::OutputArg> &Outs) {
4979 assert(Subtarget.is64BitELFABI());
4980
4981 const unsigned PtrByteSize = 8;
4982 const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
4983
4984 static const MCPhysReg GPR[] = {
4985 PPC::X3, PPC::X4, PPC::X5, PPC::X6,
4986 PPC::X7, PPC::X8, PPC::X9, PPC::X10,
4987 };
4988 static const MCPhysReg VR[] = {
4989 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
4990 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
4991 };
4992
4993 const unsigned NumGPRs = std::size(GPR);
4994 const unsigned NumFPRs = 13;
4995 const unsigned NumVRs = std::size(VR);
4996 const unsigned ParamAreaSize = NumGPRs * PtrByteSize;
4997
4998 unsigned NumBytes = LinkageSize;
4999 unsigned AvailableFPRs = NumFPRs;
5000 unsigned AvailableVRs = NumVRs;
5001
5002 for (const ISD::OutputArg& Param : Outs) {
5003 if (Param.Flags.isNest()) continue;
5004
5005 if (CalculateStackSlotUsed(Param.VT, Param.ArgVT, Param.Flags, PtrByteSize,
5006 LinkageSize, ParamAreaSize, NumBytes,
5007 AvailableFPRs, AvailableVRs))
5008 return true;
5009 }
5010 return false;
5011}
5012
5013static bool hasSameArgumentList(const Function *CallerFn, const CallBase &CB) {
5014 if (CB.arg_size() != CallerFn->arg_size())
5015 return false;
5016
5017 auto CalleeArgIter = CB.arg_begin();
5018 auto CalleeArgEnd = CB.arg_end();
5019 Function::const_arg_iterator CallerArgIter = CallerFn->arg_begin();
5020
5021 for (; CalleeArgIter != CalleeArgEnd; ++CalleeArgIter, ++CallerArgIter) {
5022 const Value* CalleeArg = *CalleeArgIter;
5023 const Value* CallerArg = &(*CallerArgIter);
5024 if (CalleeArg == CallerArg)
5025 continue;
5026
5027 // e.g. @caller([4 x i64] %a, [4 x i64] %b) {
5028 // tail call @callee([4 x i64] undef, [4 x i64] %b)
5029 // }
5030 // 1st argument of callee is undef and has the same type as caller.
5031 if (CalleeArg->getType() == CallerArg->getType() &&
5032 isa<UndefValue>(CalleeArg))
5033 continue;
5034
5035 return false;
5036 }
5037
5038 return true;
5039}
5040
5041// Returns true if TCO is possible between the callers and callees
5042// calling conventions.
5043static bool
5044areCallingConvEligibleForTCO_64SVR4(CallingConv::ID CallerCC,
5045 CallingConv::ID CalleeCC) {
5046 // Tail calls are possible with fastcc and ccc.
5047 auto isTailCallableCC = [] (CallingConv::ID CC){
5048 return CC == CallingConv::C || CC == CallingConv::Fast;
5049 };
5050 if (!isTailCallableCC(CallerCC) || !isTailCallableCC(CalleeCC))
5051 return false;
5052
5053 // We can safely tail call both fastcc and ccc callees from a c calling
5054 // convention caller. If the caller is fastcc, we may have less stack space
5055 // than a non-fastcc caller with the same signature so disable tail-calls in
5056 // that case.
5057 return CallerCC == CallingConv::C || CallerCC == CalleeCC;
5058}
5059
5060bool PPCTargetLowering::IsEligibleForTailCallOptimization_64SVR4(
5061 const GlobalValue *CalleeGV, CallingConv::ID CalleeCC,
5062 CallingConv::ID CallerCC, const CallBase *CB, bool isVarArg,
5063 const SmallVectorImpl<ISD::OutputArg> &Outs,
5064 const SmallVectorImpl<ISD::InputArg> &Ins, const Function *CallerFunc,
5065 bool isCalleeExternalSymbol) const {
5066 bool TailCallOpt = getTargetMachine().Options.GuaranteedTailCallOpt;
5067
5068 if (DisableSCO && !TailCallOpt) return false;
5069
5070 // Variadic argument functions are not supported.
5071 if (isVarArg) return false;
5072
5073 // Check that the calling conventions are compatible for tco.
5074 if (!areCallingConvEligibleForTCO_64SVR4(CallerCC, CalleeCC))
5075 return false;
5076
5077 // A caller with any byval parameter is not supported.
5078 if (any_of(Ins, [](const ISD::InputArg &IA) { return IA.Flags.isByVal(); }))
5079 return false;
5080
5081 // A callee with any byval parameter is not supported either.
5082 // Note: This is a quick work around, because in some cases, e.g.
5083 // caller's stack size > callee's stack size, we are still able to apply
5084 // sibling call optimization. For example, gcc is able to do SCO for caller1
5085 // in the following example, but not for caller2.
5086 // struct test {
5087 // long int a;
5088 // char ary[56];
5089 // } gTest;
5090 // __attribute__((noinline)) int callee(struct test v, struct test *b) {
5091 // b->a = v.a;
5092 // return 0;
5093 // }
5094 // void caller1(struct test a, struct test c, struct test *b) {
5095 // callee(gTest, b); }
5096 // void caller2(struct test *b) { callee(gTest, b); }
5097 if (any_of(Outs, [](const ISD::OutputArg& OA) { return OA.Flags.isByVal(); }))
5098 return false;
5099
5100 // If callee and caller use different calling conventions, we cannot pass
5101 // parameters on stack since offsets for the parameter area may be different.
5102 if (CallerCC != CalleeCC && needStackSlotPassParameters(Subtarget, Outs))
5103 return false;
5104
5105 // All variants of 64-bit ELF ABIs without PC-Relative addressing require that
5106 // the caller and callee share the same TOC for TCO/SCO. If the caller and
5107 // callee potentially have different TOC bases then we cannot tail call since
5108 // we need to restore the TOC pointer after the call.
5109 // ref: https://bugzilla.mozilla.org/show_bug.cgi?id=973977
5110 // We cannot guarantee this for indirect calls or calls to external functions.
5111 // When PC-Relative addressing is used, the concept of the TOC is no longer
5112 // applicable so this check is not required.
5113 // Check first for indirect calls.
5114 if (!Subtarget.isUsingPCRelativeCalls() &&
5115 !isFunctionGlobalAddress(CalleeGV) && !isCalleeExternalSymbol)
5116 return false;
5117
5118 // Check if we share the TOC base.
5119 if (!Subtarget.isUsingPCRelativeCalls() &&
5120 !callsShareTOCBase(CallerFunc, CalleeGV, getTargetMachine()))
5121 return false;
5122
5123 // TCO allows altering callee ABI, so we don't have to check further.
5124 if (CalleeCC == CallingConv::Fast && TailCallOpt)
5125 return true;
5126
5127 if (DisableSCO) return false;
5128
5129 // If the callee uses the same argument list as the caller, then we can
5130 // apply SCO in this case. If not, we need to check whether the callee needs
5131 // stack space for passing arguments.
5132 // PC Relative tail calls may not have a CallBase.
5133 // If there is no CallBase we cannot verify if we have the same argument
5134 // list so assume that we don't have the same argument list.
5135 if (CB && !hasSameArgumentList(CallerFunc, *CB) &&
5136 needStackSlotPassParameters(Subtarget, Outs))
5137 return false;
5138 else if (!CB && needStackSlotPassParameters(Subtarget, Outs))
5139 return false;
5140
5141 return true;
5142}
5143
5144/// IsEligibleForTailCallOptimization - Check whether the call is eligible
5145/// for tail call optimization. Targets which want to do tail call
5146/// optimization should implement this function.
5147bool PPCTargetLowering::IsEligibleForTailCallOptimization(
5148 const GlobalValue *CalleeGV, CallingConv::ID CalleeCC,
5149 CallingConv::ID CallerCC, bool isVarArg,
5150 const SmallVectorImpl<ISD::InputArg> &Ins) const {
5151 if (!getTargetMachine().Options.GuaranteedTailCallOpt)
5152 return false;
5153
5154 // Variable argument functions are not supported.
5155 if (isVarArg)
5156 return false;
5157
5158 if (CalleeCC == CallingConv::Fast && CallerCC == CalleeCC) {
5159 // Functions containing by val parameters are not supported.
5160 if (any_of(Ins, [](const ISD::InputArg &IA) { return IA.Flags.isByVal(); }))
5161 return false;
5162
5163 // Non-PIC/GOT tail calls are supported.
5164 if (getTargetMachine().getRelocationModel() != Reloc::PIC_)
5165 return true;
5166
5167 // At the moment we can only do local tail calls (in same module, hidden
5168 // or protected) if we are generating PIC.
5169 if (CalleeGV)
5170 return CalleeGV->hasHiddenVisibility() ||
5171 CalleeGV->hasProtectedVisibility();
5172 }
5173
5174 return false;
5175}
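// A minimal IR sketch of a call the predicate above accepts, assuming
// GuaranteedTailCallOpt (-tailcallopt) and a non-PIC relocation model:
//   define fastcc void @callee(i32 %x) { ret void }
//   define fastcc void @caller(i32 %x) {
//     tail call fastcc void @callee(i32 %x)
//     ret void
//   }
// Varargs callees and byval incoming arguments are rejected by the checks.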
5176
5177/// isBLACompatibleAddress - Return the immediate to use if the specified
5178/// 32-bit value is representable in the immediate field of a BxA instruction.
5179static SDNode *isBLACompatibleAddress(SDValue Op, SelectionDAG &DAG) {
5180 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
5181 if (!C) return nullptr;
5182
5183 int Addr = C->getZExtValue();
5184 if ((Addr & 3) != 0 || // Low 2 bits are implicitly zero.
5185 SignExtend32<26>(Addr) != Addr)
5186 return nullptr; // Top 6 bits have to be sext of immediate.
5187
5188 return DAG
5189 .getConstant(
5190 (int)C->getZExtValue() >> 2, SDLoc(Op),
5191 DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()))
5192 .getNode();
5193}
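// Example: an absolute callee address of 0x10000 passes this check (the low
// two bits are clear and the value survives SignExtend32<26>), so the call
// can become an absolute branch such as "bla 0x10000"; 0x10002 is rejected
// because its low two bits are set, and 0x4000000 is rejected because it
// does not fit the sign-extended 26-bit field.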
5194
5195namespace {
5196
5197struct TailCallArgumentInfo {
5198 SDValue Arg;
5199 SDValue FrameIdxOp;
5200 int FrameIdx = 0;
5201
5202 TailCallArgumentInfo() = default;
5203};
5204
5205} // end anonymous namespace
5206
5207/// StoreTailCallArgumentsToStackSlot - Stores arguments to their stack slot.
5208static void StoreTailCallArgumentsToStackSlot(
5209 SelectionDAG &DAG, SDValue Chain,
5210 const SmallVectorImpl<TailCallArgumentInfo> &TailCallArgs,
5211 SmallVectorImpl<SDValue> &MemOpChains, const SDLoc &dl) {
5212 for (unsigned i = 0, e = TailCallArgs.size(); i != e; ++i) {
5213 SDValue Arg = TailCallArgs[i].Arg;
5214 SDValue FIN = TailCallArgs[i].FrameIdxOp;
5215 int FI = TailCallArgs[i].FrameIdx;
5216 // Store relative to the frame pointer.
5217 MemOpChains.push_back(DAG.getStore(
5218 Chain, dl, Arg, FIN,
5219 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI)));
5220 }
5221}
5222
5223/// EmitTailCallStoreFPAndRetAddr - Move the frame pointer and return address to
5224/// the appropriate stack slot for the tail call optimized function call.
5225static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG, SDValue Chain,
5226 SDValue OldRetAddr, SDValue OldFP,
5227 int SPDiff, const SDLoc &dl) {
5228 if (SPDiff) {
5229 // Calculate the new stack slot for the return address.
5230 MachineFunction &MF = DAG.getMachineFunction();
5231 const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
5232 const PPCFrameLowering *FL = Subtarget.getFrameLowering();
5233 bool isPPC64 = Subtarget.isPPC64();
5234 int SlotSize = isPPC64 ? 8 : 4;
5235 int NewRetAddrLoc = SPDiff + FL->getReturnSaveOffset();
5236 int NewRetAddr = MF.getFrameInfo().CreateFixedObject(SlotSize,
5237 NewRetAddrLoc, true);
5238 EVT VT = isPPC64 ? MVT::i64 : MVT::i32;
5239 SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewRetAddr, VT);
5240 Chain = DAG.getStore(Chain, dl, OldRetAddr, NewRetAddrFrIdx,
5241 MachinePointerInfo::getFixedStack(MF, NewRetAddr));
5242 }
5243 return Chain;
5244}
5245
5246/// CalculateTailCallArgDest - Remember the argument for later processing.
5247/// Calculate the position of the argument.
5248static void
5249CalculateTailCallArgDest(SelectionDAG &DAG, MachineFunction &MF, bool isPPC64,
5250 SDValue Arg, int SPDiff, unsigned ArgOffset,
5251 SmallVectorImpl<TailCallArgumentInfo>& TailCallArguments) {
5252 int Offset = ArgOffset + SPDiff;
5253 uint32_t OpSize = (Arg.getValueSizeInBits() + 7) / 8;
5254 int FI = MF.getFrameInfo().CreateFixedObject(OpSize, Offset, true);
5255 EVT VT = isPPC64 ? MVT::i64 : MVT::i32;
5256 SDValue FIN = DAG.getFrameIndex(FI, VT);
5257 TailCallArgumentInfo Info;
5258 Info.Arg = Arg;
5259 Info.FrameIdxOp = FIN;
5260 Info.FrameIdx = FI;
5261 TailCallArguments.push_back(Info);
5262}
5263
5264/// EmitTailCallLoadFPAndRetAddr - Emit loads from the frame pointer and return
5265/// address stack slots. Returns the chain as result and the loaded values in
5266/// LROpOut/FPOpOut. Used when tail calling.
5267SDValue PPCTargetLowering::EmitTailCallLoadFPAndRetAddr(
5268 SelectionDAG &DAG, int SPDiff, SDValue Chain, SDValue &LROpOut,
5269 SDValue &FPOpOut, const SDLoc &dl) const {
5270 if (SPDiff) {
5271 // Load the LR and FP stack slot for later adjusting.
5272 EVT VT = Subtarget.isPPC64() ? MVT::i64 : MVT::i32;
5273 LROpOut = getReturnAddrFrameIndex(DAG);
5274 LROpOut = DAG.getLoad(VT, dl, Chain, LROpOut, MachinePointerInfo());
5275 Chain = SDValue(LROpOut.getNode(), 1);
5276 }
5277 return Chain;
5278}
5279
5280/// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified
5281/// by "Src" to address "Dst" of size "Size". Alignment information is
5282/// specified by the specific parameter attribute. The copy will be passed as
5283/// a byval function parameter.
5284/// Sometimes what we are copying is the end of a larger object, the part that
5285/// does not fit in registers.
5286static SDValue CreateCopyOfByValArgument(SDValue Arg, SDValue Dst,
5287 SDValue Chain, ISD::ArgFlagsTy Flags,
5288 SelectionDAG &DAG, const SDLoc &dl) {
5289 SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), dl, MVT::i32);
5290 return DAG.getMemcpy(
5291 Chain, dl, Dst, Src, SizeNode, Flags.getNonZeroByValAlign(), false, false,
5292 /*CI=*/nullptr, std::nullopt, MachinePointerInfo(), MachinePointerInfo());
5293}
5294
5295/// LowerMemOpCallTo - Store the argument to the stack or remember it in case of
5296/// tail calls.
5297static void LowerMemOpCallTo(
5298 SelectionDAG &DAG, MachineFunction &MF, SDValue Chain, SDValue Arg,
5299 SDValue PtrOff, int SPDiff, unsigned ArgOffset, bool isPPC64,
5300 bool isTailCall, bool isVector, SmallVectorImpl<SDValue> &MemOpChains,
5301 SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments, const SDLoc &dl) {
5302 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
5303 if (!isTailCall) {
5304 if (isVector) {
5305 SDValue StackPtr;
5306 if (isPPC64)
5307 StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
5308 else
5309 StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
5310 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr,
5311 DAG.getConstant(ArgOffset, dl, PtrVT));
5312 }
5313 MemOpChains.push_back(
5314 DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));
5315 // Calculate and remember argument location.
5316 } else CalculateTailCallArgDest(DAG, MF, isPPC64, Arg, SPDiff, ArgOffset,
5317 TailCallArguments);
5318}
5319
5320static void
5321PrepareTailCall(SelectionDAG &DAG, SDValue &InGlue, SDValue &Chain,
5322 const SDLoc &dl, int SPDiff, unsigned NumBytes, SDValue LROp,
5323 SDValue FPOp,
5324 SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments) {
5325 // Emit a sequence of copyto/copyfrom virtual registers for arguments that
5326 // might overwrite each other in case of tail call optimization.
5327 SmallVector<SDValue, 8> MemOpChains2;
5328 // Do not flag preceding copytoreg stuff together with the following stuff.
5329 InGlue = SDValue();
5330 StoreTailCallArgumentsToStackSlot(DAG, Chain, TailCallArguments,
5331 MemOpChains2, dl);
5332 if (!MemOpChains2.empty())
5333 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains2);
5334
5335 // Store the return address to the appropriate stack slot.
5336 Chain = EmitTailCallStoreFPAndRetAddr(DAG, Chain, LROp, FPOp, SPDiff, dl);
5337
5338 // Emit callseq_end just before tailcall node.
5339 Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, InGlue, dl);
5340 InGlue = Chain.getValue(1);
5341}
5342
5343// Is this global address that of a function that can be called by name? (as
5344// opposed to something that must hold a descriptor for an indirect call).
5345static bool isFunctionGlobalAddress(const GlobalValue *GV) {
5346 if (GV) {
5347 if (GV->isThreadLocal())
5348 return false;
5349
5350 return GV->getValueType()->isFunctionTy();
5351 }
5352
5353 return false;
5354}
5355
5356SDValue PPCTargetLowering::LowerCallResult(
5357 SDValue Chain, SDValue InGlue, CallingConv::ID CallConv, bool isVarArg,
5358 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
5359 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
5360 SmallVector<CCValAssign, 16> RVLocs;
5361 CCState CCRetInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
5362 *DAG.getContext());
5363
5364 CCRetInfo.AnalyzeCallResult(
5365 Ins, (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold)
5366 ? RetCC_PPC_Cold
5367 : RetCC_PPC);
5368
5369 // Copy all of the result registers out of their specified physreg.
5370 for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
5371 CCValAssign &VA = RVLocs[i];
5372 assert(VA.isRegLoc() && "Can only return in registers!");
5373
5374 SDValue Val;
5375
5376 if (Subtarget.hasSPE() && VA.getLocVT() == MVT::f64) {
5377 SDValue Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
5378 InGlue);
5379 Chain = Lo.getValue(1);
5380 InGlue = Lo.getValue(2);
5381 VA = RVLocs[++i]; // skip ahead to next loc
5382 SDValue Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
5383 InGlue);
5384 Chain = Hi.getValue(1);
5385 InGlue = Hi.getValue(2);
5386 if (!Subtarget.isLittleEndian())
5387 std::swap (Lo, Hi);
5388 Val = DAG.getNode(PPCISD::BUILD_SPE64, dl, MVT::f64, Lo, Hi);
5389 } else {
5390 Val = DAG.getCopyFromReg(Chain, dl,
5391 VA.getLocReg(), VA.getLocVT(), InGlue);
5392 Chain = Val.getValue(1);
5393 InGlue = Val.getValue(2);
5394 }
5395
5396 switch (VA.getLocInfo()) {
5397 default: llvm_unreachable("Unknown loc info!");
5398 case CCValAssign::Full: break;
5399 case CCValAssign::AExt:
5400 Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
5401 break;
5402 case CCValAssign::ZExt:
5403 Val = DAG.getNode(ISD::AssertZext, dl, VA.getLocVT(), Val,
5404 DAG.getValueType(VA.getValVT()));
5405 Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
5406 break;
5407 case CCValAssign::SExt:
5408 Val = DAG.getNode(ISD::AssertSext, dl, VA.getLocVT(), Val,
5409 DAG.getValueType(VA.getValVT()));
5410 Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
5411 break;
5412 }
5413
5414 InVals.push_back(Val);
5415 }
5416
5417 return Chain;
5418}
5419
5420static bool isIndirectCall(const SDValue &Callee, SelectionDAG &DAG,
5421 const PPCSubtarget &Subtarget, bool isPatchPoint) {
5422 auto *G = dyn_cast<GlobalAddressSDNode>(Callee);
5423 const GlobalValue *GV = G ? G->getGlobal() : nullptr;
5424
5425 // PatchPoint calls are not indirect.
5426 if (isPatchPoint)
5427 return false;
5428
5429 if (isFunctionGlobalAddress(GV) || isa<ExternalSymbolSDNode>(Callee))
5430 return false;
5431
5432 // Darwin and 32-bit ELF can use a BLA. The descriptor-based ABIs cannot
5433 // because the immediate function pointer points to a descriptor instead of
5434 // a function entry point. The ELFv2 ABI cannot use a BLA because the function
5435 // pointer immediate points to the global entry point, while the BLA would
5436 // need to jump to the local entry point (see rL211174).
5437 if (!Subtarget.usesFunctionDescriptors() && !Subtarget.isELFv2ABI() &&
5438 isBLACompatibleAddress(Callee, DAG))
5439 return false;
5440
5441 return true;
5442}
5443
5444// AIX and 64-bit ELF ABIs w/o PCRel require a TOC save/restore around calls.
5445static inline bool isTOCSaveRestoreRequired(const PPCSubtarget &Subtarget) {
5446 return Subtarget.isAIXABI() ||
5447 (Subtarget.is64BitELFABI() && !Subtarget.isUsingPCRelativeCalls());
5448}
5449
5450static unsigned getCallOpcode(PPCTargetLowering::CallFlags CFlags,
5451 const Function &Caller, const SDValue &Callee,
5452 const PPCSubtarget &Subtarget,
5453 const TargetMachine &TM,
5454 bool IsStrictFPCall = false) {
5455 if (CFlags.IsTailCall)
5456 return PPCISD::TC_RETURN;
5457
5458 unsigned RetOpc = 0;
5459 // This is a call through a function pointer.
5460 if (CFlags.IsIndirect) {
5461 // AIX and the 64-bit ELF ABIs need to maintain the TOC pointer across
5462 // indirect calls. The save of the caller's TOC pointer to the stack will be
5463 // inserted into the DAG as part of call lowering. The restore of the TOC
5464 // pointer is modeled by using a pseudo instruction for the call opcode that
5465 // represents the 2 instruction sequence of an indirect branch and link,
5466 // immediately followed by a load of the TOC pointer from the stack save
5467 // slot into gpr2. For 64-bit ELFv2 ABI with PCRel, do not restore the TOC
5468 // as it is not saved or used.
5469 RetOpc = isTOCSaveRestoreRequired(Subtarget) ? PPCISD::BCTRL_LOAD_TOC
5470 : PPCISD::BCTRL;
5471 } else if (Subtarget.isUsingPCRelativeCalls()) {
5472 assert(Subtarget.is64BitELFABI() && "PC Relative is only on ELF ABI.");
5473 RetOpc = PPCISD::CALL_NOTOC;
5474 } else if (Subtarget.isAIXABI() || Subtarget.is64BitELFABI()) {
5475 // The ABIs that maintain a TOC pointer across calls need to have a nop
5476 // immediately following the call instruction if the caller and callee may
5477 // have different TOC bases. At link time, if the linker determines the calls
5478 // may not share a TOC base, the call is redirected to a trampoline inserted
5479 // by the linker. The trampoline will (among other things) save the caller's
5480 // TOC pointer at an ABI-designated offset in the linkage area and the
5481 // linker will rewrite the nop to be a load of the TOC pointer from the
5482 // linkage area into gpr2.
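// A rough sketch of the resulting code when the callee may use a different
// TOC (64-bit ELF shown; AIX is analogous):
//   bl callee     // lowered from PPCISD::CALL_NOP
//   nop           // rewritten by the linker to: ld r2, 24(r1)
// where 24(r1) is the TOC save doubleword of the ELFv2 linkage area
// (40(r1) under ELFv1).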
5483 auto *G = dyn_cast<GlobalAddressSDNode>(Callee);
5484 const GlobalValue *GV = G ? G->getGlobal() : nullptr;
5485 RetOpc =
5486 callsShareTOCBase(&Caller, GV, TM) ? PPCISD::CALL : PPCISD::CALL_NOP;
5487 } else
5488 RetOpc = PPCISD::CALL;
5489 if (IsStrictFPCall) {
5490 switch (RetOpc) {
5491 default:
5492 llvm_unreachable("Unknown call opcode");
5493 case PPCISD::BCTRL_LOAD_TOC:
5494 RetOpc = PPCISD::BCTRL_LOAD_TOC_RM;
5495 break;
5496 case PPCISD::BCTRL:
5497 RetOpc = PPCISD::BCTRL_RM;
5498 break;
5499 case PPCISD::CALL_NOTOC:
5500 RetOpc = PPCISD::CALL_NOTOC_RM;
5501 break;
5502 case PPCISD::CALL:
5503 RetOpc = PPCISD::CALL_RM;
5504 break;
5505 case PPCISD::CALL_NOP:
5506 RetOpc = PPCISD::CALL_NOP_RM;
5507 break;
5508 }
5509 }
5510 return RetOpc;
5511}
5512
5513static SDValue transformCallee(const SDValue &Callee, SelectionDAG &DAG,
5514 const SDLoc &dl, const PPCSubtarget &Subtarget) {
5515 if (!Subtarget.usesFunctionDescriptors() && !Subtarget.isELFv2ABI())
5516 if (SDNode *Dest = isBLACompatibleAddress(Callee, DAG))
5517 return SDValue(Dest, 0);
5518
5519 // Returns true if the callee is local, and false otherwise.
5520 auto isLocalCallee = [&]() {
5521 const GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
5522 const GlobalValue *GV = G ? G->getGlobal() : nullptr;
5523
5524 return DAG.getTarget().shouldAssumeDSOLocal(GV) &&
5525 !isa_and_nonnull<GlobalIFunc>(GV);
5526 };
5527
5528 // The PLT is only used in 32-bit ELF PIC mode. Attempting to use the PLT in
5529 // a static relocation model causes some versions of GNU LD (2.17.50, at
5530 // least) to force BSS-PLT, instead of secure-PLT, even if all objects are
5531 // built with secure-PLT.
5532 bool UsePlt =
5533 Subtarget.is32BitELFABI() && !isLocalCallee() &&
5534 Subtarget.getTargetMachine().getRelocationModel() == Reloc::PIC_;
5535
5536 const auto getAIXFuncEntryPointSymbolSDNode = [&](const GlobalValue *GV) {
5537 const TargetMachine &TM = Subtarget.getTargetMachine();
5538 const TargetLoweringObjectFile *TLOF = TM.getObjFileLowering();
5539 MCSymbolXCOFF *S =
5540 cast<MCSymbolXCOFF>(TLOF->getFunctionEntryPointSymbol(GV, TM));
5541
5542 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
5543 return DAG.getMCSymbol(S, PtrVT);
5544 };
5545
5546 auto *G = dyn_cast<GlobalAddressSDNode>(Callee);
5547 const GlobalValue *GV = G ? G->getGlobal() : nullptr;
5548 if (isFunctionGlobalAddress(GV)) {
5549 const GlobalValue *GV = cast<GlobalAddressSDNode>(Callee)->getGlobal();
5550
5551 if (Subtarget.isAIXABI()) {
5552 assert(!isa<GlobalIFunc>(GV) && "IFunc is not supported on AIX.");
5553 return getAIXFuncEntryPointSymbolSDNode(GV);
5554 }
5555 return DAG.getTargetGlobalAddress(GV, dl, Callee.getValueType(), 0,
5556 UsePlt ? PPCII::MO_PLT : 0);
5557 }
5558
5559 if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
5560 const char *SymName = S->getSymbol();
5561 if (Subtarget.isAIXABI()) {
5562 // If there exists a user-declared function whose name is the same as the
5563 // ExternalSymbol's, then we pick up the user-declared version.
5564 const Module *Mod = DAG.getMachineFunction().getFunction().getParent();
5565 if (const Function *F =
5566 dyn_cast_or_null<Function>(Mod->getNamedValue(SymName)))
5567 return getAIXFuncEntryPointSymbolSDNode(F);
5568
5569 // On AIX, direct function calls reference the symbol for the function's
5570 // entry point, which is named by prepending a "." before the function's
5571 // C-linkage name. A Qualname is returned here because an external
5572 // function entry point is a csect with XTY_ER property.
5573 const auto getExternalFunctionEntryPointSymbol = [&](StringRef SymName) {
5574 auto &Context = DAG.getMachineFunction().getContext();
5575 MCSectionXCOFF *Sec = Context.getXCOFFSection(
5576 (Twine(".") + Twine(SymName)).str(), SectionKind::getMetadata(),
5577 XCOFF::CsectProperties(XCOFF::XMC_PR, XCOFF::XTY_ER));
5578 return Sec->getQualNameSymbol();
5579 };
5580
5581 SymName = getExternalFunctionEntryPointSymbol(SymName)->getName().data();
5582 }
5583 return DAG.getTargetExternalSymbol(SymName, Callee.getValueType(),
5584 UsePlt ? PPCII::MO_PLT : 0);
5585 }
5586
5587 // No transformation needed.
5588 assert(Callee.getNode() && "What no callee?");
5589 return Callee;
5590}
5591
5592static SDValue getOutputChainFromCallSeq(SDValue CallSeqStart) {
5593 assert(CallSeqStart.getOpcode() == ISD::CALLSEQ_START &&
5594 "Expected a CALLSEQ_STARTSDNode.");
5595
5596 // The last operand is the chain, except when the node has glue. If the node
5597 // has glue, then the last operand is the glue, and the chain is the second
5598 // last operand.
5599 SDValue LastValue = CallSeqStart.getValue(CallSeqStart->getNumValues() - 1);
5600 if (LastValue.getValueType() != MVT::Glue)
5601 return LastValue;
5602
5603 return CallSeqStart.getValue(CallSeqStart->getNumValues() - 2);
5604}
5605
5606// Creates the node that moves a function's address into the count register
5607// to prepare for an indirect call instruction.
5608static void prepareIndirectCall(SelectionDAG &DAG, SDValue &Callee,
5609 SDValue &Glue, SDValue &Chain,
5610 const SDLoc &dl) {
5611 SDValue MTCTROps[] = {Chain, Callee, Glue};
5612 EVT ReturnTypes[] = {MVT::Other, MVT::Glue};
5613 Chain = DAG.getNode(PPCISD::MTCTR, dl, ReturnTypes,
5614 ArrayRef(MTCTROps, Glue.getNode() ? 3 : 2));
5615 // The glue is the second value produced.
5616 Glue = Chain.getValue(1);
5617}
5618
5619static void prepareDescriptorIndirectCall(SelectionDAG &DAG, SDValue &Callee,
5620 SDValue &Glue, SDValue &Chain,
5621 SDValue CallSeqStart,
5622 const CallBase *CB, const SDLoc &dl,
5623 bool hasNest,
5624 const PPCSubtarget &Subtarget) {
5625 // Function pointers in the 64-bit SVR4 ABI do not point to the function
5626 // entry point, but to the function descriptor (the function entry point
5627 // address is part of the function descriptor though).
5628 // The function descriptor is a three doubleword structure with the
5629 // following fields: function entry point, TOC base address and
5630 // environment pointer.
5631 // Thus for a call through a function pointer, the following actions need
5632 // to be performed:
5633 // 1. Save the TOC of the caller in the TOC save area of its stack
5634 // frame (this is done in LowerCall_Darwin() or LowerCall_64SVR4()).
5635 // 2. Load the address of the function entry point from the function
5636 // descriptor.
5637 // 3. Load the TOC of the callee from the function descriptor into r2.
5638 // 4. Load the environment pointer from the function descriptor into
5639 // r11.
5640 // 5. Branch to the function entry point address.
5641 // 6. On return of the callee, the TOC of the caller needs to be
5642 // restored (this is done in FinishCall()).
5643 //
5644 // The loads are scheduled at the beginning of the call sequence, and the
5645 // register copies are flagged together to ensure that no other
5646 // operations can be scheduled in between. E.g. without flagging the
5647 // copies together, a TOC access in the caller could be scheduled between
5648 // the assignment of the callee TOC and the branch to the callee, which leads
5649 // to incorrect code.
5650
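// As an illustration (field names are descriptive, not from the ABI headers),
// an ELFv1/AIX-style function descriptor can be pictured as:
//   struct FunctionDescriptor {
//     void *EntryPoint; // loaded below and moved to CTR for the branch
//     void *TOCBase;    // at descriptorTOCAnchorOffset(), copied into r2
//     void *EnvPtr;     // at descriptorEnvironmentPointerOffset(), copied
//   };                  //   into the environment register unless 'nest' is used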
5651 // Start by loading the function address from the descriptor.
5652 SDValue LDChain = getOutputChainFromCallSeq(CallSeqStart);
5652 auto MMOFlags = Subtarget.hasInvariantFunctionDescriptors()
5653 ? (MachineMemOperand::MODereferenceable |
5654 MachineMemOperand::MOInvariant)
5655 : MachineMemOperand::MONone;
5656
5657
5658 MachinePointerInfo MPI(CB ? CB->getCalledOperand() : nullptr);
5659
5660 // Registers used in building the DAG.
5661 const MCRegister EnvPtrReg = Subtarget.getEnvironmentPointerRegister();
5662 const MCRegister TOCReg = Subtarget.getTOCPointerRegister();
5663
5664 // Offsets of descriptor members.
5665 const unsigned TOCAnchorOffset = Subtarget.descriptorTOCAnchorOffset();
5666 const unsigned EnvPtrOffset = Subtarget.descriptorEnvironmentPointerOffset();
5667
5668 const MVT RegVT = Subtarget.isPPC64() ? MVT::i64 : MVT::i32;
5669 const Align Alignment = Subtarget.isPPC64() ? Align(8) : Align(4);
5670
5671 // One load for the function's entry point address.
5672 SDValue LoadFuncPtr = DAG.getLoad(RegVT, dl, LDChain, Callee, MPI,
5673 Alignment, MMOFlags);
5674
5675 // One for loading the TOC anchor for the module that contains the called
5676 // function.
5677 SDValue TOCOff = DAG.getIntPtrConstant(TOCAnchorOffset, dl);
5678 SDValue AddTOC = DAG.getNode(ISD::ADD, dl, RegVT, Callee, TOCOff);
5679 SDValue TOCPtr =
5680 DAG.getLoad(RegVT, dl, LDChain, AddTOC,
5681 MPI.getWithOffset(TOCAnchorOffset), Alignment, MMOFlags);
5682
5683 // One for loading the environment pointer.
5684 SDValue PtrOff = DAG.getIntPtrConstant(EnvPtrOffset, dl);
5685 SDValue AddPtr = DAG.getNode(ISD::ADD, dl, RegVT, Callee, PtrOff);
5686 SDValue LoadEnvPtr =
5687 DAG.getLoad(RegVT, dl, LDChain, AddPtr,
5688 MPI.getWithOffset(EnvPtrOffset), Alignment, MMOFlags);
5689
5690
5691 // Then copy the newly loaded TOC anchor to the TOC pointer.
5692 SDValue TOCVal = DAG.getCopyToReg(Chain, dl, TOCReg, TOCPtr, Glue);
5693 Chain = TOCVal.getValue(0);
5694 Glue = TOCVal.getValue(1);
5695
5696 // If the function call has an explicit 'nest' parameter, it takes the
5697 // place of the environment pointer.
5698 assert((!hasNest || !Subtarget.isAIXABI()) &&
5699 "Nest parameter is not supported on AIX.");
5700 if (!hasNest) {
5701 SDValue EnvVal = DAG.getCopyToReg(Chain, dl, EnvPtrReg, LoadEnvPtr, Glue);
5702 Chain = EnvVal.getValue(0);
5703 Glue = EnvVal.getValue(1);
5704 }
5705
5706 // The rest of the indirect call sequence is the same as the non-descriptor
5707 // DAG.
5708 prepareIndirectCall(DAG, LoadFuncPtr, Glue, Chain, dl);
5709}
5710
5711static void
5712buildCallOperands(SmallVectorImpl<SDValue> &Ops,
5713 PPCTargetLowering::CallFlags CFlags, const SDLoc &dl,
5714 SelectionDAG &DAG,
5715 SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass,
5716 SDValue Glue, SDValue Chain, SDValue &Callee, int SPDiff,
5717 const PPCSubtarget &Subtarget) {
5718 const bool IsPPC64 = Subtarget.isPPC64();
5719 // MVT for a general purpose register.
5720 const MVT RegVT = IsPPC64 ? MVT::i64 : MVT::i32;
5721
5722 // First operand is always the chain.
5723 Ops.push_back(Chain);
5724
5725 // If it's a direct call pass the callee as the second operand.
5726 if (!CFlags.IsIndirect)
5727 Ops.push_back(Callee);
5728 else {
5729 assert(!CFlags.IsPatchPoint && "Patch point calls are not indirect.");
5730
5731 // For the TOC based ABIs, we have saved the TOC pointer to the linkage area
5732 // on the stack (this would have been done in `LowerCall_64SVR4` or
5733 // `LowerCall_AIX`). The call instruction is a pseudo instruction that
5734 // represents both the indirect branch and a load that restores the TOC
5735 // pointer from the linkage area. The operand for the TOC restore is an add
5736 // of the TOC save offset to the stack pointer. This must be the second
5737 // operand: after the chain input but before any other variadic arguments.
5738 // For 64-bit ELFv2 ABI with PCRel, do not restore the TOC as it is not
5739 // saved or used.
5740 if (isTOCSaveRestoreRequired(Subtarget)) {
5741 const MCRegister StackPtrReg = Subtarget.getStackPointerRegister();
5742
5743 SDValue StackPtr = DAG.getRegister(StackPtrReg, RegVT);
5744 unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
5745 SDValue TOCOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
5746 SDValue AddTOC = DAG.getNode(ISD::ADD, dl, RegVT, StackPtr, TOCOff);
5747 Ops.push_back(AddTOC);
5748 }
5749
5750 // Add the register used for the environment pointer.
5751 if (Subtarget.usesFunctionDescriptors() && !CFlags.HasNest)
5752 Ops.push_back(DAG.getRegister(Subtarget.getEnvironmentPointerRegister(),
5753 RegVT));
5754
5755
5756 // Add CTR register as callee so a bctr can be emitted later.
5757 if (CFlags.IsTailCall)
5758 Ops.push_back(DAG.getRegister(IsPPC64 ? PPC::CTR8 : PPC::CTR, RegVT));
5759 }
5760
5761 // If this is a tail call add stack pointer delta.
5762 if (CFlags.IsTailCall)
5763 Ops.push_back(DAG.getConstant(SPDiff, dl, MVT::i32));
5764
5765 // Add argument registers to the end of the list so that they are known live
5766 // into the call.
5767 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
5768 Ops.push_back(DAG.getRegister(RegsToPass[i].first,
5769 RegsToPass[i].second.getValueType()));
5770
5771 // We cannot add R2/X2 as an operand here for PATCHPOINT, because there is
5772 // no way to mark dependencies as implicit here.
5773 // We will add the R2/X2 dependency in EmitInstrWithCustomInserter.
5774 if ((Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) &&
5775 !CFlags.IsPatchPoint && !Subtarget.isUsingPCRelativeCalls())
5776 Ops.push_back(DAG.getRegister(Subtarget.getTOCPointerRegister(), RegVT));
5777
5778 // Add implicit use of CR bit 6 for 32-bit SVR4 vararg calls
5779 if (CFlags.IsVarArg && Subtarget.is32BitELFABI())
5780 Ops.push_back(DAG.getRegister(PPC::CR1EQ, MVT::i32));
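// For example, for a vararg call such as printf("%f\n", x), the 32-bit SVR4
// ABI expects CR bit 6 to be set when floating-point arguments are passed in
// registers so the callee's prologue knows whether to save the FPR argument
// registers for va_arg; the CR6SET/CR6UNSET nodes emitted in LowerCall_32SVR4
// provide that bit.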
5781
5782 // Add a register mask operand representing the call-preserved registers.
5783 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
5784 const uint32_t *Mask =
5785 TRI->getCallPreservedMask(DAG.getMachineFunction(), CFlags.CallConv);
5786 assert(Mask && "Missing call preserved mask for calling convention");
5787 Ops.push_back(DAG.getRegisterMask(Mask));
5788
5789 // If the glue is valid, it is the last operand.
5790 if (Glue.getNode())
5791 Ops.push_back(Glue);
5792}
5793
5794SDValue PPCTargetLowering::FinishCall(
5795 CallFlags CFlags, const SDLoc &dl, SelectionDAG &DAG,
5796 SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass, SDValue Glue,
5797 SDValue Chain, SDValue CallSeqStart, SDValue &Callee, int SPDiff,
5798 unsigned NumBytes, const SmallVectorImpl<ISD::InputArg> &Ins,
5799 SmallVectorImpl<SDValue> &InVals, const CallBase *CB) const {
5800
5801 if ((Subtarget.is64BitELFABI() && !Subtarget.isUsingPCRelativeCalls()) ||
5802 Subtarget.isAIXABI())
5803 setUsesTOCBasePtr(DAG);
5804
5805 unsigned CallOpc =
5806 getCallOpcode(CFlags, DAG.getMachineFunction().getFunction(), Callee,
5807 Subtarget, DAG.getTarget(), CB ? CB->isStrictFP() : false);
5808
5809 if (!CFlags.IsIndirect)
5810 Callee = transformCallee(Callee, DAG, dl, Subtarget);
5811 else if (Subtarget.usesFunctionDescriptors())
5812 prepareDescriptorIndirectCall(DAG, Callee, Glue, Chain, CallSeqStart, CB,
5813 dl, CFlags.HasNest, Subtarget);
5814 else
5815 prepareIndirectCall(DAG, Callee, Glue, Chain, dl);
5816
5817 // Build the operand list for the call instruction.
5818 SmallVector<SDValue, 8> Ops;
5819 buildCallOperands(Ops, CFlags, dl, DAG, RegsToPass, Glue, Chain, Callee,
5820 SPDiff, Subtarget);
5821
5822 // Emit tail call.
5823 if (CFlags.IsTailCall) {
5824 // Indirect tail call when using PC Relative calls do not have the same
5825 // constraints.
5826 assert(((Callee.getOpcode() == ISD::Register &&
5827 cast<RegisterSDNode>(Callee)->getReg() == PPC::CTR) ||
5828 Callee.getOpcode() == ISD::TargetExternalSymbol ||
5829 Callee.getOpcode() == ISD::TargetGlobalAddress ||
5830 isa<ConstantSDNode>(Callee) ||
5831 (CFlags.IsIndirect && Subtarget.isUsingPCRelativeCalls())) &&
5832 "Expecting a global address, external symbol, absolute value, "
5833 "register or an indirect tail call when PC Relative calls are "
5834 "used.");
5835 // PC Relative calls also use TC_RETURN as the way to mark tail calls.
5836 assert(CallOpc == PPCISD::TC_RETURN &&
5837 "Unexpected call opcode for a tail call.");
5839 SDValue Ret = DAG.getNode(CallOpc, dl, MVT::Other, Ops);
5840 DAG.addNoMergeSiteInfo(Ret.getNode(), CFlags.NoMerge);
5841 return Ret;
5842 }
5843
5844 std::array<EVT, 2> ReturnTypes = {{MVT::Other, MVT::Glue}};
5845 Chain = DAG.getNode(CallOpc, dl, ReturnTypes, Ops);
5846 DAG.addNoMergeSiteInfo(Chain.getNode(), CFlags.NoMerge);
5847 Glue = Chain.getValue(1);
5848
5849 // When performing tail call optimization the callee pops its arguments off
5850 // the stack. Account for this here so these bytes can be pushed back on in
5851 // PPCFrameLowering::eliminateCallFramePseudoInstr.
5852 int BytesCalleePops = (CFlags.CallConv == CallingConv::Fast &&
5853 getTargetMachine().Options.GuaranteedTailCallOpt)
5854 ? NumBytes
5855 : 0;
5856
5857 Chain = DAG.getCALLSEQ_END(Chain, NumBytes, BytesCalleePops, Glue, dl);
5858 Glue = Chain.getValue(1);
5859
5860 return LowerCallResult(Chain, Glue, CFlags.CallConv, CFlags.IsVarArg, Ins, dl,
5861 DAG, InVals);
5862}
5863
5864bool PPCTargetLowering::supportsTailCallFor(const CallBase *CB) const {
5865 CallingConv::ID CalleeCC = CB->getCallingConv();
5866 const Function *CallerFunc = CB->getCaller();
5867 CallingConv::ID CallerCC = CallerFunc->getCallingConv();
5868 const Function *CalleeFunc = CB->getCalledFunction();
5869 if (!CalleeFunc)
5870 return false;
5871 const GlobalValue *CalleeGV = dyn_cast<GlobalValue>(CalleeFunc);
5872
5873 SmallVector<ISD::OutputArg, 2> Outs;
5874 SmallVector<ISD::InputArg, 2> Ins;
5875
5876 GetReturnInfo(CalleeCC, CalleeFunc->getReturnType(),
5877 CalleeFunc->getAttributes(), Outs, *this,
5878 CalleeFunc->getDataLayout());
5879
5880 return isEligibleForTCO(CalleeGV, CalleeCC, CallerCC, CB,
5881 CalleeFunc->isVarArg(), Outs, Ins, CallerFunc,
5882 false /*isCalleeExternalSymbol*/);
5883}
5884
5885bool PPCTargetLowering::isEligibleForTCO(
5886 const GlobalValue *CalleeGV, CallingConv::ID CalleeCC,
5887 CallingConv::ID CallerCC, const CallBase *CB, bool isVarArg,
5888 const SmallVectorImpl<ISD::OutputArg> &Outs,
5889 const SmallVectorImpl<ISD::InputArg> &Ins, const Function *CallerFunc,
5890 bool isCalleeExternalSymbol) const {
5891 if (Subtarget.useLongCalls() && !(CB && CB->isMustTailCall()))
5892 return false;
5893
5894 if (Subtarget.isSVR4ABI() && Subtarget.isPPC64())
5895 return IsEligibleForTailCallOptimization_64SVR4(
5896 CalleeGV, CalleeCC, CallerCC, CB, isVarArg, Outs, Ins, CallerFunc,
5897 isCalleeExternalSymbol);
5898 else
5899 return IsEligibleForTailCallOptimization(CalleeGV, CalleeCC, CallerCC,
5900 isVarArg, Ins);
5901}
5902
5903SDValue
5904PPCTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
5905 SmallVectorImpl<SDValue> &InVals) const {
5906 SelectionDAG &DAG = CLI.DAG;
5907 SDLoc &dl = CLI.DL;
5908 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
5909 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
5910 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
5911 SDValue Chain = CLI.Chain;
5912 SDValue Callee = CLI.Callee;
5913 bool &isTailCall = CLI.IsTailCall;
5914 CallingConv::ID CallConv = CLI.CallConv;
5915 bool isVarArg = CLI.IsVarArg;
5916 bool isPatchPoint = CLI.IsPatchPoint;
5917 const CallBase *CB = CLI.CB;
5918
5919 if (isTailCall) {
5920 MachineFunction &MF = DAG.getMachineFunction();
5921 CallingConv::ID CallerCC = MF.getFunction().getCallingConv();
5922 auto *G = dyn_cast<GlobalAddressSDNode>(Callee);
5923 const GlobalValue *GV = G ? G->getGlobal() : nullptr;
5924 bool IsCalleeExternalSymbol = isa<ExternalSymbolSDNode>(Callee);
5925
5926 isTailCall =
5927 isEligibleForTCO(GV, CallConv, CallerCC, CB, isVarArg, Outs, Ins,
5928 &(MF.getFunction()), IsCalleeExternalSymbol);
5929 if (isTailCall) {
5930 ++NumTailCalls;
5931 if (!getTargetMachine().Options.GuaranteedTailCallOpt)
5932 ++NumSiblingCalls;
5933
5934 // PC Relative calls no longer guarantee that the callee is a Global
5935 // Address Node. The callee could be an indirect tail call in which
5936 // case the SDValue for the callee could be a load (to load the address
5937 // of a function pointer) or it may be a register copy (to move the
5938 // address of the callee from a function parameter into a virtual
5939 // register). It may also be an ExternalSymbolSDNode (e.g. memcpy).
5940 assert((Subtarget.isUsingPCRelativeCalls() ||
5941 isa<GlobalAddressSDNode>(Callee)) &&
5942 "Callee should be an llvm::Function object.");
5943
5944 LLVM_DEBUG(dbgs() << "TCO caller: " << DAG.getMachineFunction().getName()
5945 << "\nTCO callee: ");
5946 LLVM_DEBUG(Callee.dump());
5947 }
5948 }
5949
5950 if (!isTailCall && CB && CB->isMustTailCall())
5951 report_fatal_error("failed to perform tail call elimination on a call "
5952 "site marked musttail");
5953
5954 // When long calls (i.e. indirect calls) are always used, calls are always
5955 // made via function pointer. If we have a function name, first translate it
5956 // into a pointer.
5957 if (Subtarget.useLongCalls() && isa<GlobalAddressSDNode>(Callee) &&
5958 !isTailCall)
5959 Callee = LowerGlobalAddress(Callee, DAG);
5960
5961 CallFlags CFlags(
5962 CallConv, isTailCall, isVarArg, isPatchPoint,
5963 isIndirectCall(Callee, DAG, Subtarget, isPatchPoint),
5964 // hasNest
5965 Subtarget.is64BitELFABI() &&
5966 any_of(Outs, [](ISD::OutputArg Arg) { return Arg.Flags.isNest(); }),
5967 CLI.NoMerge);
5968
5969 if (Subtarget.isAIXABI())
5970 return LowerCall_AIX(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
5971 InVals, CB);
5972
5973 assert(Subtarget.isSVR4ABI());
5974 if (Subtarget.isPPC64())
5975 return LowerCall_64SVR4(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
5976 InVals, CB);
5977 return LowerCall_32SVR4(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
5978 InVals, CB);
5979}
5980
5981SDValue PPCTargetLowering::LowerCall_32SVR4(
5982 SDValue Chain, SDValue Callee, CallFlags CFlags,
5983 const SmallVectorImpl<ISD::OutputArg> &Outs,
5984 const SmallVectorImpl<SDValue> &OutVals,
5985 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
5986 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
5987 const CallBase *CB) const {
5988 // See PPCTargetLowering::LowerFormalArguments_32SVR4() for a description
5989 // of the 32-bit SVR4 ABI stack frame layout.
5990
5991 const CallingConv::ID CallConv = CFlags.CallConv;
5992 const bool IsVarArg = CFlags.IsVarArg;
5993 const bool IsTailCall = CFlags.IsTailCall;
5994
5995 assert((CallConv == CallingConv::C ||
5996 CallConv == CallingConv::Cold ||
5997 CallConv == CallingConv::Fast) && "Unknown calling convention!");
5998
5999 const Align PtrAlign(4);
6000
6001 MachineFunction &MF = DAG.getMachineFunction();
6002
6003 // Mark this function as potentially containing a function that contains a
6004 // tail call. As a consequence the frame pointer will be used for dynamic
6005 // stack allocation and for restoring the caller's stack pointer in this
6006 // function's epilog. This is done because the tail-called function might
6007 // overwrite the value in this function's (MF) stack pointer stack slot 0(SP).
6008 if (getTargetMachine().Options.GuaranteedTailCallOpt &&
6009 CallConv == CallingConv::Fast)
6010 MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
6011
6012 // Count how many bytes are to be pushed on the stack, including the linkage
6013 // area, parameter list area and the part of the local variable space which
6014 // contains copies of aggregates which are passed by value.
6015
6016 // Assign locations to all of the outgoing arguments.
6017 SmallVector<CCValAssign, 16> ArgLocs;
6018 PPCCCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
6019
6020 // Reserve space for the linkage area on the stack.
6021 CCInfo.AllocateStack(Subtarget.getFrameLowering()->getLinkageSize(),
6022 PtrAlign);
6023 if (useSoftFloat())
6024 CCInfo.PreAnalyzeCallOperands(Outs);
6025
6026 if (IsVarArg) {
6027 // Handle fixed and variable vector arguments differently.
6028 // Fixed vector arguments go into registers as long as registers are
6029 // available. Variable vector arguments always go into memory.
6030 unsigned NumArgs = Outs.size();
6031
6032 for (unsigned i = 0; i != NumArgs; ++i) {
6033 MVT ArgVT = Outs[i].VT;
6034 ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
6035 bool Result;
6036
6037 if (Outs[i].IsFixed) {
6038 Result = CC_PPC32_SVR4(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags,
6039 CCInfo);
6040 } else {
6041 Result = CC_PPC32_SVR4_VarArg(i, ArgVT, ArgVT, CCValAssign::Full,
6042 ArgFlags, CCInfo);
6043 }
6044
6045 if (Result) {
6046#ifndef NDEBUG
6047 errs() << "Call operand #" << i << " has unhandled type "
6048 << ArgVT << "\n";
6049#endif
6050 llvm_unreachable(nullptr);
6051 }
6052 }
6053 } else {
6054 // All arguments are treated the same.
6055 CCInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4);
6056 }
6057 CCInfo.clearWasPPCF128();
6058
6059 // Assign locations to all of the outgoing aggregate by value arguments.
6060 SmallVector<CCValAssign, 16> ByValArgLocs;
6061 CCState CCByValInfo(CallConv, IsVarArg, MF, ByValArgLocs, *DAG.getContext());
6062
6063 // Reserve stack space for the allocations in CCInfo.
6064 CCByValInfo.AllocateStack(CCInfo.getStackSize(), PtrAlign);
6065
6066 CCByValInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4_ByVal);
6067
6068 // Size of the linkage area, parameter list area and the part of the local
6069 // variable space where copies of aggregates which are passed by value are
6070 // stored.
6071 unsigned NumBytes = CCByValInfo.getStackSize();
6072
6073 // Calculate by how many bytes the stack has to be adjusted in case of tail
6074 // call optimization.
6075 int SPDiff = CalculateTailCallSPDiff(DAG, IsTailCall, NumBytes);
6076
6077 // Adjust the stack pointer for the new arguments...
6078 // These operations are automatically eliminated by the prolog/epilog pass
6079 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
6080 SDValue CallSeqStart = Chain;
6081
6082 // Load the return address and frame pointer so they can be moved somewhere
6083 // else later.
6084 SDValue LROp, FPOp;
6085 Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl);
6086
6087 // Set up a copy of the stack pointer for use loading and storing any
6088 // arguments that may not fit in the registers available for argument
6089 // passing.
6090 SDValue StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
6091
6092 SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
6093 SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
6094 SmallVector<SDValue, 8> MemOpChains;
6095
6096 bool seenFloatArg = false;
6097 // Walk the register/memloc assignments, inserting copies/loads.
6098 // i - Tracks the index into the list of registers allocated for the call
6099 // RealArgIdx - Tracks the index into the list of actual function arguments
6100 // j - Tracks the index into the list of byval arguments
6101 for (unsigned i = 0, RealArgIdx = 0, j = 0, e = ArgLocs.size();
6102 i != e;
6103 ++i, ++RealArgIdx) {
6104 CCValAssign &VA = ArgLocs[i];
6105 SDValue Arg = OutVals[RealArgIdx];
6106 ISD::ArgFlagsTy Flags = Outs[RealArgIdx].Flags;
6107
6108 if (Flags.isByVal()) {
6109 // Argument is an aggregate which is passed by value, thus we need to
6110 // create a copy of it in the local variable space of the current stack
6111 // frame (which is the stack frame of the caller) and pass the address of
6112 // this copy to the callee.
6113 assert((j < ByValArgLocs.size()) && "Index out of bounds!");
6114 CCValAssign &ByValVA = ByValArgLocs[j++];
6115 assert((VA.getValNo() == ByValVA.getValNo()) && "ValNo mismatch!");
6116
6117 // Memory reserved in the local variable space of the caller's stack frame.
6118 unsigned LocMemOffset = ByValVA.getLocMemOffset();
6119
6120 SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
6121 PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(MF.getDataLayout()),
6122 StackPtr, PtrOff);
6123
6124 // Create a copy of the argument in the local area of the current
6125 // stack frame.
6126 SDValue MemcpyCall =
6127 CreateCopyOfByValArgument(Arg, PtrOff,
6128 CallSeqStart.getNode()->getOperand(0),
6129 Flags, DAG, dl);
6130
6131 // This must go outside the CALLSEQ_START..END.
6132 SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall, NumBytes, 0,
6133 SDLoc(MemcpyCall));
6134 DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
6135 NewCallSeqStart.getNode());
6136 Chain = CallSeqStart = NewCallSeqStart;
6137
6138 // Pass the address of the aggregate copy on the stack either in a
6139 // physical register or in the parameter list area of the current stack
6140 // frame to the callee.
6141 Arg = PtrOff;
6142 }
6143
6144 // When useCRBits() is true, there can be i1 arguments.
6145 // This is because getRegisterType(MVT::i1) => MVT::i1,
6146 // and for other integer types getRegisterType() => MVT::i32.
6147 // Extend i1 and ensure the callee will get i32.
6148 if (Arg.getValueType() == MVT::i1)
6149 Arg = DAG.getNode(Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND,
6150 dl, MVT::i32, Arg);
6151
6152 if (VA.isRegLoc()) {
6153 seenFloatArg |= VA.getLocVT().isFloatingPoint();
6154 // Put argument in a physical register.
6155 if (Subtarget.hasSPE() && Arg.getValueType() == MVT::f64) {
6156 bool IsLE = Subtarget.isLittleEndian();
6157 SDValue SVal = DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
6158 DAG.getIntPtrConstant(IsLE ? 0 : 1, dl));
6159 RegsToPass.push_back(std::make_pair(VA.getLocReg(), SVal.getValue(0)));
6160 SVal = DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
6161 DAG.getIntPtrConstant(IsLE ? 1 : 0, dl));
6162 RegsToPass.push_back(std::make_pair(ArgLocs[++i].getLocReg(),
6163 SVal.getValue(0)));
6164 } else
6165 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
6166 } else {
6167 // Put argument in the parameter list area of the current stack frame.
6168 assert(VA.isMemLoc());
6169 unsigned LocMemOffset = VA.getLocMemOffset();
6170
6171 if (!IsTailCall) {
6172 SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
6173 PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(MF.getDataLayout()),
6174 StackPtr, PtrOff);
6175
6176 MemOpChains.push_back(
6177 DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));
6178 } else {
6179 // Calculate and remember argument location.
6180 CalculateTailCallArgDest(DAG, MF, false, Arg, SPDiff, LocMemOffset,
6181 TailCallArguments);
6182 }
6183 }
6184 }
6185
6186 if (!MemOpChains.empty())
6187 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
6188
6189 // Build a sequence of copy-to-reg nodes chained together with token chain
6190 // and flag operands which copy the outgoing args into the appropriate regs.
6191 SDValue InGlue;
6192 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
6193 Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
6194 RegsToPass[i].second, InGlue);
6195 InGlue = Chain.getValue(1);
6196 }
6197
6198 // Set CR bit 6 to true if this is a vararg call with floating args passed in
6199 // registers.
6200 if (IsVarArg) {
6201 SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
6202 SDValue Ops[] = { Chain, InGlue };
6203
6204 Chain = DAG.getNode(seenFloatArg ? PPCISD::CR6SET : PPCISD::CR6UNSET, dl,
6205 VTs, ArrayRef(Ops, InGlue.getNode() ? 2 : 1));
6206
6207 InGlue = Chain.getValue(1);
6208 }
6209
6210 if (IsTailCall)
6211 PrepareTailCall(DAG, InGlue, Chain, dl, SPDiff, NumBytes, LROp, FPOp,
6212 TailCallArguments);
6213
6214 return FinishCall(CFlags, dl, DAG, RegsToPass, InGlue, Chain, CallSeqStart,
6215 Callee, SPDiff, NumBytes, Ins, InVals, CB);
6216}
6217
6218// Copy an argument into memory, being careful to do this outside the
6219// call sequence for the call to which the argument belongs.
6220SDValue PPCTargetLowering::createMemcpyOutsideCallSeq(
6221 SDValue Arg, SDValue PtrOff, SDValue CallSeqStart, ISD::ArgFlagsTy Flags,
6222 SelectionDAG &DAG, const SDLoc &dl) const {
6223 SDValue MemcpyCall = CreateCopyOfByValArgument(Arg, PtrOff,
6224 CallSeqStart.getNode()->getOperand(0),
6225 Flags, DAG, dl);
6226 // The MEMCPY must go outside the CALLSEQ_START..END.
6227 int64_t FrameSize = CallSeqStart.getConstantOperandVal(1);
6228 SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall, FrameSize, 0,
6229 SDLoc(MemcpyCall));
6230 DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
6231 NewCallSeqStart.getNode());
6232 return NewCallSeqStart;
6233}
6234
6235SDValue PPCTargetLowering::LowerCall_64SVR4(
6236 SDValue Chain, SDValue Callee, CallFlags CFlags,
6238 const SmallVectorImpl<SDValue> &OutVals,
6239 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
6241 const CallBase *CB) const {
6242 bool isELFv2ABI = Subtarget.isELFv2ABI();
6243 bool isLittleEndian = Subtarget.isLittleEndian();
6244 unsigned NumOps = Outs.size();
6245 bool IsSibCall = false;
6246 bool IsFastCall = CFlags.CallConv == CallingConv::Fast;
6247
6248 EVT PtrVT = getPointerTy(DAG.getDataLayout());
6249 unsigned PtrByteSize = 8;
6250
6251 MachineFunction &MF = DAG.getMachineFunction();
6252
6253 if (CFlags.IsTailCall && !getTargetMachine().Options.GuaranteedTailCallOpt)
6254 IsSibCall = true;
6255
6256 // Mark this function as potentially containing a function that contains a
6257 // tail call. As a consequence the frame pointer will be used for dynamic
6258 // stack allocation and for restoring the caller's stack pointer in this
6259 // function's epilog. This is done because the tail-called function might
6260 // overwrite the value in this function's (MF) stack pointer stack slot 0(SP).
6261 if (getTargetMachine().Options.GuaranteedTailCallOpt && IsFastCall)
6262 MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
6263
6264 assert(!(IsFastCall && CFlags.IsVarArg) &&
6265 "fastcc not supported on varargs functions");
6266
6267 // Count how many bytes are to be pushed on the stack, including the linkage
6268 // area, and parameter passing area. On ELFv1, the linkage area is 48 bytes
6269 // reserved space for [SP][CR][LR][2 x unused][TOC]; on ELFv2, the linkage
6270 // area is 32 bytes reserved space for [SP][CR][LR][TOC].
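// For reference, the caller-allocated 64-bit ELF linkage area looks roughly
// like this (byte offsets from the stack pointer at the call site):
//   ELFv1: 0 back chain, 8 CR save, 16 LR save, 24/32 reserved, 40 TOC save
//   ELFv2: 0 back chain, 8 CR save, 16 LR save, 24 TOC save
// The parameter save area, when one is required, follows immediately after.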
6271 unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
6272 unsigned NumBytes = LinkageSize;
6273 unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
6274
6275 static const MCPhysReg GPR[] = {
6276 PPC::X3, PPC::X4, PPC::X5, PPC::X6,
6277 PPC::X7, PPC::X8, PPC::X9, PPC::X10,
6278 };
6279 static const MCPhysReg VR[] = {
6280 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
6281 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
6282 };
6283
6284 const unsigned NumGPRs = std::size(GPR);
6285 const unsigned NumFPRs = useSoftFloat() ? 0 : 13;
6286 const unsigned NumVRs = std::size(VR);
6287
6288 // On ELFv2, we can avoid allocating the parameter area if all the arguments
6289 // can be passed to the callee in registers.
6290 // For the fast calling convention, there is another check below.
6291 // Note: We should keep consistent with LowerFormalArguments_64SVR4()
6292 bool HasParameterArea = !isELFv2ABI || CFlags.IsVarArg || IsFastCall;
6293 if (!HasParameterArea) {
6294 unsigned ParamAreaSize = NumGPRs * PtrByteSize;
6295 unsigned AvailableFPRs = NumFPRs;
6296 unsigned AvailableVRs = NumVRs;
6297 unsigned NumBytesTmp = NumBytes;
6298 for (unsigned i = 0; i != NumOps; ++i) {
6299 if (Outs[i].Flags.isNest()) continue;
6300 if (CalculateStackSlotUsed(Outs[i].VT, Outs[i].ArgVT, Outs[i].Flags,
6301 PtrByteSize, LinkageSize, ParamAreaSize,
6302 NumBytesTmp, AvailableFPRs, AvailableVRs))
6303 HasParameterArea = true;
6304 }
6305 }
6306
6307 // When using the fast calling convention, we don't provide backing for
6308 // arguments that will be in registers.
6309 unsigned NumGPRsUsed = 0, NumFPRsUsed = 0, NumVRsUsed = 0;
6310
6311 // Avoid allocating parameter area for fastcc functions if all the arguments
6312 // can be passed in the registers.
6313 if (IsFastCall)
6314 HasParameterArea = false;
6315
6316 // Add up all the space actually used.
6317 for (unsigned i = 0; i != NumOps; ++i) {
6318 ISD::ArgFlagsTy Flags = Outs[i].Flags;
6319 EVT ArgVT = Outs[i].VT;
6320 EVT OrigVT = Outs[i].ArgVT;
6321
6322 if (Flags.isNest())
6323 continue;
6324
6325 if (IsFastCall) {
6326 if (Flags.isByVal()) {
6327 NumGPRsUsed += (Flags.getByValSize()+7)/8;
6328 if (NumGPRsUsed > NumGPRs)
6329 HasParameterArea = true;
6330 } else {
6331 switch (ArgVT.getSimpleVT().SimpleTy) {
6332 default: llvm_unreachable("Unexpected ValueType for argument!");
6333 case MVT::i1:
6334 case MVT::i32:
6335 case MVT::i64:
6336 if (++NumGPRsUsed <= NumGPRs)
6337 continue;
6338 break;
6339 case MVT::v4i32:
6340 case MVT::v8i16:
6341 case MVT::v16i8:
6342 case MVT::v2f64:
6343 case MVT::v2i64:
6344 case MVT::v1i128:
6345 case MVT::f128:
6346 if (++NumVRsUsed <= NumVRs)
6347 continue;
6348 break;
6349 case MVT::v4f32:
6350 if (++NumVRsUsed <= NumVRs)
6351 continue;
6352 break;
6353 case MVT::f32:
6354 case MVT::f64:
6355 if (++NumFPRsUsed <= NumFPRs)
6356 continue;
6357 break;
6358 }
6359 HasParameterArea = true;
6360 }
6361 }
6362
6363 /* Respect alignment of argument on the stack. */
6364 auto Alignment =
6365 CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
6366 NumBytes = alignTo(NumBytes, Alignment);
6367
6368 NumBytes += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
6369 if (Flags.isInConsecutiveRegsLast())
6370 NumBytes = ((NumBytes + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
6371 }
6372
6373 unsigned NumBytesActuallyUsed = NumBytes;
6374
6375 // In the old ELFv1 ABI,
6376 // the prolog code of the callee may store up to 8 GPR argument registers to
6377 // the stack, allowing va_start to index over them in memory if it is varargs.
6378 // Because we cannot tell if this is needed on the caller side, we have to
6379 // conservatively assume that it is needed. As such, make sure we have at
6380 // least enough stack space for the caller to store the 8 GPRs.
6381 // In the ELFv2 ABI, we allocate the parameter area iff a callee
6382 // really requires memory operands, e.g. a vararg function.
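// Concretely, with the 48-byte ELFv1 linkage area this floor works out to
// 48 + 8 * 8 = 112 bytes, even when the callee needs no stack arguments.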
6383 if (HasParameterArea)
6384 NumBytes = std::max(NumBytes, LinkageSize + 8 * PtrByteSize);
6385 else
6386 NumBytes = LinkageSize;
6387
6388 // Tail call needs the stack to be aligned.
6389 if (getTargetMachine().Options.GuaranteedTailCallOpt && IsFastCall)
6390 NumBytes = EnsureStackAlignment(Subtarget.getFrameLowering(), NumBytes);
6391
6392 int SPDiff = 0;
6393
6394 // Calculate by how many bytes the stack has to be adjusted in case of tail
6395 // call optimization.
6396 if (!IsSibCall)
6397 SPDiff = CalculateTailCallSPDiff(DAG, CFlags.IsTailCall, NumBytes);
6398
6399 // To protect arguments on the stack from being clobbered in a tail call,
6400 // force all the loads to happen before doing any other lowering.
6401 if (CFlags.IsTailCall)
6402 Chain = DAG.getStackArgumentTokenFactor(Chain);
6403
6404 // Adjust the stack pointer for the new arguments...
6405 // These operations are automatically eliminated by the prolog/epilog pass
6406 if (!IsSibCall)
6407 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
6408 SDValue CallSeqStart = Chain;
6409
6410 // Load the return address and frame pointer so they can be moved somewhere
6411 // else later.
6412 SDValue LROp, FPOp;
6413 Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl);
6414
6415 // Set up a copy of the stack pointer for use loading and storing any
6416 // arguments that may not fit in the registers available for argument
6417 // passing.
6418 SDValue StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
6419
6420 // Figure out which arguments are going to go in registers, and which in
6421 // memory. Also, if this is a vararg function, floating point arguments
6422 // must be stored to our stack, and loaded into integer regs as well, if
6423 // any integer regs are available for argument passing.
6424 unsigned ArgOffset = LinkageSize;
6425
6426 SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
6427 SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
6428
6429 SmallVector<SDValue, 8> MemOpChains;
6430 for (unsigned i = 0; i != NumOps; ++i) {
6431 SDValue Arg = OutVals[i];
6432 ISD::ArgFlagsTy Flags = Outs[i].Flags;
6433 EVT ArgVT = Outs[i].VT;
6434 EVT OrigVT = Outs[i].ArgVT;
6435
6436 // PtrOff will be used to store the current argument to the stack if a
6437 // register cannot be found for it.
6438 SDValue PtrOff;
6439
6440 // We re-align the argument offset for each argument, except when using the
6441 // fast calling convention, when we need to make sure we do that only when
6442 // we'll actually use a stack slot.
6443 auto ComputePtrOff = [&]() {
6444 /* Respect alignment of argument on the stack. */
6445 auto Alignment =
6446 CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
6447 ArgOffset = alignTo(ArgOffset, Alignment);
6448
6449 PtrOff = DAG.getConstant(ArgOffset, dl, StackPtr.getValueType());
6450
6451 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
6452 };
6453
6454 if (!IsFastCall) {
6455 ComputePtrOff();
6456
6457 /* Compute GPR index associated with argument offset. */
6458 GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
6459 GPR_idx = std::min(GPR_idx, NumGPRs);
6460 }
6461
6462 // Promote integers to 64-bit values.
6463 if (Arg.getValueType() == MVT::i32 || Arg.getValueType() == MVT::i1) {
6464 // FIXME: Should this use ANY_EXTEND if neither sext nor zext?
6465 unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
6466 Arg = DAG.getNode(ExtOp, dl, MVT::i64, Arg);
6467 }
6468
6469 // FIXME memcpy is used way more than necessary. Correctness first.
6470 // Note: "by value" is code for passing a structure by value, not
6471 // basic types.
6472 if (Flags.isByVal()) {
6473 // Note: Size includes alignment padding, so
6474 // struct x { short a; char b; }
6475 // will have Size = 4. With #pragma pack(1), it will have Size = 3.
6476 // These are the proper values we need for right-justifying the
6477 // aggregate in a parameter register.
6478 unsigned Size = Flags.getByValSize();
6479
6480 // An empty aggregate parameter takes up no storage and no
6481 // registers.
6482 if (Size == 0)
6483 continue;
6484
6485 if (IsFastCall)
6486 ComputePtrOff();
6487
6488 // All aggregates smaller than 8 bytes must be passed right-justified.
6489 if (Size==1 || Size==2 || Size==4) {
6490 EVT VT = (Size==1) ? MVT::i8 : ((Size==2) ? MVT::i16 : MVT::i32);
6491 if (GPR_idx != NumGPRs) {
6492 SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg,
6493 MachinePointerInfo(), VT);
6494 MemOpChains.push_back(Load.getValue(1));
6495 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6496
6497 ArgOffset += PtrByteSize;
6498 continue;
6499 }
6500 }
6501
6502 if (GPR_idx == NumGPRs && Size < 8) {
6503 SDValue AddPtr = PtrOff;
6504 if (!isLittleEndian) {
6505 SDValue Const = DAG.getConstant(PtrByteSize - Size, dl,
6506 PtrOff.getValueType());
6507 AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
6508 }
6509 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
6510 CallSeqStart,
6511 Flags, DAG, dl);
6512 ArgOffset += PtrByteSize;
6513 continue;
6514 }
6515 // Copy the object to the parameter save area if it cannot be entirely
6516 // passed in registers.
6517 // FIXME: we only need to copy the parts which need to be passed in
6518 // parameter save area. For the parts passed by registers, we don't need
6519 // to copy them to the stack although we need to allocate space for them
6520 // in parameter save area.
6521 if ((NumGPRs - GPR_idx) * PtrByteSize < Size)
6522 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, PtrOff,
6523 CallSeqStart,
6524 Flags, DAG, dl);
6525
6526 // When a register is available, pass a small aggregate right-justified.
6527 if (Size < 8 && GPR_idx != NumGPRs) {
6528 // The easiest way to get this right-justified in a register
6529 // is to copy the structure into the rightmost portion of a
6530 // local variable slot, then load the whole slot into the
6531 // register.
6532 // FIXME: The memcpy seems to produce pretty awful code for
6533 // small aggregates, particularly for packed ones.
6534 // FIXME: It would be preferable to use the slot in the
6535 // parameter save area instead of a new local variable.
6536 SDValue AddPtr = PtrOff;
6537 if (!isLittleEndian) {
6538 SDValue Const = DAG.getConstant(8 - Size, dl, PtrOff.getValueType());
6539 AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
6540 }
6541 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
6542 CallSeqStart,
6543 Flags, DAG, dl);
6544
6545 // Load the slot into the register.
6546 SDValue Load =
6547 DAG.getLoad(PtrVT, dl, Chain, PtrOff, MachinePointerInfo());
6548 MemOpChains.push_back(Load.getValue(1));
6549 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6550
6551 // Done with this argument.
6552 ArgOffset += PtrByteSize;
6553 continue;
6554 }
6555
6556 // For aggregates larger than PtrByteSize, copy the pieces of the
6557 // object that fit into registers from the parameter save area.
6558 for (unsigned j=0; j<Size; j+=PtrByteSize) {
6559 SDValue Const = DAG.getConstant(j, dl, PtrOff.getValueType());
6560 SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
6561 if (GPR_idx != NumGPRs) {
6562 unsigned LoadSizeInBits = std::min(PtrByteSize, (Size - j)) * 8;
6563 EVT ObjType = EVT::getIntegerVT(*DAG.getContext(), LoadSizeInBits);
6564 SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, AddArg,
6565 MachinePointerInfo(), ObjType);
6566
6567 MemOpChains.push_back(Load.getValue(1));
6568 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6569 ArgOffset += PtrByteSize;
6570 } else {
6571 ArgOffset += ((Size - j + PtrByteSize-1)/PtrByteSize)*PtrByteSize;
6572 break;
6573 }
6574 }
6575 continue;
6576 }
6577
6578 switch (Arg.getSimpleValueType().SimpleTy) {
6579 default: llvm_unreachable("Unexpected ValueType for argument!");
6580 case MVT::i1:
6581 case MVT::i32:
6582 case MVT::i64:
6583 if (Flags.isNest()) {
6584 // The 'nest' parameter, if any, is passed in R11.
6585 RegsToPass.push_back(std::make_pair(PPC::X11, Arg));
6586 break;
6587 }
6588
6589 // These can be scalar arguments or elements of an integer array type
6590 // passed directly. Clang may use those instead of "byval" aggregate
6591 // types to avoid forcing arguments to memory unnecessarily.
6592 if (GPR_idx != NumGPRs) {
6593 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg));
6594 } else {
6595 if (IsFastCall)
6596 ComputePtrOff();
6597
6598 assert(HasParameterArea &&
6599 "Parameter area must exist to pass an argument in memory.");
6600 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6601 true, CFlags.IsTailCall, false, MemOpChains,
6602 TailCallArguments, dl);
6603 if (IsFastCall)
6604 ArgOffset += PtrByteSize;
6605 }
6606 if (!IsFastCall)
6607 ArgOffset += PtrByteSize;
6608 break;
6609 case MVT::f32:
6610 case MVT::f64: {
6611 // These can be scalar arguments or elements of a float array type
6612 // passed directly. The latter are used to implement ELFv2 homogeneous
6613 // float aggregates.
6614
6615 // Named arguments go into FPRs first, and once they overflow, the
6616 // remaining arguments go into GPRs and then the parameter save area.
6617 // Unnamed arguments for vararg functions always go to GPRs and
6618 // then the parameter save area. For now, put all arguments to vararg
6619 // routines always in both locations (FPR *and* GPR or stack slot).
6620 bool NeedGPROrStack = CFlags.IsVarArg || FPR_idx == NumFPRs;
6621 bool NeededLoad = false;
6622
6623 // First load the argument into the next available FPR.
6624 if (FPR_idx != NumFPRs)
6625 RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg));
6626
6627 // Next, load the argument into GPR or stack slot if needed.
6628 if (!NeedGPROrStack)
6629 ;
6630 else if (GPR_idx != NumGPRs && !IsFastCall) {
6631 // FIXME: We may want to re-enable this for CallingConv::Fast on the P8
6632 // once we support fp <-> gpr moves.
6633
6634 // In the non-vararg case, this can only ever happen in the
6635 // presence of f32 array types, since otherwise we never run
6636 // out of FPRs before running out of GPRs.
6637 SDValue ArgVal;
6638
6639 // Double values are always passed in a single GPR.
6640 if (Arg.getValueType() != MVT::f32) {
6641 ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i64, Arg);
6642
6643 // Non-array float values are extended and passed in a GPR.
6644 } else if (!Flags.isInConsecutiveRegs()) {
6645 ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
6646 ArgVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, ArgVal);
6647
6648 // If we have an array of floats, we collect every odd element
6649 // together with its predecessor into one GPR.
6650 } else if (ArgOffset % PtrByteSize != 0) {
6651 SDValue Lo, Hi;
6652 Lo = DAG.getNode(ISD::BITCAST, dl, MVT::i32, OutVals[i - 1]);
6653 Hi = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
6654 if (!isLittleEndian)
6655 std::swap(Lo, Hi);
6656 ArgVal = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
6657
6658 // The final element, if even, goes into the first half of a GPR.
6659 } else if (Flags.isInConsecutiveRegsLast()) {
6660 ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
6661 ArgVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, ArgVal);
6662 if (!isLittleEndian)
6663 ArgVal = DAG.getNode(ISD::SHL, dl, MVT::i64, ArgVal,
6664 DAG.getConstant(32, dl, MVT::i32));
6665
6666 // Non-final even elements are skipped; they will be handled
6667 // together with the subsequent argument on the next go-around.
6668 } else
6669 ArgVal = SDValue();
6670
6671 if (ArgVal.getNode())
6672 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], ArgVal));
6673 } else {
6674 if (IsFastCall)
6675 ComputePtrOff();
6676
6677 // Single-precision floating-point values are mapped to the
6678 // second (rightmost) word of the stack doubleword.
6679 if (Arg.getValueType() == MVT::f32 &&
6680 !isLittleEndian && !Flags.isInConsecutiveRegs()) {
6681 SDValue ConstFour = DAG.getConstant(4, dl, PtrOff.getValueType());
6682 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour);
6683 }
6684
6685 assert(HasParameterArea &&
6686 "Parameter area must exist to pass an argument in memory.");
6687 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6688 true, CFlags.IsTailCall, false, MemOpChains,
6689 TailCallArguments, dl);
6690
6691 NeededLoad = true;
6692 }
6693 // When passing an array of floats, the array occupies consecutive
6694 // space in the argument area; only round up to the next doubleword
6695 // at the end of the array. Otherwise, each float takes 8 bytes.
6696 if (!IsFastCall || NeededLoad) {
6697 ArgOffset += (Arg.getValueType() == MVT::f32 &&
6698 Flags.isInConsecutiveRegs()) ? 4 : 8;
6699 if (Flags.isInConsecutiveRegsLast())
6700 ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
6701 }
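// Illustration of the bookkeeping above: three consecutive f32 elements of an
// ELFv2 homogeneous float aggregate advance ArgOffset by 4 bytes each (12
// total) and the last element rounds the offset up to the next doubleword
// (16); a lone f32 or any f64 simply advances ArgOffset by 8.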
6702 break;
6703 }
6704 case MVT::v4f32:
6705 case MVT::v4i32:
6706 case MVT::v8i16:
6707 case MVT::v16i8:
6708 case MVT::v2f64:
6709 case MVT::v2i64:
6710 case MVT::v1i128:
6711 case MVT::f128:
6712 // These can be scalar arguments or elements of a vector array type
6713 // passed directly. The latter are used to implement ELFv2 homogenous
6714 // vector aggregates.
6715
6716 // For a varargs call, named arguments go into VRs or on the stack as
6717 // usual; unnamed arguments always go to the stack or the corresponding
6718 // GPRs when within range. For now, we always put the value in both
6719 // locations (or even all three).
6720 if (CFlags.IsVarArg) {
6721 assert(HasParameterArea &&
6722 "Parameter area must exist if we have a varargs call.");
6723 // We could elide this store in the case where the object fits
6724 // entirely in R registers. Maybe later.
6725 SDValue Store =
6726 DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo());
6727 MemOpChains.push_back(Store);
6728 if (VR_idx != NumVRs) {
6729 SDValue Load =
6730 DAG.getLoad(MVT::v4f32, dl, Store, PtrOff, MachinePointerInfo());
6731 MemOpChains.push_back(Load.getValue(1));
6732 RegsToPass.push_back(std::make_pair(VR[VR_idx++], Load));
6733 }
6734 ArgOffset += 16;
6735 for (unsigned i=0; i<16; i+=PtrByteSize) {
6736 if (GPR_idx == NumGPRs)
6737 break;
6738 SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
6739 DAG.getConstant(i, dl, PtrVT));
6740 SDValue Load =
6741 DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo());
6742 MemOpChains.push_back(Load.getValue(1));
6743 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6744 }
6745 break;
6746 }
6747
6748 // Non-varargs Altivec params go into VRs or on the stack.
6749 if (VR_idx != NumVRs) {
6750 RegsToPass.push_back(std::make_pair(VR[VR_idx++], Arg));
6751 } else {
6752 if (IsFastCall)
6753 ComputePtrOff();
6754
6755 assert(HasParameterArea &&
6756 "Parameter area must exist to pass an argument in memory.");
6757 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6758 true, CFlags.IsTailCall, true, MemOpChains,
6759 TailCallArguments, dl);
6760 if (IsFastCall)
6761 ArgOffset += 16;
6762 }
6763
6764 if (!IsFastCall)
6765 ArgOffset += 16;
6766 break;
6767 }
6768 }
6769
6770 assert((!HasParameterArea || NumBytesActuallyUsed == ArgOffset) &&
6771 "mismatch in size of parameter area");
6772 (void)NumBytesActuallyUsed;
6773
6774 if (!MemOpChains.empty())
6775 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
6776
6777 // Check if this is an indirect call (MTCTR/BCTRL).
6778 // See prepareDescriptorIndirectCall and buildCallOperands for more
6779 // information about calls through function pointers in the 64-bit SVR4 ABI.
6780 if (CFlags.IsIndirect) {
6781 // For 64-bit ELFv2 ABI with PCRel, do not save the TOC of the
6782 // caller in the TOC save area.
6783 if (isTOCSaveRestoreRequired(Subtarget)) {
6784 assert(!CFlags.IsTailCall && "Indirect tail calls not supported");
6785 // Load r2 into a virtual register and store it to the TOC save area.
6786 setUsesTOCBasePtr(DAG);
6787 SDValue Val = DAG.getCopyFromReg(Chain, dl, PPC::X2, MVT::i64);
6788 // TOC save area offset.
6789 unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
6790 SDValue PtrOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
6791 SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
6792 Chain = DAG.getStore(Val.getValue(1), dl, Val, AddPtr,
6793 MachinePointerInfo::getStack(
6794 DAG.getMachineFunction(), TOCSaveOffset));
6795 }
6796 // In the ELFv2 ABI, R12 must contain the address of an indirect callee.
6797 // This does not mean the MTCTR instruction must use R12; it's easier
6798 // to model this as an extra parameter, so do that.
6799 if (isELFv2ABI && !CFlags.IsPatchPoint)
6800 RegsToPass.push_back(std::make_pair((unsigned)PPC::X12, Callee));
6801 }
6802
6803 // Build a sequence of copy-to-reg nodes chained together with token chain
6804 // and flag operands which copy the outgoing args into the appropriate regs.
6805 SDValue InGlue;
6806 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
6807 Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
6808 RegsToPass[i].second, InGlue);
6809 InGlue = Chain.getValue(1);
6810 }
6811
6812 if (CFlags.IsTailCall && !IsSibCall)
6813 PrepareTailCall(DAG, InGlue, Chain, dl, SPDiff, NumBytes, LROp, FPOp,
6814 TailCallArguments);
6815
6816 return FinishCall(CFlags, dl, DAG, RegsToPass, InGlue, Chain, CallSeqStart,
6817 Callee, SPDiff, NumBytes, Ins, InVals, CB);
6818}
6819
6820// Returns true when the shadow of a general purpose argument register
6821// in the parameter save area is aligned to at least 'RequiredAlign'.
6822static bool isGPRShadowAligned(MCPhysReg Reg, Align RequiredAlign) {
6823 assert(RequiredAlign.value() <= 16 &&
6824 "Required alignment greater than stack alignment.");
6825 switch (Reg) {
6826 default:
6827 report_fatal_error("called on invalid register.");
6828 case PPC::R5:
6829 case PPC::R9:
6830 case PPC::X3:
6831 case PPC::X5:
6832 case PPC::X7:
6833 case PPC::X9:
6834 // These registers are 16-byte aligned, which is the strictest alignment
6835 // we can support.
6836 return true;
6837 case PPC::R3:
6838 case PPC::R7:
6839 case PPC::X4:
6840 case PPC::X6:
6841 case PPC::X8:
6842 case PPC::X10:
6843 // The shadow of these registers in the PSA is 8 byte aligned.
6844 return RequiredAlign <= 8;
6845 case PPC::R4:
6846 case PPC::R6:
6847 case PPC::R8:
6848 case PPC::R10:
6849 return RequiredAlign <= 4;
6850 }
6851}
6852
6853static bool CC_AIX(unsigned ValNo, MVT ValVT, MVT LocVT,
6854 CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
6855 CCState &S) {
6856 AIXCCState &State = static_cast<AIXCCState &>(S);
6857 const PPCSubtarget &Subtarget = static_cast<const PPCSubtarget &>(
6858 State.getMachineFunction().getSubtarget());
6859 const bool IsPPC64 = Subtarget.isPPC64();
6860 const unsigned PtrSize = IsPPC64 ? 8 : 4;
6861 const Align PtrAlign(PtrSize);
6862 const Align StackAlign(16);
6863 const MVT RegVT = IsPPC64 ? MVT::i64 : MVT::i32;
6864
6865 if (ValVT == MVT::f128)
6866 report_fatal_error("f128 is unimplemented on AIX.");
6867
6868 if (ArgFlags.isNest())
6869 report_fatal_error("Nest arguments are unimplemented.");
6870
6871 static const MCPhysReg GPR_32[] = {// 32-bit registers.
6872 PPC::R3, PPC::R4, PPC::R5, PPC::R6,
6873 PPC::R7, PPC::R8, PPC::R9, PPC::R10};
6874 static const MCPhysReg GPR_64[] = {// 64-bit registers.
6875 PPC::X3, PPC::X4, PPC::X5, PPC::X6,
6876 PPC::X7, PPC::X8, PPC::X9, PPC::X10};
6877
6878 static const MCPhysReg VR[] = {// Vector registers.
6879 PPC::V2, PPC::V3, PPC::V4, PPC::V5,
6880 PPC::V6, PPC::V7, PPC::V8, PPC::V9,
6881 PPC::V10, PPC::V11, PPC::V12, PPC::V13};
6882
6883 const ArrayRef<MCPhysReg> GPRs = IsPPC64 ? GPR_64 : GPR_32;
6884
6885 if (ArgFlags.isByVal()) {
6886 const Align ByValAlign(ArgFlags.getNonZeroByValAlign());
6887 if (ByValAlign > StackAlign)
6888 report_fatal_error("Pass-by-value arguments with alignment greater than "
6889 "16 are not supported.");
6890
6891 const unsigned ByValSize = ArgFlags.getByValSize();
6892 const Align ObjAlign = ByValAlign > PtrAlign ? ByValAlign : PtrAlign;
6893
6894 // An empty aggregate parameter takes up no storage and no registers,
6895 // but needs a MemLoc for a stack slot for the formal arguments side.
6896 if (ByValSize == 0) {
6897 State.addLoc(CCValAssign::getMem(ValNo, MVT::INVALID_SIMPLE_VALUE_TYPE,
6898 State.getStackSize(), RegVT, LocInfo));
6899 return false;
6900 }
6901
6902 // Shadow allocate any registers that are not properly aligned.
6903 unsigned NextReg = State.getFirstUnallocated(GPRs);
6904 while (NextReg != GPRs.size() &&
6905 !isGPRShadowAligned(GPRs[NextReg], ObjAlign)) {
6906 // Shadow allocate the next register since its alignment is not strict enough.
6907 unsigned Reg = State.AllocateReg(GPRs);
6908 // Allocate the stack space shadowed by said register.
6909 State.AllocateStack(PtrSize, PtrAlign);
6910 assert(Reg && "Allocating register unexpectedly failed.");
6911 (void)Reg;
6912 NextReg = State.getFirstUnallocated(GPRs);
6913 }
6914
6915 const unsigned StackSize = alignTo(ByValSize, ObjAlign);
6916 unsigned Offset = State.AllocateStack(StackSize, ObjAlign);
6917 for (const unsigned E = Offset + StackSize; Offset < E; Offset += PtrSize) {
6918 if (unsigned Reg = State.AllocateReg(GPRs))
6919 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, RegVT, LocInfo));
6920 else {
6921 State.addLoc(CCValAssign::getMem(ValNo, MVT::INVALID_SIMPLE_VALUE_TYPE,
6922 Offset, MVT::INVALID_SIMPLE_VALUE_TYPE,
6923 LocInfo));
6924 break;
6925 }
6926 }
6927 return false;
6928 }
6929
6930 // Arguments always reserve parameter save area.
6931 switch (ValVT.SimpleTy) {
6932 default:
6933 report_fatal_error("Unhandled value type for argument.");
6934 case MVT::i64:
6935 // i64 arguments should have been split to i32 for PPC32.
6936 assert(IsPPC64 && "PPC32 should have split i64 values.");
6937 [[fallthrough]];
6938 case MVT::i1:
6939 case MVT::i32: {
6940 const unsigned Offset = State.AllocateStack(PtrSize, PtrAlign);
6941 // AIX integer arguments are always passed in register width.
6942 if (ValVT.getFixedSizeInBits() < RegVT.getFixedSizeInBits())
6943 LocInfo = ArgFlags.isSExt() ? CCValAssign::LocInfo::SExt
6944 : CCValAssign::LocInfo::ZExt;
6945 if (unsigned Reg = State.AllocateReg(GPRs))
6946 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, RegVT, LocInfo));
6947 else
6948 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, RegVT, LocInfo));
6949
6950 return false;
6951 }
6952 case MVT::f32:
6953 case MVT::f64: {
6954 // The parameter save area (PSA) is reserved even if the float is passed in an FPR.
6955 const unsigned StoreSize = LocVT.getStoreSize();
6956 // Floats are always 4-byte aligned in the PSA on AIX.
6957 // This includes f64 in 64-bit mode for ABI compatibility.
6958 const unsigned Offset =
6959 State.AllocateStack(IsPPC64 ? 8 : StoreSize, Align(4));
6960 unsigned FReg = State.AllocateReg(FPR);
6961 if (FReg)
6962 State.addLoc(CCValAssign::getReg(ValNo, ValVT, FReg, LocVT, LocInfo));
6963
6964 // Reserve and initialize GPRs or initialize the PSA as required.
6965 for (unsigned I = 0; I < StoreSize; I += PtrSize) {
6966 if (unsigned Reg = State.AllocateReg(GPRs)) {
6967 assert(FReg && "An FPR should be available when a GPR is reserved.");
6968 if (State.isVarArg()) {
6969 // Successfully reserved GPRs are only initialized for vararg calls.
6970 // Custom handling is required for:
6971 // f64 in PPC32 needs to be split into 2 GPRs.
6972 // f32 in PPC64 needs to occupy only lower 32 bits of 64-bit GPR.
6973 State.addLoc(
6974 CCValAssign::getCustomReg(ValNo, ValVT, Reg, RegVT, LocInfo));
6975 }
6976 } else {
6977 // If there are insufficient GPRs, the PSA needs to be initialized.
6978 // Initialization occurs even if an FPR was initialized for
6979 // compatibility with the AIX XL compiler. The full memory for the
6980 // argument will be initialized even if a prior word is saved in GPR.
6981 // A custom memLoc is used when the argument also passes in FPR so
6982 // that the callee handling can skip over it easily.
6983 State.addLoc(
6984 FReg ? CCValAssign::getCustomMem(ValNo, ValVT, Offset, LocVT,
6985 LocInfo)
6986 : CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
6987 break;
6988 }
6989 }
6990
6991 return false;
6992 }
6993 case MVT::v4f32:
6994 case MVT::v4i32:
6995 case MVT::v8i16:
6996 case MVT::v16i8:
6997 case MVT::v2i64:
6998 case MVT::v2f64:
6999 case MVT::v1i128: {
7000 const unsigned VecSize = 16;
7001 const Align VecAlign(VecSize);
7002
7003 if (!State.isVarArg()) {
7004 // If there are vector registers remaining we don't consume any stack
7005 // space.
7006 if (unsigned VReg = State.AllocateReg(VR)) {
7007 State.addLoc(CCValAssign::getReg(ValNo, ValVT, VReg, LocVT, LocInfo));
7008 return false;
7009 }
7010 // Vectors passed on the stack do not shadow GPRs or FPRs even though they
7011 // might be allocated in the portion of the PSA that is shadowed by the
7012 // GPRs.
7013 const unsigned Offset = State.AllocateStack(VecSize, VecAlign);
7014 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
7015 return false;
7016 }
7017
7018 unsigned NextRegIndex = State.getFirstUnallocated(GPRs);
7019 // Burn any underaligned registers and their shadowed stack space until
7020 // we reach the required alignment.
7021 while (NextRegIndex != GPRs.size() &&
7022 !isGPRShadowAligned(GPRs[NextRegIndex], VecAlign)) {
7023 // Shadow allocate register and its stack shadow.
7024 unsigned Reg = State.AllocateReg(GPRs);
7025 State.AllocateStack(PtrSize, PtrAlign);
7026 assert(Reg && "Allocating register unexpectedly failed.");
7027 (void)Reg;
7028 NextRegIndex = State.getFirstUnallocated(GPRs);
7029 }
7030
7031 // Vectors that are passed as fixed arguments are handled differently.
7032 // They are passed in VRs if any are available (unlike arguments passed
7033 // through ellipses) and shadow GPRs (unlike arguments to non-vararg
7034 // functions).
7035 if (State.isFixed(ValNo)) {
7036 if (unsigned VReg = State.AllocateReg(VR)) {
7037 State.addLoc(CCValAssign::getReg(ValNo, ValVT, VReg, LocVT, LocInfo));
7038 // Shadow allocate GPRs and stack space even though we pass in a VR.
7039 for (unsigned I = 0; I != VecSize; I += PtrSize)
7040 State.AllocateReg(GPRs);
7041 State.AllocateStack(VecSize, VecAlign);
7042 return false;
7043 }
7044 // No vector registers remain so pass on the stack.
7045 const unsigned Offset = State.AllocateStack(VecSize, VecAlign);
7046 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
7047 return false;
7048 }
7049
7050 // If all GPRs are consumed then we pass the argument fully on the stack.
7051 if (NextRegIndex == GPRs.size()) {
7052 const unsigned Offset = State.AllocateStack(VecSize, VecAlign);
7053 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
7054 return false;
7055 }
7056
7057 // Corner case for 32-bit codegen. We have 2 registers to pass the first
7058 // half of the argument, and then need to pass the remaining half on the
7059 // stack.
7060 if (GPRs[NextRegIndex] == PPC::R9) {
7061 const unsigned Offset = State.AllocateStack(VecSize, VecAlign);
7062 State.addLoc(
7063 CCValAssign::getCustomMem(ValNo, ValVT, Offset, LocVT, LocInfo));
7064
7065 const unsigned FirstReg = State.AllocateReg(PPC::R9);
7066 const unsigned SecondReg = State.AllocateReg(PPC::R10);
7067 assert(FirstReg && SecondReg &&
7068 "Allocating R9 or R10 unexpectedly failed.");
7069 State.addLoc(
7070 CCValAssign::getCustomReg(ValNo, ValVT, FirstReg, RegVT, LocInfo));
7071 State.addLoc(
7072 CCValAssign::getCustomReg(ValNo, ValVT, SecondReg, RegVT, LocInfo));
7073 return false;
7074 }
7075
7076 // We have enough GPRs to fully pass the vector argument, and we have
7077 // already consumed any underaligned registers. Start with the custom
7078 // MemLoc and then the custom RegLocs.
7079 const unsigned Offset = State.AllocateStack(VecSize, VecAlign);
7080 State.addLoc(
7081 CCValAssign::getCustomMem(ValNo, ValVT, Offset, LocVT, LocInfo));
7082 for (unsigned I = 0; I != VecSize; I += PtrSize) {
7083 const unsigned Reg = State.AllocateReg(GPRs);
7084 assert(Reg && "Failed to allocated register for vararg vector argument");
7085 State.addLoc(
7086 CCValAssign::getCustomReg(ValNo, ValVT, Reg, RegVT, LocInfo));
7087 }
7088 return false;
7089 }
7090 }
7091 return true;
7092}
7093
7094// So far, this function is only used by LowerFormalArguments_AIX()
7095 static const TargetRegisterClass *getRegClassForSVT(MVT::SimpleValueType SVT,
7096 bool IsPPC64,
7097 bool HasP8Vector,
7098 bool HasVSX) {
7099 assert((IsPPC64 || SVT != MVT::i64) &&
7100 "i64 should have been split for 32-bit codegen.");
7101
7102 switch (SVT) {
7103 default:
7104 report_fatal_error("Unexpected value type for formal argument");
7105 case MVT::i1:
7106 case MVT::i32:
7107 case MVT::i64:
7108 return IsPPC64 ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
7109 case MVT::f32:
7110 return HasP8Vector ? &PPC::VSSRCRegClass : &PPC::F4RCRegClass;
7111 case MVT::f64:
7112 return HasVSX ? &PPC::VSFRCRegClass : &PPC::F8RCRegClass;
7113 case MVT::v4f32:
7114 case MVT::v4i32:
7115 case MVT::v8i16:
7116 case MVT::v16i8:
7117 case MVT::v2i64:
7118 case MVT::v2f64:
7119 case MVT::v1i128:
7120 return &PPC::VRRCRegClass;
7121 }
7122}
7123
7124 static SDValue truncateScalarIntegerArg(ISD::ArgFlagsTy Flags, EVT ValVT,
7125 SelectionDAG &DAG, SDValue ArgValue,
7126 MVT LocVT, const SDLoc &dl) {
7127 assert(ValVT.isScalarInteger() && LocVT.isScalarInteger());
7128 assert(ValVT.getFixedSizeInBits() < LocVT.getFixedSizeInBits());
7129
7130 if (Flags.isSExt())
7131 ArgValue = DAG.getNode(ISD::AssertSext, dl, LocVT, ArgValue,
7132 DAG.getValueType(ValVT));
7133 else if (Flags.isZExt())
7134 ArgValue = DAG.getNode(ISD::AssertZext, dl, LocVT, ArgValue,
7135 DAG.getValueType(ValVT));
7136
7137 return DAG.getNode(ISD::TRUNCATE, dl, ValVT, ArgValue);
7138}
7139
7140static unsigned mapArgRegToOffsetAIX(unsigned Reg, const PPCFrameLowering *FL) {
7141 const unsigned LASize = FL->getLinkageSize();
7142
7143 if (PPC::GPRCRegClass.contains(Reg)) {
7144 assert(Reg >= PPC::R3 && Reg <= PPC::R10 &&
7145 "Reg must be a valid argument register!");
7146 return LASize + 4 * (Reg - PPC::R3);
7147 }
7148
7149 if (PPC::G8RCRegClass.contains(Reg)) {
7150 assert(Reg >= PPC::X3 && Reg <= PPC::X10 &&
7151 "Reg must be a valid argument register!");
7152 return LASize + 8 * (Reg - PPC::X3);
7153 }
7154
7155 llvm_unreachable("Only general purpose registers expected.");
7156}
7157
7158// AIX ABI Stack Frame Layout:
7159//
7160// Low Memory +--------------------------------------------+
7161// SP +---> | Back chain | ---+
7162// | +--------------------------------------------+ |
7163// | | Saved Condition Register | |
7164// | +--------------------------------------------+ |
7165// | | Saved Linkage Register | |
7166// | +--------------------------------------------+ | Linkage Area
7167// | | Reserved for compilers | |
7168// | +--------------------------------------------+ |
7169// | | Reserved for binders | |
7170// | +--------------------------------------------+ |
7171// | | Saved TOC pointer | ---+
7172// | +--------------------------------------------+
7173// | | Parameter save area |
7174// | +--------------------------------------------+
7175// | | Alloca space |
7176// | +--------------------------------------------+
7177// | | Local variable space |
7178// | +--------------------------------------------+
7179// | | Float/int conversion temporary |
7180// | +--------------------------------------------+
7181// | | Save area for AltiVec registers |
7182// | +--------------------------------------------+
7183// | | AltiVec alignment padding |
7184// | +--------------------------------------------+
7185// | | Save area for VRSAVE register |
7186// | +--------------------------------------------+
7187// | | Save area for General Purpose registers |
7188// | +--------------------------------------------+
7189// | | Save area for Floating Point registers |
7190// | +--------------------------------------------+
7191// +---- | Back chain |
7192// High Memory +--------------------------------------------+
7193//
7194// Specifications:
7195// AIX 7.2 Assembler Language Reference
7196// Subroutine linkage convention
7197
7198SDValue PPCTargetLowering::LowerFormalArguments_AIX(
7199 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
7200 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
7201 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
7202
7203 assert((CallConv == CallingConv::C || CallConv == CallingConv::Cold ||
7204 CallConv == CallingConv::Fast) &&
7205 "Unexpected calling convention!");
7206
7207 if (getTargetMachine().Options.GuaranteedTailCallOpt)
7208 report_fatal_error("Tail call support is unimplemented on AIX.");
7209
7210 if (useSoftFloat())
7211 report_fatal_error("Soft float support is unimplemented on AIX.");
7212
7213 const PPCSubtarget &Subtarget = DAG.getSubtarget<PPCSubtarget>();
7214
7215 const bool IsPPC64 = Subtarget.isPPC64();
7216 const unsigned PtrByteSize = IsPPC64 ? 8 : 4;
7217
7218 // Assign locations to all of the incoming arguments.
7219 SmallVector<CCValAssign, 16> ArgLocs;
7220 MachineFunction &MF = DAG.getMachineFunction();
7221 MachineFrameInfo &MFI = MF.getFrameInfo();
7222 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
7223 AIXCCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());
7224
7225 const EVT PtrVT = getPointerTy(MF.getDataLayout());
7226 // Reserve space for the linkage area on the stack.
7227 const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
7228 CCInfo.AllocateStack(LinkageSize, Align(PtrByteSize));
7229 uint64_t SaveStackPos = CCInfo.getStackSize();
7230 bool SaveParams = MF.getFunction().hasFnAttribute("save-reg-params");
7231 CCInfo.AnalyzeFormalArguments(Ins, CC_AIX);
7232
7233 SmallVector<SDValue, 8> MemOps;
7234
7235 for (size_t I = 0, End = ArgLocs.size(); I != End; /* No increment here */) {
7236 CCValAssign &VA = ArgLocs[I++];
7237 MVT LocVT = VA.getLocVT();
7238 MVT ValVT = VA.getValVT();
7239 ISD::ArgFlagsTy Flags = Ins[VA.getValNo()].Flags;
7240 // For compatibility with the AIX XL compiler, the float args in the
7241 // parameter save area are initialized even if the argument is available
7242 // in register. The caller is required to initialize both the register
7243 // and memory, however, the callee can choose to expect it in either.
7244 // The memloc is dismissed here because the argument is retrieved from
7245 // the register.
7246 if (VA.isMemLoc() && VA.needsCustom() && ValVT.isFloatingPoint())
7247 continue;
7248
7249 if (SaveParams && VA.isRegLoc() && !Flags.isByVal() && !VA.needsCustom()) {
7250 const TargetRegisterClass *RegClass = getRegClassForSVT(
7251 LocVT.SimpleTy, IsPPC64, Subtarget.hasP8Vector(), Subtarget.hasVSX());
7252 // On PPC64, debugger assumes extended 8-byte values are stored from GPR.
7253 MVT SaveVT = RegClass == &PPC::G8RCRegClass ? MVT::i64 : LocVT;
7254 const Register VReg = MF.addLiveIn(VA.getLocReg(), RegClass);
7255 SDValue Parm = DAG.getCopyFromReg(Chain, dl, VReg, SaveVT);
7256 int FI = MFI.CreateFixedObject(SaveVT.getStoreSize(), SaveStackPos, true);
7257 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
7258 SDValue StoreReg = DAG.getStore(Chain, dl, Parm, FIN,
7259 MachinePointerInfo(), Align(PtrByteSize));
7260 SaveStackPos = alignTo(SaveStackPos + SaveVT.getStoreSize(), PtrByteSize);
7261 MemOps.push_back(StoreReg);
7262 }
7263
7264 if (SaveParams && (VA.isMemLoc() || Flags.isByVal()) && !VA.needsCustom()) {
7265 unsigned StoreSize =
7266 Flags.isByVal() ? Flags.getByValSize() : LocVT.getStoreSize();
7267 SaveStackPos = alignTo(SaveStackPos + StoreSize, PtrByteSize);
7268 }
7269
7270 auto HandleMemLoc = [&]() {
7271 const unsigned LocSize = LocVT.getStoreSize();
7272 const unsigned ValSize = ValVT.getStoreSize();
7273 assert((ValSize <= LocSize) &&
7274 "Object size is larger than size of MemLoc");
7275 int CurArgOffset = VA.getLocMemOffset();
7276 // Objects are right-justified because AIX is big-endian.
7277 if (LocSize > ValSize)
7278 CurArgOffset += LocSize - ValSize;
7279 // Potential tail calls could cause overwriting of argument stack slots.
7280 const bool IsImmutable =
7281 !(getTargetMachine().Options.GuaranteedTailCallOpt &&
7282 (CallConv == CallingConv::Fast));
7283 int FI = MFI.CreateFixedObject(ValSize, CurArgOffset, IsImmutable);
7284 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
7285 SDValue ArgValue =
7286 DAG.getLoad(ValVT, dl, Chain, FIN, MachinePointerInfo());
7287 InVals.push_back(ArgValue);
7288 };
7289
7290 // Vector arguments to VaArg functions are passed both on the stack, and
7291 // in any available GPRs. Load the value from the stack and add the GPRs
7292 // as live ins.
7293 if (VA.isMemLoc() && VA.needsCustom()) {
7294 assert(ValVT.isVector() && "Unexpected Custom MemLoc type.");
7295 assert(isVarArg && "Only use custom memloc for vararg.");
7296 // ValNo of the custom MemLoc, so we can compare it to the ValNo of the
7297 // matching custom RegLocs.
7298 const unsigned OriginalValNo = VA.getValNo();
7299 (void)OriginalValNo;
7300
7301 auto HandleCustomVecRegLoc = [&]() {
7302 assert(I != End && ArgLocs[I].isRegLoc() && ArgLocs[I].needsCustom() &&
7303 "Missing custom RegLoc.");
7304 VA = ArgLocs[I++];
7305 assert(VA.getValVT().isVector() &&
7306 "Unexpected Val type for custom RegLoc.");
7307 assert(VA.getValNo() == OriginalValNo &&
7308 "ValNo mismatch between custom MemLoc and RegLoc.");
7309 MVT::SimpleValueType SVT = VA.getLocVT().SimpleTy;
7310 MF.addLiveIn(VA.getLocReg(),
7311 getRegClassForSVT(SVT, IsPPC64, Subtarget.hasP8Vector(),
7312 Subtarget.hasVSX()));
7313 };
7314
7315 HandleMemLoc();
7316 // In 64-bit there will be exactly 2 custom RegLocs that follow, and in
7317 // 32-bit there will be 2 custom RegLocs if we are passing in R9 and
7318 // R10.
7319 HandleCustomVecRegLoc();
7320 HandleCustomVecRegLoc();
7321
7322 // If we are targeting 32-bit, there might be 2 extra custom RegLocs if
7323 // we passed the vector in R5, R6, R7 and R8.
7324 if (I != End && ArgLocs[I].isRegLoc() && ArgLocs[I].needsCustom()) {
7325 assert(!IsPPC64 &&
7326 "Only 2 custom RegLocs expected for 64-bit codegen.");
7327 HandleCustomVecRegLoc();
7328 HandleCustomVecRegLoc();
7329 }
7330
7331 continue;
7332 }
7333
7334 if (VA.isRegLoc()) {
7335 if (VA.getValVT().isScalarInteger())
7336 FuncInfo->appendParameterType(PPCFunctionInfo::FixedType);
7337 else if (VA.getValVT().isFloatingPoint() && !VA.getValVT().isVector()) {
7338 switch (VA.getValVT().SimpleTy) {
7339 default:
7340 report_fatal_error("Unhandled value type for argument.");
7341 case MVT::f32:
7342 FuncInfo->appendParameterType(PPCFunctionInfo::ShortFloatingPoint);
7343 break;
7344 case MVT::f64:
7345 FuncInfo->appendParameterType(PPCFunctionInfo::LongFloatingPoint);
7346 break;
7347 }
7348 } else if (VA.getValVT().isVector()) {
7349 switch (VA.getValVT().SimpleTy) {
7350 default:
7351 report_fatal_error("Unhandled value type for argument.");
7352 case MVT::v16i8:
7353 FuncInfo->appendParameterType(PPCFunctionInfo::VectorChar);
7354 break;
7355 case MVT::v8i16:
7356 FuncInfo->appendParameterType(PPCFunctionInfo::VectorShort);
7357 break;
7358 case MVT::v4i32:
7359 case MVT::v2i64:
7360 case MVT::v1i128:
7361 FuncInfo->appendParameterType(PPCFunctionInfo::VectorInt);
7362 break;
7363 case MVT::v4f32:
7364 case MVT::v2f64:
7365 FuncInfo->appendParameterType(PPCFunctionInfo::VectorFloat);
7366 break;
7367 }
7368 }
7369 }
7370
7371 if (Flags.isByVal() && VA.isMemLoc()) {
7372 const unsigned Size =
7373 alignTo(Flags.getByValSize() ? Flags.getByValSize() : PtrByteSize,
7374 PtrByteSize);
7375 const int FI = MF.getFrameInfo().CreateFixedObject(
7376 Size, VA.getLocMemOffset(), /* IsImmutable */ false,
7377 /* IsAliased */ true);
7378 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
7379 InVals.push_back(FIN);
7380
7381 continue;
7382 }
7383
7384 if (Flags.isByVal()) {
7385 assert(VA.isRegLoc() && "MemLocs should already be handled.");
7386
7387 const MCPhysReg ArgReg = VA.getLocReg();
7388 const PPCFrameLowering *FL = Subtarget.getFrameLowering();
7389
7390 const unsigned StackSize = alignTo(Flags.getByValSize(), PtrByteSize);
7391 const int FI = MF.getFrameInfo().CreateFixedObject(
7392 StackSize, mapArgRegToOffsetAIX(ArgReg, FL), /* IsImmutable */ false,
7393 /* IsAliased */ true);
7394 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
7395 InVals.push_back(FIN);
7396
7397 // Add live ins for all the RegLocs for the same ByVal.
7398 const TargetRegisterClass *RegClass =
7399 IsPPC64 ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
7400
7401 auto HandleRegLoc = [&, RegClass, LocVT](const MCPhysReg PhysReg,
7402 unsigned Offset) {
7403 const Register VReg = MF.addLiveIn(PhysReg, RegClass);
7404 // Since the caller's side has left-justified the aggregate in the
7405 // register, we can simply store the entire register into the stack
7406 // slot.
7407 SDValue CopyFrom = DAG.getCopyFromReg(Chain, dl, VReg, LocVT);
7408 // The store to the fixed-stack object is needed because accessing a
7409 // field of the ByVal will use a gep and load. Ideally we will optimize
7410 // to extracting the value from the register directly, and elide the
7411 // stores when the argument's address is not taken, but that will need to
7412 // be future work.
7413 SDValue Store = DAG.getStore(
7414 CopyFrom.getValue(1), dl, CopyFrom,
7415 DAG.getObjectPtrOffset(dl, FIN, TypeSize::getFixed(Offset)),
7416 MachinePointerInfo::getFixedStack(MF, FI, Offset));
7417
7418 MemOps.push_back(Store);
7419 };
7420
7421 unsigned Offset = 0;
7422 HandleRegLoc(VA.getLocReg(), Offset);
7423 Offset += PtrByteSize;
7424 for (; Offset != StackSize && ArgLocs[I].isRegLoc();
7425 Offset += PtrByteSize) {
7426 assert(ArgLocs[I].getValNo() == VA.getValNo() &&
7427 "RegLocs should be for ByVal argument.");
7428
7429 const CCValAssign RL = ArgLocs[I++];
7430 HandleRegLoc(RL.getLocReg(), Offset);
7432 }
7433
7434 if (Offset != StackSize) {
7435 assert(ArgLocs[I].getValNo() == VA.getValNo() &&
7436 "Expected MemLoc for remaining bytes.");
7437 assert(ArgLocs[I].isMemLoc() && "Expected MemLoc for remaining bytes.");
7438 // Consume the MemLoc. The InVal has already been emitted, so nothing
7439 // more needs to be done.
7440 ++I;
7441 }
7442
7443 continue;
7444 }
7445
7446 if (VA.isRegLoc() && !VA.needsCustom()) {
7447 MVT::SimpleValueType SVT = ValVT.SimpleTy;
7448 Register VReg =
7449 MF.addLiveIn(VA.getLocReg(),
7450 getRegClassForSVT(SVT, IsPPC64, Subtarget.hasP8Vector(),
7451 Subtarget.hasVSX()));
7452 SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, VReg, LocVT);
7453 if (ValVT.isScalarInteger() &&
7454 (ValVT.getFixedSizeInBits() < LocVT.getFixedSizeInBits())) {
7455 ArgValue =
7456 truncateScalarIntegerArg(Flags, ValVT, DAG, ArgValue, LocVT, dl);
7457 }
7458 InVals.push_back(ArgValue);
7459 continue;
7460 }
7461 if (VA.isMemLoc()) {
7462 HandleMemLoc();
7463 continue;
7464 }
7465 }
7466
7467 // On AIX a minimum of 8 words is saved to the parameter save area.
7468 const unsigned MinParameterSaveArea = 8 * PtrByteSize;
7469 // Area that is at least reserved in the caller of this function.
7470 unsigned CallerReservedArea = std::max<unsigned>(
7471 CCInfo.getStackSize(), LinkageSize + MinParameterSaveArea);
7472
7473 // Set the size that is at least reserved in caller of this function. Tail
7474 // call optimized function's reserved stack space needs to be aligned so
7475 // that taking the difference between two stack areas will result in an
7476 // aligned stack.
7477 CallerReservedArea =
7478 EnsureStackAlignment(Subtarget.getFrameLowering(), CallerReservedArea);
7479 FuncInfo->setMinReservedArea(CallerReservedArea);
7480
7481 if (isVarArg) {
7482 FuncInfo->setVarArgsFrameIndex(
7483 MFI.CreateFixedObject(PtrByteSize, CCInfo.getStackSize(), true));
7484 SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
7485
7486 static const MCPhysReg GPR_32[] = {PPC::R3, PPC::R4, PPC::R5, PPC::R6,
7487 PPC::R7, PPC::R8, PPC::R9, PPC::R10};
7488
7489 static const MCPhysReg GPR_64[] = {PPC::X3, PPC::X4, PPC::X5, PPC::X6,
7490 PPC::X7, PPC::X8, PPC::X9, PPC::X10};
7491 const unsigned NumGPArgRegs = std::size(IsPPC64 ? GPR_64 : GPR_32);
7492
7493 // The fixed integer arguments of a variadic function are stored to the
7494 // VarArgsFrameIndex on the stack so that they may be loaded by
7495 // dereferencing the result of va_next.
7496 for (unsigned GPRIndex =
7497 (CCInfo.getStackSize() - LinkageSize) / PtrByteSize;
7498 GPRIndex < NumGPArgRegs; ++GPRIndex) {
7499
7500 const Register VReg =
7501 IsPPC64 ? MF.addLiveIn(GPR_64[GPRIndex], &PPC::G8RCRegClass)
7502 : MF.addLiveIn(GPR_32[GPRIndex], &PPC::GPRCRegClass);
7503
7504 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
7505 SDValue Store =
7506 DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
7507 MemOps.push_back(Store);
7508 // Increment the address for the next argument to store.
7509 SDValue PtrOff = DAG.getConstant(PtrByteSize, dl, PtrVT);
7510 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
7511 }
7512 }
7513
7514 if (!MemOps.empty())
7515 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
7516
7517 return Chain;
7518}
7519
7520SDValue PPCTargetLowering::LowerCall_AIX(
7521 SDValue Chain, SDValue Callee, CallFlags CFlags,
7522 const SmallVectorImpl<ISD::OutputArg> &Outs,
7523 const SmallVectorImpl<SDValue> &OutVals,
7524 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
7525 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
7526 const CallBase *CB) const {
7527 // See PPCTargetLowering::LowerFormalArguments_AIX() for a description of the
7528 // AIX ABI stack frame layout.
7529
7530 assert((CFlags.CallConv == CallingConv::C ||
7531 CFlags.CallConv == CallingConv::Cold ||
7532 CFlags.CallConv == CallingConv::Fast) &&
7533 "Unexpected calling convention!");
7534
7535 if (CFlags.IsPatchPoint)
7536 report_fatal_error("This call type is unimplemented on AIX.");
7537
7538 const PPCSubtarget &Subtarget = DAG.getSubtarget<PPCSubtarget>();
7539
7540 MachineFunction &MF = DAG.getMachineFunction();
7541 SmallVector<CCValAssign, 16> ArgLocs;
7542 AIXCCState CCInfo(CFlags.CallConv, CFlags.IsVarArg, MF, ArgLocs,
7543 *DAG.getContext());
7544
7545 // Reserve space for the linkage save area (LSA) on the stack.
7546 // In both PPC32 and PPC64 there are 6 reserved slots in the LSA:
7547 // [SP][CR][LR][2 x reserved][TOC].
7548 // The LSA is 24 bytes (6x4) in PPC32 and 48 bytes (6x8) in PPC64.
7549 const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
7550 const bool IsPPC64 = Subtarget.isPPC64();
7551 const EVT PtrVT = getPointerTy(DAG.getDataLayout());
7552 const unsigned PtrByteSize = IsPPC64 ? 8 : 4;
7553 CCInfo.AllocateStack(LinkageSize, Align(PtrByteSize));
7554 CCInfo.AnalyzeCallOperands(Outs, CC_AIX);
7555
7556 // The prolog code of the callee may store up to 8 GPR argument registers to
7557 // the stack, allowing va_start to index over them in memory if the callee
7558 // is variadic.
7559 // Because we cannot tell if this is needed on the caller side, we have to
7560 // conservatively assume that it is needed. As such, make sure we have at
7561 // least enough stack space for the caller to store the 8 GPRs.
7562 const unsigned MinParameterSaveAreaSize = 8 * PtrByteSize;
7563 const unsigned NumBytes = std::max<unsigned>(
7564 LinkageSize + MinParameterSaveAreaSize, CCInfo.getStackSize());
7565
7566 // Adjust the stack pointer for the new arguments...
7567 // These operations are automatically eliminated by the prolog/epilog pass.
7568 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
7569 SDValue CallSeqStart = Chain;
7570
7571 SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
7572 SmallVector<SDValue, 8> MemOpChains;
7573
7574 // Set up a copy of the stack pointer for loading and storing any
7575 // arguments that may not fit in the registers available for argument
7576 // passing.
7577 const SDValue StackPtr = IsPPC64 ? DAG.getRegister(PPC::X1, MVT::i64)
7578 : DAG.getRegister(PPC::R1, MVT::i32);
7579
7580 for (unsigned I = 0, E = ArgLocs.size(); I != E;) {
7581 const unsigned ValNo = ArgLocs[I].getValNo();
7582 SDValue Arg = OutVals[ValNo];
7583 ISD::ArgFlagsTy Flags = Outs[ValNo].Flags;
7584
7585 if (Flags.isByVal()) {
7586 const unsigned ByValSize = Flags.getByValSize();
7587
7588 // Nothing to do for zero-sized ByVals on the caller side.
7589 if (!ByValSize) {
7590 ++I;
7591 continue;
7592 }
7593
7594 auto GetLoad = [&](EVT VT, unsigned LoadOffset) {
7595 return DAG.getExtLoad(ISD::ZEXTLOAD, dl, PtrVT, Chain,
7596 (LoadOffset != 0)
7597 ? DAG.getObjectPtrOffset(
7598 dl, Arg, TypeSize::getFixed(LoadOffset))
7599 : Arg,
7600 MachinePointerInfo(), VT);
7601 };
7602
7603 unsigned LoadOffset = 0;
7604
7605 // Initialize registers, which are fully occupied by the by-val argument.
7606 while (LoadOffset + PtrByteSize <= ByValSize && ArgLocs[I].isRegLoc()) {
7607 SDValue Load = GetLoad(PtrVT, LoadOffset);
7608 MemOpChains.push_back(Load.getValue(1));
7609 LoadOffset += PtrByteSize;
7610 const CCValAssign &ByValVA = ArgLocs[I++];
7611 assert(ByValVA.getValNo() == ValNo &&
7612 "Unexpected location for pass-by-value argument.");
7613 RegsToPass.push_back(std::make_pair(ByValVA.getLocReg(), Load));
7614 }
7615
7616 if (LoadOffset == ByValSize)
7617 continue;
7618
7619 // There must be one more loc to handle the remainder.
7620 assert(ArgLocs[I].getValNo() == ValNo &&
7621 "Expected additional location for by-value argument.");
7622
7623 if (ArgLocs[I].isMemLoc()) {
7624 assert(LoadOffset < ByValSize && "Unexpected memloc for by-val arg.");
7625 const CCValAssign &ByValVA = ArgLocs[I++];
7626 ISD::ArgFlagsTy MemcpyFlags = Flags;
7627 // Only memcpy the bytes that don't pass in register.
7628 MemcpyFlags.setByValSize(ByValSize - LoadOffset);
7629 Chain = CallSeqStart = createMemcpyOutsideCallSeq(
7630 (LoadOffset != 0) ? DAG.getObjectPtrOffset(
7631 dl, Arg, TypeSize::getFixed(LoadOffset))
7632 : Arg,
7633 DAG.getObjectPtrOffset(
7634 dl, StackPtr, TypeSize::getFixed(ByValVA.getLocMemOffset())),
7635 CallSeqStart, MemcpyFlags, DAG, dl);
7636 continue;
7637 }
7638
7639 // Initialize the final register residue.
7640 // Any residue that occupies the final by-val arg register must be
7641 // left-justified on AIX. Loads must be a power-of-2 size and cannot be
7642 // larger than the ByValSize. For example: a 7 byte by-val arg requires 4,
7643 // 2 and 1 byte loads.
7644 const unsigned ResidueBytes = ByValSize % PtrByteSize;
7645 assert(ResidueBytes != 0 && LoadOffset + PtrByteSize > ByValSize &&
7646 "Unexpected register residue for by-value argument.");
7647 SDValue ResidueVal;
7648 for (unsigned Bytes = 0; Bytes != ResidueBytes;) {
7649 const unsigned N = llvm::bit_floor(ResidueBytes - Bytes);
7650 const MVT VT =
7651 N == 1 ? MVT::i8
7652 : ((N == 2) ? MVT::i16 : (N == 4 ? MVT::i32 : MVT::i64));
7653 SDValue Load = GetLoad(VT, LoadOffset);
7654 MemOpChains.push_back(Load.getValue(1));
7655 LoadOffset += N;
7656 Bytes += N;
7657
7658 // By-val arguments are passed left-justified in registers.
7659 // Every load here needs to be shifted, otherwise a full register load
7660 // should have been used.
7661 assert(PtrVT.getSimpleVT().getSizeInBits() > (Bytes * 8) &&
7662 "Unexpected load emitted during handling of pass-by-value "
7663 "argument.");
7664 unsigned NumSHLBits = PtrVT.getSimpleVT().getSizeInBits() - (Bytes * 8);
7665 EVT ShiftAmountTy =
7666 getShiftAmountTy(Load->getValueType(0), DAG.getDataLayout());
7667 SDValue SHLAmt = DAG.getConstant(NumSHLBits, dl, ShiftAmountTy);
7668 SDValue ShiftedLoad =
7669 DAG.getNode(ISD::SHL, dl, Load.getValueType(), Load, SHLAmt);
7670 ResidueVal = ResidueVal ? DAG.getNode(ISD::OR, dl, PtrVT, ResidueVal,
7671 ShiftedLoad)
7672 : ShiftedLoad;
7673 }
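// For instance, a 7-byte residue on a 64-bit target is assembled from an i32
// load shifted left by 32, an i16 load shifted by 16 and an i8 load shifted
// by 8, OR'ed together so the data ends up left-justified in the 64-bit GPR.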
7674
7675 const CCValAssign &ByValVA = ArgLocs[I++];
7676 RegsToPass.push_back(std::make_pair(ByValVA.getLocReg(), ResidueVal));
7677 continue;
7678 }
7679
7680 CCValAssign &VA = ArgLocs[I++];
7681 const MVT LocVT = VA.getLocVT();
7682 const MVT ValVT = VA.getValVT();
7683
7684 switch (VA.getLocInfo()) {
7685 default:
7686 report_fatal_error("Unexpected argument extension type.");
7687 case CCValAssign::Full:
7688 break;
7689 case CCValAssign::ZExt:
7690 Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
7691 break;
7692 case CCValAssign::SExt:
7693 Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
7694 break;
7695 }
7696
7697 if (VA.isRegLoc() && !VA.needsCustom()) {
7698 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
7699 continue;
7700 }
7701
7702 // Vector arguments passed to VarArg functions need custom handling when
7703 // they are passed (at least partially) in GPRs.
7704 if (VA.isMemLoc() && VA.needsCustom() && ValVT.isVector()) {
7705 assert(CFlags.IsVarArg && "Custom MemLocs only used for Vector args.");
7706 // Store value to its stack slot.
7707 SDValue PtrOff =
7708 DAG.getConstant(VA.getLocMemOffset(), dl, StackPtr.getValueType());
7709 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
7710 SDValue Store =
7711 DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo());
7712 MemOpChains.push_back(Store);
7713 const unsigned OriginalValNo = VA.getValNo();
7714 // Then load the GPRs from the stack
7715 unsigned LoadOffset = 0;
7716 auto HandleCustomVecRegLoc = [&]() {
7717 assert(I != E && "Unexpected end of CCvalAssigns.");
7718 assert(ArgLocs[I].isRegLoc() && ArgLocs[I].needsCustom() &&
7719 "Expected custom RegLoc.");
7720 CCValAssign RegVA = ArgLocs[I++];
7721 assert(RegVA.getValNo() == OriginalValNo &&
7722 "Custom MemLoc ValNo and custom RegLoc ValNo must match.");
7723 SDValue Add = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
7724 DAG.getConstant(LoadOffset, dl, PtrVT));
7725 SDValue Load = DAG.getLoad(PtrVT, dl, Store, Add, MachinePointerInfo());
7726 MemOpChains.push_back(Load.getValue(1));
7727 RegsToPass.push_back(std::make_pair(RegVA.getLocReg(), Load));
7728 LoadOffset += PtrByteSize;
7729 };
7730
7731 // In 64-bit there will be exactly 2 custom RegLocs that follow, and in
7732 // 32-bit there will be 2 custom RegLocs if we are passing in R9 and
7733 // R10.
7734 HandleCustomVecRegLoc();
7735 HandleCustomVecRegLoc();
7736
7737 if (I != E && ArgLocs[I].isRegLoc() && ArgLocs[I].needsCustom() &&
7738 ArgLocs[I].getValNo() == OriginalValNo) {
7739 assert(!IsPPC64 &&
7740 "Only 2 custom RegLocs expected for 64-bit codegen.");
7741 HandleCustomVecRegLoc();
7742 HandleCustomVecRegLoc();
7743 }
7744
7745 continue;
7746 }
7747
7748 if (VA.isMemLoc()) {
7749 SDValue PtrOff =
7750 DAG.getConstant(VA.getLocMemOffset(), dl, StackPtr.getValueType());
7751 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
7752 MemOpChains.push_back(
7753 DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));
7754
7755 continue;
7756 }
7757
7758 if (!ValVT.isFloatingPoint())
7760 "Unexpected register handling for calling convention.");
7761
7762 // Custom handling is used for GPR initializations for vararg float
7763 // arguments.
7764 assert(VA.isRegLoc() && VA.needsCustom() && CFlags.IsVarArg &&
7765 LocVT.isInteger() &&
7766 "Custom register handling only expected for VarArg.");
7767
7768 SDValue ArgAsInt =
7769 DAG.getBitcast(MVT::getIntegerVT(ValVT.getSizeInBits()), Arg);
7770
7771 if (Arg.getValueType().getStoreSize() == LocVT.getStoreSize())
7772 // f32 in 32-bit GPR
7773 // f64 in 64-bit GPR
7774 RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgAsInt));
7775 else if (Arg.getValueType().getFixedSizeInBits() <
7776 LocVT.getFixedSizeInBits())
7777 // f32 in 64-bit GPR.
7778 RegsToPass.push_back(std::make_pair(
7779 VA.getLocReg(), DAG.getZExtOrTrunc(ArgAsInt, dl, LocVT)));
7780 else {
7781 // f64 in two 32-bit GPRs
7782 // The 2 GPRs are marked custom and expected to be adjacent in ArgLocs.
7783 assert(Arg.getValueType() == MVT::f64 && CFlags.IsVarArg && !IsPPC64 &&
7784 "Unexpected custom register for argument!");
7785 CCValAssign &GPR1 = VA;
7786 SDValue MSWAsI64 = DAG.getNode(ISD::SRL, dl, MVT::i64, ArgAsInt,
7787 DAG.getConstant(32, dl, MVT::i8));
7788 RegsToPass.push_back(std::make_pair(
7789 GPR1.getLocReg(), DAG.getZExtOrTrunc(MSWAsI64, dl, MVT::i32)));
7790
7791 if (I != E) {
7792 // If only 1 GPR was available, there will only be one custom GPR and
7793 // the argument will also pass in memory.
7794 CCValAssign &PeekArg = ArgLocs[I];
7795 if (PeekArg.isRegLoc() && PeekArg.getValNo() == PeekArg.getValNo()) {
7796 assert(PeekArg.needsCustom() && "A second custom GPR is expected.");
7797 CCValAssign &GPR2 = ArgLocs[I++];
7798 RegsToPass.push_back(std::make_pair(
7799 GPR2.getLocReg(), DAG.getZExtOrTrunc(ArgAsInt, dl, MVT::i32)));
7800 }
7801 }
7802 }
7803 }
7804
7805 if (!MemOpChains.empty())
7806 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
7807
7808 // For indirect calls, we need to save the TOC base to the stack for
7809 // restoration after the call.
7810 if (CFlags.IsIndirect) {
7811 assert(!CFlags.IsTailCall && "Indirect tail-calls not supported.");
7812 const MCRegister TOCBaseReg = Subtarget.getTOCPointerRegister();
7813 const MCRegister StackPtrReg = Subtarget.getStackPointerRegister();
7814 const MVT PtrVT = Subtarget.isPPC64() ? MVT::i64 : MVT::i32;
7815 const unsigned TOCSaveOffset =
7816 Subtarget.getFrameLowering()->getTOCSaveOffset();
7817
7818 setUsesTOCBasePtr(DAG);
7819 SDValue Val = DAG.getCopyFromReg(Chain, dl, TOCBaseReg, PtrVT);
7820 SDValue PtrOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
7821 SDValue StackPtr = DAG.getRegister(StackPtrReg, PtrVT);
7822 SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
7823 Chain = DAG.getStore(
7824 Val.getValue(1), dl, Val, AddPtr,
7825 MachinePointerInfo::getStack(DAG.getMachineFunction(), TOCSaveOffset));
7826 }
7827
7828 // Build a sequence of copy-to-reg nodes chained together with token chain
7829 // and flag operands which copy the outgoing args into the appropriate regs.
7830 SDValue InGlue;
7831 for (auto Reg : RegsToPass) {
7832 Chain = DAG.getCopyToReg(Chain, dl, Reg.first, Reg.second, InGlue);
7833 InGlue = Chain.getValue(1);
7834 }
7835
7836 const int SPDiff = 0;
7837 return FinishCall(CFlags, dl, DAG, RegsToPass, InGlue, Chain, CallSeqStart,
7838 Callee, SPDiff, NumBytes, Ins, InVals, CB);
7839}
7840
7841bool
7842PPCTargetLowering::CanLowerReturn(CallingConv::ID CallConv,
7843 MachineFunction &MF, bool isVarArg,
7844 const SmallVectorImpl<ISD::OutputArg> &Outs,
7845 LLVMContext &Context) const {
7846 SmallVector<CCValAssign, 16> RVLocs;
7847 CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
7848 return CCInfo.CheckReturn(
7849 Outs, (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold)
7850 ? RetCC_PPC_Cold
7851 : RetCC_PPC);
7852}
7853
7854SDValue
7855PPCTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
7856 bool isVarArg,
7857 const SmallVectorImpl<ISD::OutputArg> &Outs,
7858 const SmallVectorImpl<SDValue> &OutVals,
7859 const SDLoc &dl, SelectionDAG &DAG) const {
7860 SmallVector<CCValAssign, 16> RVLocs;
7861 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
7862 *DAG.getContext());
7863 CCInfo.AnalyzeReturn(Outs,
7864 (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold)
7865 ? RetCC_PPC_Cold
7866 : RetCC_PPC);
7867
7868 SDValue Glue;
7869 SmallVector<SDValue, 4> RetOps(1, Chain);
7870
7871 // Copy the result values into the output registers.
7872 for (unsigned i = 0, RealResIdx = 0; i != RVLocs.size(); ++i, ++RealResIdx) {
7873 CCValAssign &VA = RVLocs[i];
7874 assert(VA.isRegLoc() && "Can only return in registers!");
7875
7876 SDValue Arg = OutVals[RealResIdx];
7877
7878 switch (VA.getLocInfo()) {
7879 default: llvm_unreachable("Unknown loc info!");
7880 case CCValAssign::Full: break;
7881 case CCValAssign::AExt:
7882 Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
7883 break;
7884 case CCValAssign::ZExt:
7885 Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
7886 break;
7887 case CCValAssign::SExt:
7888 Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
7889 break;
7890 }
7891 if (Subtarget.hasSPE() && VA.getLocVT() == MVT::f64) {
7892 bool isLittleEndian = Subtarget.isLittleEndian();
7893 // Legalize ret f64 -> ret 2 x i32.
7894 SDValue SVal =
7895 DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
7896 DAG.getIntPtrConstant(isLittleEndian ? 0 : 1, dl));
7897 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), SVal, Glue);
7898 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
7899 SVal = DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
7900 DAG.getIntPtrConstant(isLittleEndian ? 1 : 0, dl));
7901 Glue = Chain.getValue(1);
7902 VA = RVLocs[++i]; // skip ahead to next loc
7903 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), SVal, Glue);
7904 } else
7905 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Glue);
7906 Glue = Chain.getValue(1);
7907 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
7908 }
7909
7910 RetOps[0] = Chain; // Update chain.
7911
7912 // Add the glue if we have it.
7913 if (Glue.getNode())
7914 RetOps.push_back(Glue);
7915
7916 return DAG.getNode(PPCISD::RET_GLUE, dl, MVT::Other, RetOps);
7917}
7918
7919SDValue
7920PPCTargetLowering::LowerGET_DYNAMIC_AREA_OFFSET(SDValue Op,
7921 SelectionDAG &DAG) const {
7922 SDLoc dl(Op);
7923
7924 // Get the correct type for integers.
7925 EVT IntVT = Op.getValueType();
7926
7927 // Get the inputs.
7928 SDValue Chain = Op.getOperand(0);
7929 SDValue FPSIdx = getFramePointerFrameIndex(DAG);
7930 // Build a DYNAREAOFFSET node.
7931 SDValue Ops[2] = {Chain, FPSIdx};
7932 SDVTList VTs = DAG.getVTList(IntVT);
7933 return DAG.getNode(PPCISD::DYNAREAOFFSET, dl, VTs, Ops);
7934}
7935
7936SDValue PPCTargetLowering::LowerSTACKRESTORE(SDValue Op,
7937 SelectionDAG &DAG) const {
7938 // When we pop the dynamic allocation we need to restore the SP link.
7939 SDLoc dl(Op);
7940
7941 // Get the correct type for pointers.
7942 EVT PtrVT = getPointerTy(DAG.getDataLayout());
7943
7944 // Construct the stack pointer operand.
7945 bool isPPC64 = Subtarget.isPPC64();
7946 unsigned SP = isPPC64 ? PPC::X1 : PPC::R1;
7947 SDValue StackPtr = DAG.getRegister(SP, PtrVT);
7948
7949 // Get the operands for the STACKRESTORE.
7950 SDValue Chain = Op.getOperand(0);
7951 SDValue SaveSP = Op.getOperand(1);
7952
7953 // Load the old link SP.
7954 SDValue LoadLinkSP =
7955 DAG.getLoad(PtrVT, dl, Chain, StackPtr, MachinePointerInfo());
7956
7957 // Restore the stack pointer.
7958 Chain = DAG.getCopyToReg(LoadLinkSP.getValue(1), dl, SP, SaveSP);
7959
7960 // Store the old link SP.
7961 return DAG.getStore(Chain, dl, LoadLinkSP, StackPtr, MachinePointerInfo());
7962}
7963
7964SDValue PPCTargetLowering::getReturnAddrFrameIndex(SelectionDAG &DAG) const {
7965 MachineFunction &MF = DAG.getMachineFunction();
7966 bool isPPC64 = Subtarget.isPPC64();
7967 EVT PtrVT = getPointerTy(MF.getDataLayout());
7968
7969 // Get the current return address save index. The users of this index will be
7970 // primarily DYNALLOC instructions.
7971 PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
7972 int RASI = FI->getReturnAddrSaveIndex();
7973
7974 // If the return address save index hasn't been defined yet.
7975 if (!RASI) {
7976 // Find out the fixed offset of the return address save area.
7977 int LROffset = Subtarget.getFrameLowering()->getReturnSaveOffset();
7978 // Allocate the frame index for the return address save area.
7979 RASI = MF.getFrameInfo().CreateFixedObject(isPPC64? 8 : 4, LROffset, false);
7980 // Save the result.
7981 FI->setReturnAddrSaveIndex(RASI);
7982 }
7983 return DAG.getFrameIndex(RASI, PtrVT);
7984}
7985
7986SDValue
7987PPCTargetLowering::getFramePointerFrameIndex(SelectionDAG & DAG) const {
7988 MachineFunction &MF = DAG.getMachineFunction();
7989 bool isPPC64 = Subtarget.isPPC64();
7990 EVT PtrVT = getPointerTy(MF.getDataLayout());
7991
7992 // Get current frame pointer save index. The users of this index will be
7993 // primarily DYNALLOC instructions.
7994 PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
7995 int FPSI = FI->getFramePointerSaveIndex();
7996
7997 // If the frame pointer save index hasn't been defined yet.
7998 if (!FPSI) {
8000 // Find out the fixed offset of the frame pointer save area.
8000 int FPOffset = Subtarget.getFrameLowering()->getFramePointerSaveOffset();
8001 // Allocate the frame index for frame pointer save area.
8002 FPSI = MF.getFrameInfo().CreateFixedObject(isPPC64? 8 : 4, FPOffset, true);
8003 // Save the result.
8004 FI->setFramePointerSaveIndex(FPSI);
8005 }
8006 return DAG.getFrameIndex(FPSI, PtrVT);
8007}
8008
8009SDValue PPCTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
8010 SelectionDAG &DAG) const {
8011 MachineFunction &MF = DAG.getMachineFunction();
8012 // Get the inputs.
8013 SDValue Chain = Op.getOperand(0);
8014 SDValue Size = Op.getOperand(1);
8015 SDLoc dl(Op);
8016
8017 // Get the correct type for pointers.
8018 EVT PtrVT = getPointerTy(DAG.getDataLayout());
8019 // Negate the size.
8020 SDValue NegSize = DAG.getNode(ISD::SUB, dl, PtrVT,
8021 DAG.getConstant(0, dl, PtrVT), Size);
8022 // Construct a node for the frame pointer save index.
8023 SDValue FPSIdx = getFramePointerFrameIndex(DAG);
8024 SDValue Ops[3] = { Chain, NegSize, FPSIdx };
8025 SDVTList VTs = DAG.getVTList(PtrVT, MVT::Other);
8026 if (hasInlineStackProbe(MF))
8027 return DAG.getNode(PPCISD::PROBED_ALLOCA, dl, VTs, Ops);
8028 return DAG.getNode(PPCISD::DYNALLOC, dl, VTs, Ops);
8029}
8030
8031SDValue PPCTargetLowering::LowerEH_DWARF_CFA(SDValue Op,
8032 SelectionDAG &DAG) const {
8033 MachineFunction &MF = DAG.getMachineFunction();
8034
8035 bool isPPC64 = Subtarget.isPPC64();
8036 EVT PtrVT = getPointerTy(DAG.getDataLayout());
8037
8038 int FI = MF.getFrameInfo().CreateFixedObject(isPPC64 ? 8 : 4, 0, false);
8039 return DAG.getFrameIndex(FI, PtrVT);
8040}
8041
8042SDValue PPCTargetLowering::lowerEH_SJLJ_SETJMP(SDValue Op,
8043 SelectionDAG &DAG) const {
8044 SDLoc DL(Op);
8045 return DAG.getNode(PPCISD::EH_SJLJ_SETJMP, DL,
8046 DAG.getVTList(MVT::i32, MVT::Other),
8047 Op.getOperand(0), Op.getOperand(1));
8048}
8049
8050SDValue PPCTargetLowering::lowerEH_SJLJ_LONGJMP(SDValue Op,
8051 SelectionDAG &DAG) const {
8052 SDLoc DL(Op);
8053 return DAG.getNode(PPCISD::EH_SJLJ_LONGJMP, DL, MVT::Other,
8054 Op.getOperand(0), Op.getOperand(1));
8055}
8056
8057SDValue PPCTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
8058 if (Op.getValueType().isVector())
8059 return LowerVectorLoad(Op, DAG);
8060
8061 assert(Op.getValueType() == MVT::i1 &&
8062 "Custom lowering only for i1 loads");
8063
8064 // First, load 8 bits into 32 bits, then truncate to 1 bit.
8065
8066 SDLoc dl(Op);
8067 LoadSDNode *LD = cast<LoadSDNode>(Op);
8068
8069 SDValue Chain = LD->getChain();
8070 SDValue BasePtr = LD->getBasePtr();
8071 MachineMemOperand *MMO = LD->getMemOperand();
8072
8073 SDValue NewLD =
8074 DAG.getExtLoad(ISD::EXTLOAD, dl, getPointerTy(DAG.getDataLayout()), Chain,
8075 BasePtr, MVT::i8, MMO);
8076 SDValue Result = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, NewLD);
8077
8078 SDValue Ops[] = { Result, SDValue(NewLD.getNode(), 1) };
8079 return DAG.getMergeValues(Ops, dl);
8080}
8081
8082SDValue PPCTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
8083 if (Op.getOperand(1).getValueType().isVector())
8084 return LowerVectorStore(Op, DAG);
8085
8086 assert(Op.getOperand(1).getValueType() == MVT::i1 &&
8087 "Custom lowering only for i1 stores");
8088
8089 // First, zero extend to 32 bits, then use a truncating store to 8 bits.
8090
8091 SDLoc dl(Op);
8092 StoreSDNode *ST = cast<StoreSDNode>(Op);
8093
8094 SDValue Chain = ST->getChain();
8095 SDValue BasePtr = ST->getBasePtr();
8096 SDValue Value = ST->getValue();
8097 MachineMemOperand *MMO = ST->getMemOperand();
8098
8099 Value = DAG.getNode(ISD::ZERO_EXTEND, dl, getPointerTy(DAG.getDataLayout()),
8100 Value);
8101 return DAG.getTruncStore(Chain, dl, Value, BasePtr, MVT::i8, MMO);
8102}
8103
8104// FIXME: Remove this once the ANDI glue bug is fixed:
8105SDValue PPCTargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
8106 assert(Op.getValueType() == MVT::i1 &&
8107 "Custom lowering only for i1 results");
8108
8109 SDLoc DL(Op);
8110 return DAG.getNode(PPCISD::ANDI_rec_1_GT_BIT, DL, MVT::i1, Op.getOperand(0));
8111}
8112
8113SDValue PPCTargetLowering::LowerTRUNCATEVector(SDValue Op,
8114 SelectionDAG &DAG) const {
8115
8116 // Implements a vector truncate that fits in a vector register as a shuffle.
8117 // We want to legalize vector truncates down to where the source fits in
8118 // a vector register (and target is therefore smaller than vector register
8119 // size). At that point legalization will try to custom lower the sub-legal
8120 // result and get here - where we can contain the truncate as a single target
8121 // operation.
8122
8123 // For example a trunc <2 x i16> to <2 x i8> could be visualized as follows:
8124 // <MSB1|LSB1, MSB2|LSB2> to <LSB1, LSB2>
8125 //
8126 // We will implement it for big-endian ordering as follows (where u denotes
8127 // an undefined element):
8128 // < MSB1|LSB1, MSB2|LSB2, uu, uu, uu, uu, uu, uu> to
8129 // < LSB1, LSB2, u, u, u, u, u, u, u, u, u, u, u, u, u, u>
8130 //
8131 // The same operation in little-endian ordering will be:
8132 // <uu, uu, uu, uu, uu, uu, LSB2|MSB2, LSB1|MSB1> to
8133 // <u, u, u, u, u, u, u, u, u, u, u, u, u, u, LSB2, LSB1>
8134
8135 EVT TrgVT = Op.getValueType();
8136 assert(TrgVT.isVector() && "Vector type expected.");
8137 unsigned TrgNumElts = TrgVT.getVectorNumElements();
8138 EVT EltVT = TrgVT.getVectorElementType();
8139 if (!isOperationCustom(Op.getOpcode(), TrgVT) ||
8140 TrgVT.getSizeInBits() > 128 || !isPowerOf2_32(TrgNumElts) ||
8141 !llvm::has_single_bit<uint32_t>(EltVT.getSizeInBits()))
8142 return SDValue();
8143
8144 SDValue N1 = Op.getOperand(0);
8145 EVT SrcVT = N1.getValueType();
8146 unsigned SrcSize = SrcVT.getSizeInBits();
8147 if (SrcSize > 256 || !isPowerOf2_32(SrcVT.getVectorNumElements()) ||
8148 !llvm::has_single_bit<uint32_t>(
8149 SrcVT.getVectorElementType().getSizeInBits()))
8150 return SDValue();
8151 if (SrcSize == 256 && SrcVT.getVectorNumElements() < 2)
8152 return SDValue();
8153
8154 unsigned WideNumElts = 128 / EltVT.getSizeInBits();
8155 EVT WideVT = EVT::getVectorVT(*DAG.getContext(), EltVT, WideNumElts);
8156
8157 SDLoc DL(Op);
8158 SDValue Op1, Op2;
8159 if (SrcSize == 256) {
8160 EVT VecIdxTy = getVectorIdxTy(DAG.getDataLayout());
8161 EVT SplitVT =
8162 SrcVT.getHalfNumVectorElementsVT(*DAG.getContext());
8163 unsigned SplitNumElts = SplitVT.getVectorNumElements();
8164 Op1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, N1,
8165 DAG.getConstant(0, DL, VecIdxTy));
8166 Op2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, N1,
8167 DAG.getConstant(SplitNumElts, DL, VecIdxTy));
8168 }
8169 else {
8170 Op1 = SrcSize == 128 ? N1 : widenVec(DAG, N1, DL);
8171 Op2 = DAG.getUNDEF(WideVT);
8172 }
8173
8174 // First list the elements we want to keep.
8175 unsigned SizeMult = SrcSize / TrgVT.getSizeInBits();
8176 SmallVector<int, 16> ShuffV;
8177 if (Subtarget.isLittleEndian())
8178 for (unsigned i = 0; i < TrgNumElts; ++i)
8179 ShuffV.push_back(i * SizeMult);
8180 else
8181 for (unsigned i = 1; i <= TrgNumElts; ++i)
8182 ShuffV.push_back(i * SizeMult - 1);
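 // For example, truncating v8i16 to v8i8 gives SizeMult == 2, so the mask is
 // {0, 2, 4, ..., 14} on little-endian and {1, 3, 5, ..., 15} on big-endian,
 // i.e. the least-significant byte of each halfword element.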
8183
8184 // Populate the remaining elements with undefs.
8185 for (unsigned i = TrgNumElts; i < WideNumElts; ++i)
8186 // ShuffV.push_back(i + WideNumElts);
8187 ShuffV.push_back(WideNumElts + 1);
8188
8189 Op1 = DAG.getNode(ISD::BITCAST, DL, WideVT, Op1);
8190 Op2 = DAG.getNode(ISD::BITCAST, DL, WideVT, Op2);
8191 return DAG.getVectorShuffle(WideVT, DL, Op1, Op2, ShuffV);
8192}
8193
8194/// LowerSELECT_CC - Lower floating-point select_cc's into an fsel instruction
8195/// when possible.
8196SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
8197 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
8198 EVT ResVT = Op.getValueType();
8199 EVT CmpVT = Op.getOperand(0).getValueType();
8200 SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
8201 SDValue TV = Op.getOperand(2), FV = Op.getOperand(3);
8202 SDLoc dl(Op);
8203
8204 // Without power9-vector, we don't have a native instruction for f128 comparison.
8205 // The following transformation to a libcall is needed for setcc:
8206 // select_cc lhs, rhs, tv, fv, cc -> select_cc (setcc cc, x, y), 0, tv, fv, NE
8207 if (!Subtarget.hasP9Vector() && CmpVT == MVT::f128) {
8208 SDValue Z = DAG.getSetCC(
8209 dl, getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), CmpVT),
8210 LHS, RHS, CC);
8211 SDValue Zero = DAG.getConstant(0, dl, Z.getValueType());
8212 return DAG.getSelectCC(dl, Z, Zero, TV, FV, ISD::SETNE);
8213 }
8214
8215 // Not FP, or using SPE? Not a fsel.
8216 if (!CmpVT.isFloatingPoint() || !TV.getValueType().isFloatingPoint() ||
8217 Subtarget.hasSPE())
8218 return Op;
8219
8220 SDNodeFlags Flags = Op.getNode()->getFlags();
8221
8222 // We have xsmaxc[dq]p/xsminc[dq]p which are OK to emit even in the
8223 // presence of infinities.
8224 if (Subtarget.hasP9Vector() && LHS == TV && RHS == FV) {
8225 switch (CC) {
8226 default:
8227 break;
8228 case ISD::SETOGT:
8229 case ISD::SETGT:
8230 return DAG.getNode(PPCISD::XSMAXC, dl, Op.getValueType(), LHS, RHS);
8231 case ISD::SETOLT:
8232 case ISD::SETLT:
8233 return DAG.getNode(PPCISD::XSMINC, dl, Op.getValueType(), LHS, RHS);
8234 }
8235 }
8236
8237 // We might be able to do better than this under some circumstances, but in
8238 // general, fsel-based lowering of select is a finite-math-only optimization.
8239 // For more information, see section F.3 of the 2.06 ISA specification.
8240 // With ISA 3.0
8241 if ((!DAG.getTarget().Options.NoInfsFPMath && !Flags.hasNoInfs()) ||
8242 (!DAG.getTarget().Options.NoNaNsFPMath && !Flags.hasNoNaNs()) ||
8243 ResVT == MVT::f128)
8244 return Op;
8245
8246 // If the RHS of the comparison is a 0.0, we don't need to do the
8247 // subtraction at all.
8248 SDValue Sel1;
8249 if (isFloatingPointZero(RHS))
8250 switch (CC) {
8251 default: break; // SETUO etc aren't handled by fsel.
8252 case ISD::SETNE:
8253 std::swap(TV, FV);
8254 [[fallthrough]];
8255 case ISD::SETEQ:
8256 if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits
8257 LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
8258 Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV);
8259 if (Sel1.getValueType() == MVT::f32) // Comparison is always 64-bits
8260 Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1);
8261 return DAG.getNode(PPCISD::FSEL, dl, ResVT,
8262 DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), Sel1, FV);
8263 case ISD::SETULT:
8264 case ISD::SETLT:
8265 std::swap(TV, FV); // fsel is natively setge, swap operands for setlt
8266 [[fallthrough]];
8267 case ISD::SETOGE:
8268 case ISD::SETGE:
8269 if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits
8270 LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
8271 return DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV);
8272 case ISD::SETUGT:
8273 case ISD::SETGT:
8274 std::swap(TV, FV); // fsel is natively setge, swap operands for setgt
8275 [[fallthrough]];
8276 case ISD::SETOLE:
8277 case ISD::SETLE:
8278 if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits
8279 LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
8280 return DAG.getNode(PPCISD::FSEL, dl, ResVT,
8281 DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), TV, FV);
8282 }
8283
8284 SDValue Cmp;
8285 switch (CC) {
8286 default: break; // SETUO etc aren't handled by fsel.
8287 case ISD::SETNE:
8288 std::swap(TV, FV);
8289 [[fallthrough]];
8290 case ISD::SETEQ:
8291 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags);
8292 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
8293 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
8294 Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
8295 if (Sel1.getValueType() == MVT::f32) // Comparison is always 64-bits
8296 Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1);
8297 return DAG.getNode(PPCISD::FSEL, dl, ResVT,
8298 DAG.getNode(ISD::FNEG, dl, MVT::f64, Cmp), Sel1, FV);
8299 case ISD::SETULT:
8300 case ISD::SETLT:
8301 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags);
8302 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
8303 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
8304 return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
8305 case ISD::SETOGE:
8306 case ISD::SETGE:
8307 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags);
8308 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
8309 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
8310 return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
8311 case ISD::SETUGT:
8312 case ISD::SETGT:
8313 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS, Flags);
8314 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
8315 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
8316 return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
8317 case ISD::SETOLE:
8318 case ISD::SETLE:
8319 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS, Flags);
8320 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
8321 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
8322 return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
8323 }
8324 return Op;
8325}
8326
8327static unsigned getPPCStrictOpcode(unsigned Opc) {
8328 switch (Opc) {
8329 default:
8330 llvm_unreachable("No strict version of this opcode!");
8331 case PPCISD::FCTIDZ:
8332 return PPCISD::STRICT_FCTIDZ;
8333 case PPCISD::FCTIWZ:
8334 return PPCISD::STRICT_FCTIWZ;
8335 case PPCISD::FCTIDUZ:
8336 return PPCISD::STRICT_FCTIDUZ;
8337 case PPCISD::FCTIWUZ:
8338 return PPCISD::STRICT_FCTIWUZ;
8339 case PPCISD::FCFID:
8340 return PPCISD::STRICT_FCFID;
8341 case PPCISD::FCFIDU:
8342 return PPCISD::STRICT_FCFIDU;
8343 case PPCISD::FCFIDS:
8344 return PPCISD::STRICT_FCFIDS;
8345 case PPCISD::FCFIDUS:
8346 return PPCISD::STRICT_FCFIDUS;
8347 }
8348}
8349
8350 static SDValue convertFPToInt(SDValue Op, SelectionDAG &DAG,
8351 const PPCSubtarget &Subtarget) {
8352 SDLoc dl(Op);
8353 bool IsStrict = Op->isStrictFPOpcode();
8354 bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT ||
8355 Op.getOpcode() == ISD::STRICT_FP_TO_SINT;
8356
8357 // TODO: Any other flags to propagate?
8358 SDNodeFlags Flags;
8359 Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
8360
8361 // For strict nodes, source is the second operand.
8362 SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
8363 SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
8364 MVT DestTy = Op.getSimpleValueType();
8365 assert(Src.getValueType().isFloatingPoint() &&
8366 (DestTy == MVT::i8 || DestTy == MVT::i16 || DestTy == MVT::i32 ||
8367 DestTy == MVT::i64) &&
8368 "Invalid FP_TO_INT types");
8369 if (Src.getValueType() == MVT::f32) {
8370 if (IsStrict) {
8371 Src =
8372 DAG.getNode(ISD::STRICT_FP_EXTEND, dl,
8373 DAG.getVTList(MVT::f64, MVT::Other), {Chain, Src}, Flags);
8374 Chain = Src.getValue(1);
8375 } else
8376 Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);
8377 }
8378 if ((DestTy == MVT::i8 || DestTy == MVT::i16) && Subtarget.hasP9Vector())
8379 DestTy = Subtarget.isPPC64() ? MVT::i64 : MVT::i32;
8380 unsigned Opc = ISD::DELETED_NODE;
8381 switch (DestTy.SimpleTy) {
8382 default: llvm_unreachable("Unhandled FP_TO_INT type in custom expander!");
8383 case MVT::i32:
8384 Opc = IsSigned ? PPCISD::FCTIWZ
8385 : (Subtarget.hasFPCVT() ? PPCISD::FCTIWUZ : PPCISD::FCTIDZ);
8386 break;
8387 case MVT::i64:
8388 assert((IsSigned || Subtarget.hasFPCVT()) &&
8389 "i64 FP_TO_UINT is supported only with FPCVT");
8390 Opc = IsSigned ? PPCISD::FCTIDZ : PPCISD::FCTIDUZ;
8391 }
8392 EVT ConvTy = Src.getValueType() == MVT::f128 ? MVT::f128 : MVT::f64;
8393 SDValue Conv;
8394 if (IsStrict) {
8395 Opc = getPPCStrictOpcode(Opc);
8396 Conv = DAG.getNode(Opc, dl, DAG.getVTList(ConvTy, MVT::Other), {Chain, Src},
8397 Flags);
8398 } else {
8399 Conv = DAG.getNode(Opc, dl, ConvTy, Src);
8400 }
8401 return Conv;
8402}
8403
8404void PPCTargetLowering::LowerFP_TO_INTForReuse(SDValue Op, ReuseLoadInfo &RLI,
8405 SelectionDAG &DAG,
8406 const SDLoc &dl) const {
8407 SDValue Tmp = convertFPToInt(Op, DAG, Subtarget);
8408 bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT ||
8409 Op.getOpcode() == ISD::STRICT_FP_TO_SINT;
8410 bool IsStrict = Op->isStrictFPOpcode();
8411
8412 // Convert the FP value to an int value through memory.
8413 bool i32Stack = Op.getValueType() == MVT::i32 && Subtarget.hasSTFIWX() &&
8414 (IsSigned || Subtarget.hasFPCVT());
8415 SDValue FIPtr = DAG.CreateStackTemporary(i32Stack ? MVT::i32 : MVT::f64);
8416 int FI = cast<FrameIndexSDNode>(FIPtr)->getIndex();
8417 MachinePointerInfo MPI =
8418 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI);
8419
8420 // Emit a store to the stack slot.
8421 SDValue Chain = IsStrict ? Tmp.getValue(1) : DAG.getEntryNode();
8422 Align Alignment(DAG.getEVTAlign(Tmp.getValueType()));
8423 if (i32Stack) {
8424 MachineFunction &MF = DAG.getMachineFunction();
8425 Alignment = Align(4);
8426 MachineMemOperand *MMO =
8427 MF.getMachineMemOperand(MPI, MachineMemOperand::MOStore, 4, Alignment);
8428 SDValue Ops[] = { Chain, Tmp, FIPtr };
8429 Chain = DAG.getMemIntrinsicNode(PPCISD::STFIWX, dl,
8430 DAG.getVTList(MVT::Other), Ops, MVT::i32, MMO);
8431 } else
8432 Chain = DAG.getStore(Chain, dl, Tmp, FIPtr, MPI, Alignment);
8433
8434 // Result is a load from the stack slot. If loading 4 bytes, make sure to
8435 // add in a bias on big endian.
8436 if (Op.getValueType() == MVT::i32 && !i32Stack) {
8437 FIPtr = DAG.getNode(ISD::ADD, dl, FIPtr.getValueType(), FIPtr,
8438 DAG.getConstant(4, dl, FIPtr.getValueType()));
8439 MPI = MPI.getWithOffset(Subtarget.isLittleEndian() ? 0 : 4);
8440 }
8441
8442 RLI.Chain = Chain;
8443 RLI.Ptr = FIPtr;
8444 RLI.MPI = MPI;
8445 RLI.Alignment = Alignment;
8446}
8447
8448/// Custom lowers floating point to integer conversions to use
8449/// the direct move instructions available in ISA 2.07 to avoid the
8450/// need for load/store combinations.
8451SDValue PPCTargetLowering::LowerFP_TO_INTDirectMove(SDValue Op,
8452 SelectionDAG &DAG,
8453 const SDLoc &dl) const {
8454 SDValue Conv = convertFPToInt(Op, DAG, Subtarget);
8455 SDValue Mov = DAG.getNode(PPCISD::MFVSR, dl, Op.getValueType(), Conv);
8456 if (Op->isStrictFPOpcode())
8457 return DAG.getMergeValues({Mov, Conv.getValue(1)}, dl);
8458 else
8459 return Mov;
8460}
8461
8462SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
8463 const SDLoc &dl) const {
8464 bool IsStrict = Op->isStrictFPOpcode();
8465 bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT ||
8466 Op.getOpcode() == ISD::STRICT_FP_TO_SINT;
8467 SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
8468 EVT SrcVT = Src.getValueType();
8469 EVT DstVT = Op.getValueType();
8470
8471 // FP to INT conversions are legal for f128.
8472 if (SrcVT == MVT::f128)
8473 return Subtarget.hasP9Vector() ? Op : SDValue();
8474
8475 // Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on
8476 // PPC (the libcall is not available).
8477 if (SrcVT == MVT::ppcf128) {
8478 if (DstVT == MVT::i32) {
8479 // TODO: Conservatively pass only nofpexcept flag here. Need to check and
8480 // set other fast-math flags to FP operations in both strict and
8481 // non-strict cases. (FP_TO_SINT, FSUB)
8482 SDNodeFlags Flags;
8483 Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
8484
8485 if (IsSigned) {
8486 SDValue Lo, Hi;
8487 std::tie(Lo, Hi) = DAG.SplitScalar(Src, dl, MVT::f64, MVT::f64);
8488
8489 // Add the two halves of the long double in round-to-zero mode, and use
8490 // a smaller FP_TO_SINT.
8491 if (IsStrict) {
8492 SDValue Res = DAG.getNode(PPCISD::STRICT_FADDRTZ, dl,
8493 DAG.getVTList(MVT::f64, MVT::Other),
8494 {Op.getOperand(0), Lo, Hi}, Flags);
8495 return DAG.getNode(ISD::STRICT_FP_TO_SINT, dl,
8496 DAG.getVTList(MVT::i32, MVT::Other),
8497 {Res.getValue(1), Res}, Flags);
8498 } else {
8499 SDValue Res = DAG.getNode(PPCISD::FADDRTZ, dl, MVT::f64, Lo, Hi);
8500 return DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Res);
8501 }
8502 } else {
8503 const uint64_t TwoE31[] = {0x41e0000000000000LL, 0};
8504 APFloat APF = APFloat(APFloat::PPCDoubleDouble(), APInt(128, TwoE31));
8505 SDValue Cst = DAG.getConstantFP(APF, dl, SrcVT);
8506 SDValue SignMask = DAG.getConstant(0x80000000, dl, DstVT);
8507 if (IsStrict) {
8508 // Sel = Src < 0x80000000
8509 // FltOfs = select Sel, 0.0, 0x80000000
8510 // IntOfs = select Sel, 0, 0x80000000
8511 // Result = fp_to_sint(Src - FltOfs) ^ IntOfs
8512 SDValue Chain = Op.getOperand(0);
8513 EVT SetCCVT =
8514 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
8515 EVT DstSetCCVT =
8516 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), DstVT);
8517 SDValue Sel = DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT,
8518 Chain, true);
8519 Chain = Sel.getValue(1);
8520
8521 SDValue FltOfs = DAG.getSelect(
8522 dl, SrcVT, Sel, DAG.getConstantFP(0.0, dl, SrcVT), Cst);
8523 Sel = DAG.getBoolExtOrTrunc(Sel, dl, DstSetCCVT, DstVT);
8524
8525 SDValue Val = DAG.getNode(ISD::STRICT_FSUB, dl,
8526 DAG.getVTList(SrcVT, MVT::Other),
8527 {Chain, Src, FltOfs}, Flags);
8528 Chain = Val.getValue(1);
8529 SDValue SInt = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl,
8530 DAG.getVTList(DstVT, MVT::Other),
8531 {Chain, Val}, Flags);
8532 Chain = SInt.getValue(1);
8533 SDValue IntOfs = DAG.getSelect(
8534 dl, DstVT, Sel, DAG.getConstant(0, dl, DstVT), SignMask);
8535 SDValue Result = DAG.getNode(ISD::XOR, dl, DstVT, SInt, IntOfs);
8536 return DAG.getMergeValues({Result, Chain}, dl);
8537 } else {
8538 // X>=2^31 ? (int)(X-2^31)+0x80000000 : (int)X
8539 // FIXME: generated code sucks.
8540 SDValue True = DAG.getNode(ISD::FSUB, dl, MVT::ppcf128, Src, Cst);
8541 True = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, True);
8542 True = DAG.getNode(ISD::ADD, dl, MVT::i32, True, SignMask);
8543 SDValue False = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Src);
8544 return DAG.getSelectCC(dl, Src, Cst, True, False, ISD::SETGE);
8545 }
8546 }
8547 }
8548
8549 return SDValue();
8550 }
8551
8552 if (Subtarget.hasDirectMove() && Subtarget.isPPC64())
8553 return LowerFP_TO_INTDirectMove(Op, DAG, dl);
8554
8555 ReuseLoadInfo RLI;
8556 LowerFP_TO_INTForReuse(Op, RLI, DAG, dl);
8557
8558 return DAG.getLoad(Op.getValueType(), dl, RLI.Chain, RLI.Ptr, RLI.MPI,
8559 RLI.Alignment, RLI.MMOFlags(), RLI.AAInfo, RLI.Ranges);
8560}
8561
8562// We're trying to insert a regular store, S, and then a load, L. If the
8563// incoming value, O, is a load, we might just be able to have our load use the
8564// address used by O. However, we don't know if anything else will store to
8565// that address before we can load from it. To prevent this situation, we need
8566// to insert our load, L, into the chain as a peer of O. To do this, we give L
8567// the same chain operand as O, we create a token factor from the chain results
8568// of O and L, and we replace all uses of O's chain result with that token
8569// factor (see spliceIntoChain below for this last part).
8570bool PPCTargetLowering::canReuseLoadAddress(SDValue Op, EVT MemVT,
8571 ReuseLoadInfo &RLI,
8572 SelectionDAG &DAG,
8573 ISD::LoadExtType ET) const {
8574 // Conservatively skip reusing for constrained FP nodes.
8575 if (Op->isStrictFPOpcode())
8576 return false;
8577
8578 SDLoc dl(Op);
8579 bool ValidFPToUint = Op.getOpcode() == ISD::FP_TO_UINT &&
8580 (Subtarget.hasFPCVT() || Op.getValueType() == MVT::i32);
8581 if (ET == ISD::NON_EXTLOAD &&
8582 (ValidFPToUint || Op.getOpcode() == ISD::FP_TO_SINT) &&
8583 isOperationLegalOrCustom(Op.getOpcode(),
8584 Op.getOperand(0).getValueType())) {
8585
8586 LowerFP_TO_INTForReuse(Op, RLI, DAG, dl);
8587 return true;
8588 }
8589
8590 LoadSDNode *LD = dyn_cast<LoadSDNode>(Op);
8591 if (!LD || LD->getExtensionType() != ET || LD->isVolatile() ||
8592 LD->isNonTemporal())
8593 return false;
8594 if (LD->getMemoryVT() != MemVT)
8595 return false;
8596
8597 // If the result of the load is an illegal type, then we can't build a
8598 // valid chain for reuse since the legalised loads and token factor node that
8599 // ties the legalised loads together use a different output chain than the
8600 // illegal load.
8601 if (!isTypeLegal(LD->getValueType(0)))
8602 return false;
8603
8604 RLI.Ptr = LD->getBasePtr();
8605 if (LD->isIndexed() && !LD->getOffset().isUndef()) {
8606 assert(LD->getAddressingMode() == ISD::PRE_INC &&
8607 "Non-pre-inc AM on PPC?");
8608 RLI.Ptr = DAG.getNode(ISD::ADD, dl, RLI.Ptr.getValueType(), RLI.Ptr,
8609 LD->getOffset());
8610 }
8611
8612 RLI.Chain = LD->getChain();
8613 RLI.MPI = LD->getPointerInfo();
8614 RLI.IsDereferenceable = LD->isDereferenceable();
8615 RLI.IsInvariant = LD->isInvariant();
8616 RLI.Alignment = LD->getAlign();
8617 RLI.AAInfo = LD->getAAInfo();
8618 RLI.Ranges = LD->getRanges();
8619
8620 RLI.ResChain = SDValue(LD, LD->isIndexed() ? 2 : 1);
8621 return true;
8622}
8623
8624// Given the head of the old chain, ResChain, insert a token factor containing
8625// it and NewResChain, and make users of ResChain now be users of that token
8626// factor.
8627// TODO: Remove and use DAG::makeEquivalentMemoryOrdering() instead.
8628void PPCTargetLowering::spliceIntoChain(SDValue ResChain,
8629 SDValue NewResChain,
8630 SelectionDAG &DAG) const {
8631 if (!ResChain)
8632 return;
8633
8634 SDLoc dl(NewResChain);
8635
8636 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
8637 NewResChain, DAG.getUNDEF(MVT::Other));
8638 assert(TF.getNode() != NewResChain.getNode() &&
8639 "A new TF really is required here");
8640
8641 DAG.ReplaceAllUsesOfValueWith(ResChain, TF);
8642 DAG.UpdateNodeOperands(TF.getNode(), ResChain, NewResChain);
8643}
8644
8645 /// Analyze the profitability of a direct move:
8646 /// prefer a float load to an int load plus direct move
8647 /// when there is no integer use of the int load.
8648bool PPCTargetLowering::directMoveIsProfitable(const SDValue &Op) const {
8649 SDNode *Origin = Op.getOperand(Op->isStrictFPOpcode() ? 1 : 0).getNode();
8650 if (Origin->getOpcode() != ISD::LOAD)
8651 return true;
8652
8653 // If there is no LXSIBZX/LXSIHZX, like Power8,
8654 // prefer direct move if the memory size is 1 or 2 bytes.
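 // (Pre-P9 VSX has no byte/halfword scalar loads, so a 1- or 2-byte value has
 // to come through a GPR load anyway, making the direct move the cheaper path.)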
8655 MachineMemOperand *MMO = cast<LoadSDNode>(Origin)->getMemOperand();
8656 if (!Subtarget.hasP9Vector() &&
8657 (!MMO->getSize().hasValue() || MMO->getSize().getValue() <= 2))
8658 return true;
8659
8660 for (SDNode::use_iterator UI = Origin->use_begin(),
8661 UE = Origin->use_end();
8662 UI != UE; ++UI) {
8663
8664 // Only look at the users of the loaded value.
8665 if (UI.getUse().get().getResNo() != 0)
8666 continue;
8667
8668 if (UI->getOpcode() != ISD::SINT_TO_FP &&
8669 UI->getOpcode() != ISD::UINT_TO_FP &&
8670 UI->getOpcode() != ISD::STRICT_SINT_TO_FP &&
8671 UI->getOpcode() != ISD::STRICT_UINT_TO_FP)
8672 return true;
8673 }
8674
8675 return false;
8676}
8677
8678 static SDValue convertIntToFP(SDValue Op, SDValue Src, SelectionDAG &DAG,
8679 const PPCSubtarget &Subtarget,
8680 SDValue Chain = SDValue()) {
8681 bool IsSigned = Op.getOpcode() == ISD::SINT_TO_FP ||
8682 Op.getOpcode() == ISD::STRICT_SINT_TO_FP;
8683 SDLoc dl(Op);
8684
8685 // TODO: Any other flags to propagate?
8686 SDNodeFlags Flags;
8687 Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
8688
8689 // If we have FCFIDS, then use it when converting to single-precision.
8690 // Otherwise, convert to double-precision and then round.
8691 bool IsSingle = Op.getValueType() == MVT::f32 && Subtarget.hasFPCVT();
8692 unsigned ConvOpc = IsSingle ? (IsSigned ? PPCISD::FCFIDS : PPCISD::FCFIDUS)
8693 : (IsSigned ? PPCISD::FCFID : PPCISD::FCFIDU);
8694 EVT ConvTy = IsSingle ? MVT::f32 : MVT::f64;
8695 if (Op->isStrictFPOpcode()) {
8696 if (!Chain)
8697 Chain = Op.getOperand(0);
8698 return DAG.getNode(getPPCStrictOpcode(ConvOpc), dl,
8699 DAG.getVTList(ConvTy, MVT::Other), {Chain, Src}, Flags);
8700 } else
8701 return DAG.getNode(ConvOpc, dl, ConvTy, Src);
8702}
8703
8704/// Custom lowers integer to floating point conversions to use
8705/// the direct move instructions available in ISA 2.07 to avoid the
8706/// need for load/store combinations.
8707SDValue PPCTargetLowering::LowerINT_TO_FPDirectMove(SDValue Op,
8708 SelectionDAG &DAG,
8709 const SDLoc &dl) const {
8710 assert((Op.getValueType() == MVT::f32 ||
8711 Op.getValueType() == MVT::f64) &&
8712 "Invalid floating point type as target of conversion");
8713 assert(Subtarget.hasFPCVT() &&
8714 "Int to FP conversions with direct moves require FPCVT");
8715 SDValue Src = Op.getOperand(Op->isStrictFPOpcode() ? 1 : 0);
8716 bool WordInt = Src.getSimpleValueType().SimpleTy == MVT::i32;
8717 bool Signed = Op.getOpcode() == ISD::SINT_TO_FP ||
8718 Op.getOpcode() == ISD::STRICT_SINT_TO_FP;
8719 unsigned MovOpc = (WordInt && !Signed) ? PPCISD::MTVSRZ : PPCISD::MTVSRA;
8720 SDValue Mov = DAG.getNode(MovOpc, dl, MVT::f64, Src);
8721 return convertIntToFP(Op, Mov, DAG, Subtarget);
8722}
8723
8724static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl) {
8725
8726 EVT VecVT = Vec.getValueType();
8727 assert(VecVT.isVector() && "Expected a vector type.");
8728 assert(VecVT.getSizeInBits() < 128 && "Vector is already full width.");
8729
8730 EVT EltVT = VecVT.getVectorElementType();
8731 unsigned WideNumElts = 128 / EltVT.getSizeInBits();
8732 EVT WideVT = EVT::getVectorVT(*DAG.getContext(), EltVT, WideNumElts);
8733
8734 unsigned NumConcat = WideNumElts / VecVT.getVectorNumElements();
8735 SmallVector<SDValue, 16> Ops(NumConcat);
8736 Ops[0] = Vec;
8737 SDValue UndefVec = DAG.getUNDEF(VecVT);
8738 for (unsigned i = 1; i < NumConcat; ++i)
8739 Ops[i] = UndefVec;
8740
8741 return DAG.getNode(ISD::CONCAT_VECTORS, dl, WideVT, Ops);
8742}
8743
8744SDValue PPCTargetLowering::LowerINT_TO_FPVector(SDValue Op, SelectionDAG &DAG,
8745 const SDLoc &dl) const {
8746 bool IsStrict = Op->isStrictFPOpcode();
8747 unsigned Opc = Op.getOpcode();
8748 SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
8749 assert((Opc == ISD::UINT_TO_FP || Opc == ISD::SINT_TO_FP ||
8750 Opc == ISD::STRICT_UINT_TO_FP || Opc == ISD::STRICT_SINT_TO_FP) &&
8751 "Unexpected conversion type");
8752 assert((Op.getValueType() == MVT::v2f64 || Op.getValueType() == MVT::v4f32) &&
8753 "Supports conversions to v2f64/v4f32 only.");
8754
8755 // TODO: Any other flags to propagate?
8756 SDNodeFlags Flags;
8757 Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
8758
8759 bool SignedConv = Opc == ISD::SINT_TO_FP || Opc == ISD::STRICT_SINT_TO_FP;
8760 bool FourEltRes = Op.getValueType() == MVT::v4f32;
8761
8762 SDValue Wide = widenVec(DAG, Src, dl);
8763 EVT WideVT = Wide.getValueType();
8764 unsigned WideNumElts = WideVT.getVectorNumElements();
8765 MVT IntermediateVT = FourEltRes ? MVT::v4i32 : MVT::v2i64;
8766
8767 SmallVector<int, 16> ShuffV;
8768 for (unsigned i = 0; i < WideNumElts; ++i)
8769 ShuffV.push_back(i + WideNumElts);
8770
8771 int Stride = FourEltRes ? WideNumElts / 4 : WideNumElts / 2;
8772 int SaveElts = FourEltRes ? 4 : 2;
8773 if (Subtarget.isLittleEndian())
8774 for (int i = 0; i < SaveElts; i++)
8775 ShuffV[i * Stride] = i;
8776 else
8777 for (int i = 1; i <= SaveElts; i++)
8778 ShuffV[i * Stride - 1] = i - 1;
8779
8780 SDValue ShuffleSrc2 =
8781 SignedConv ? DAG.getUNDEF(WideVT) : DAG.getConstant(0, dl, WideVT);
8782 SDValue Arrange = DAG.getVectorShuffle(WideVT, dl, Wide, ShuffleSrc2, ShuffV);
8783
8784 SDValue Extend;
8785 if (SignedConv) {
8786 Arrange = DAG.getBitcast(IntermediateVT, Arrange);
8787 EVT ExtVT = Src.getValueType();
8788 if (Subtarget.hasP9Altivec())
8789 ExtVT = EVT::getVectorVT(*DAG.getContext(), WideVT.getVectorElementType(),
8790 IntermediateVT.getVectorNumElements());
8791
8792 Extend = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, IntermediateVT, Arrange,
8793 DAG.getValueType(ExtVT));
8794 } else
8795 Extend = DAG.getNode(ISD::BITCAST, dl, IntermediateVT, Arrange);
8796
8797 if (IsStrict)
8798 return DAG.getNode(Opc, dl, DAG.getVTList(Op.getValueType(), MVT::Other),
8799 {Op.getOperand(0), Extend}, Flags);
8800
8801 return DAG.getNode(Opc, dl, Op.getValueType(), Extend);
8802}
8803
8804SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
8805 SelectionDAG &DAG) const {
8806 SDLoc dl(Op);
8807 bool IsSigned = Op.getOpcode() == ISD::SINT_TO_FP ||
8808 Op.getOpcode() == ISD::STRICT_SINT_TO_FP;
8809 bool IsStrict = Op->isStrictFPOpcode();
8810 SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
8811 SDValue Chain = IsStrict ? Op.getOperand(0) : DAG.getEntryNode();
8812
8813 // TODO: Any other flags to propagate?
8814 SDNodeFlags Flags;
8815 Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
8816
8817 EVT InVT = Src.getValueType();
8818 EVT OutVT = Op.getValueType();
8819 if (OutVT.isVector() && OutVT.isFloatingPoint() &&
8820 isOperationCustom(Op.getOpcode(), InVT))
8821 return LowerINT_TO_FPVector(Op, DAG, dl);
8822
8823 // Conversions to f128 are legal.
8824 if (Op.getValueType() == MVT::f128)
8825 return Subtarget.hasP9Vector() ? Op : SDValue();
8826
8827 // Don't handle ppc_fp128 here; let it be lowered to a libcall.
8828 if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
8829 return SDValue();
8830
8831 if (Src.getValueType() == MVT::i1) {
8832 SDValue Sel = DAG.getNode(ISD::SELECT, dl, Op.getValueType(), Src,
8833 DAG.getConstantFP(1.0, dl, Op.getValueType()),
8834 DAG.getConstantFP(0.0, dl, Op.getValueType()));
8835 if (IsStrict)
8836 return DAG.getMergeValues({Sel, Chain}, dl);
8837 else
8838 return Sel;
8839 }
8840
8841 // If we have direct moves, we can do all the conversion, skip the store/load
8842 // however, without FPCVT we can't do most conversions.
8843 if (Subtarget.hasDirectMove() && directMoveIsProfitable(Op) &&
8844 Subtarget.isPPC64() && Subtarget.hasFPCVT())
8845 return LowerINT_TO_FPDirectMove(Op, DAG, dl);
8846
8847 assert((IsSigned || Subtarget.hasFPCVT()) &&
8848 "UINT_TO_FP is supported only with FPCVT");
8849
8850 if (Src.getValueType() == MVT::i64) {
8851 SDValue SINT = Src;
8852 // When converting to single-precision, we actually need to convert
8853 // to double-precision first and then round to single-precision.
8854 // To avoid double-rounding effects during that operation, we have
8855 // to prepare the input operand. Bits that might be truncated when
8856 // converting to double-precision are replaced by a bit that won't
8857 // be lost at this stage, but is below the single-precision rounding
8858 // position.
8859 //
8860 // However, if -enable-unsafe-fp-math is in effect, accept double
8861 // rounding to avoid the extra overhead.
8862 if (Op.getValueType() == MVT::f32 &&
8863 !Subtarget.hasFPCVT() &&
8864 DAG.getTarget().Options.UnsafeFPMath) {
8865
8866 // Twiddle input to make sure the low 11 bits are zero. (If this
8867 // is the case, we are guaranteed the value will fit into the 53 bit
8868 // mantissa of an IEEE double-precision value without rounding.)
8869 // If any of those low 11 bits were not zero originally, make sure
8870 // bit 12 (value 2048) is set instead, so that the final rounding
8871 // to single-precision gets the correct result.
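 // (Bits 11..63 are 53 bits, exactly the width of the f64 significand, so a
 // value with its low 11 bits cleared converts to double without rounding.)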
8872 SDValue Round = DAG.getNode(ISD::AND, dl, MVT::i64,
8873 SINT, DAG.getConstant(2047, dl, MVT::i64));
8874 Round = DAG.getNode(ISD::ADD, dl, MVT::i64,
8875 Round, DAG.getConstant(2047, dl, MVT::i64));
8876 Round = DAG.getNode(ISD::OR, dl, MVT::i64, Round, SINT);
8877 Round = DAG.getNode(ISD::AND, dl, MVT::i64,
8878 Round, DAG.getConstant(-2048, dl, MVT::i64));
8879
8880 // However, we cannot use that value unconditionally: if the magnitude
8881 // of the input value is small, the bit-twiddling we did above might
8882 // end up visibly changing the output. Fortunately, in that case, we
8883 // don't need to twiddle bits since the original input will convert
8884 // exactly to double-precision floating-point already. Therefore,
8885 // construct a conditional to use the original value if the top 11
8886 // bits are all sign-bit copies, and use the rounded value computed
8887 // above otherwise.
8888 SDValue Cond = DAG.getNode(ISD::SRA, dl, MVT::i64,
8889 SINT, DAG.getConstant(53, dl, MVT::i32));
8890 Cond = DAG.getNode(ISD::ADD, dl, MVT::i64,
8891 Cond, DAG.getConstant(1, dl, MVT::i64));
8892 Cond = DAG.getSetCC(
8893 dl,
8894 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), MVT::i64),
8895 Cond, DAG.getConstant(1, dl, MVT::i64), ISD::SETUGT);
8896
8897 SINT = DAG.getNode(ISD::SELECT, dl, MVT::i64, Cond, Round, SINT);
8898 }
8899
8900 ReuseLoadInfo RLI;
8901 SDValue Bits;
8902
8904 if (canReuseLoadAddress(SINT, MVT::i64, RLI, DAG)) {
8905 Bits = DAG.getLoad(MVT::f64, dl, RLI.Chain, RLI.Ptr, RLI.MPI,
8906 RLI.Alignment, RLI.MMOFlags(), RLI.AAInfo, RLI.Ranges);
8907 spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
8908 } else if (Subtarget.hasLFIWAX() &&
8909 canReuseLoadAddress(SINT, MVT::i32, RLI, DAG, ISD::SEXTLOAD)) {
8910 MachineMemOperand *MMO =
8911 MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
8912 RLI.Alignment, RLI.AAInfo, RLI.Ranges);
8913 SDValue Ops[] = { RLI.Chain, RLI.Ptr };
8914 Bits = DAG.getMemIntrinsicNode(PPCISD::LFIWAX, dl,
8915 DAG.getVTList(MVT::f64, MVT::Other),
8916 Ops, MVT::i32, MMO);
8917 spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
8918 } else if (Subtarget.hasFPCVT() &&
8919 canReuseLoadAddress(SINT, MVT::i32, RLI, DAG, ISD::ZEXTLOAD)) {
8920 MachineMemOperand *MMO =
8921 MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
8922 RLI.Alignment, RLI.AAInfo, RLI.Ranges);
8923 SDValue Ops[] = { RLI.Chain, RLI.Ptr };
8924 Bits = DAG.getMemIntrinsicNode(PPCISD::LFIWZX, dl,
8925 DAG.getVTList(MVT::f64, MVT::Other),
8926 Ops, MVT::i32, MMO);
8927 spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
8928 } else if (((Subtarget.hasLFIWAX() &&
8929 SINT.getOpcode() == ISD::SIGN_EXTEND) ||
8930 (Subtarget.hasFPCVT() &&
8931 SINT.getOpcode() == ISD::ZERO_EXTEND)) &&
8932 SINT.getOperand(0).getValueType() == MVT::i32) {
8933 MachineFrameInfo &MFI = MF.getFrameInfo();
8934 EVT PtrVT = getPointerTy(DAG.getDataLayout());
8935
8936 int FrameIdx = MFI.CreateStackObject(4, Align(4), false);
8937 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
8938
8939 SDValue Store = DAG.getStore(Chain, dl, SINT.getOperand(0), FIdx,
8940 MachinePointerInfo::getFixedStack(
8941 DAG.getMachineFunction(), FrameIdx));
8942 Chain = Store;
8943
8944 assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 &&
8945 "Expected an i32 store");
8946
8947 RLI.Ptr = FIdx;
8948 RLI.Chain = Chain;
8949 RLI.MPI =
8950 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
8951 RLI.Alignment = Align(4);
8952
8953 MachineMemOperand *MMO =
8954 MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
8955 RLI.Alignment, RLI.AAInfo, RLI.Ranges);
8956 SDValue Ops[] = { RLI.Chain, RLI.Ptr };
8957 Bits = DAG.getMemIntrinsicNode(SINT.getOpcode() == ISD::ZERO_EXTEND ?
8958 PPCISD::LFIWZX : PPCISD::LFIWAX,
8959 dl, DAG.getVTList(MVT::f64, MVT::Other),
8960 Ops, MVT::i32, MMO);
8961 Chain = Bits.getValue(1);
8962 } else
8963 Bits = DAG.getNode(ISD::BITCAST, dl, MVT::f64, SINT);
8964
8965 SDValue FP = convertIntToFP(Op, Bits, DAG, Subtarget, Chain);
8966 if (IsStrict)
8967 Chain = FP.getValue(1);
8968
8969 if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) {
8970 if (IsStrict)
8971 FP = DAG.getNode(ISD::STRICT_FP_ROUND, dl,
8972 DAG.getVTList(MVT::f32, MVT::Other),
8973 {Chain, FP, DAG.getIntPtrConstant(0, dl)}, Flags);
8974 else
8975 FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP,
8976 DAG.getIntPtrConstant(0, dl, /*isTarget=*/true));
8977 }
8978 return FP;
8979 }
8980
8981 assert(Src.getValueType() == MVT::i32 &&
8982 "Unhandled INT_TO_FP type in custom expander!");
8983 // Since we only generate this in 64-bit mode, we can take advantage of
8984 // 64-bit registers. In particular, sign extend the input value into the
8985 // 64-bit register with extsw, store the WHOLE 64-bit value into the stack
8986 // then lfd it and fcfid it.
8987 MachineFunction &MF = DAG.getMachineFunction();
8988 MachineFrameInfo &MFI = MF.getFrameInfo();
8989 EVT PtrVT = getPointerTy(MF.getDataLayout());
8990
8991 SDValue Ld;
8992 if (Subtarget.hasLFIWAX() || Subtarget.hasFPCVT()) {
8993 ReuseLoadInfo RLI;
8994 bool ReusingLoad;
8995 if (!(ReusingLoad = canReuseLoadAddress(Src, MVT::i32, RLI, DAG))) {
8996 int FrameIdx = MFI.CreateStackObject(4, Align(4), false);
8997 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
8998
8999 SDValue Store = DAG.getStore(Chain, dl, Src, FIdx,
9000 MachinePointerInfo::getFixedStack(
9001 DAG.getMachineFunction(), FrameIdx));
9002 Chain = Store;
9003
9004 assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 &&
9005 "Expected an i32 store");
9006
9007 RLI.Ptr = FIdx;
9008 RLI.Chain = Chain;
9009 RLI.MPI =
9010 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
9011 RLI.Alignment = Align(4);
9012 }
9013
9014 MachineMemOperand *MMO =
9015 MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
9016 RLI.Alignment, RLI.AAInfo, RLI.Ranges);
9017 SDValue Ops[] = { RLI.Chain, RLI.Ptr };
9018 Ld = DAG.getMemIntrinsicNode(IsSigned ? PPCISD::LFIWAX : PPCISD::LFIWZX, dl,
9019 DAG.getVTList(MVT::f64, MVT::Other), Ops,
9020 MVT::i32, MMO);
9021 Chain = Ld.getValue(1);
9022 if (ReusingLoad)
9023 spliceIntoChain(RLI.ResChain, Ld.getValue(1), DAG);
9024 } else {
9025 assert(Subtarget.isPPC64() &&
9026 "i32->FP without LFIWAX supported only on PPC64");
9027
9028 int FrameIdx = MFI.CreateStackObject(8, Align(8), false);
9029 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
9030
9031 SDValue Ext64 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i64, Src);
9032
9033 // STD the extended value into the stack slot.
9034 SDValue Store = DAG.getStore(
9035 Chain, dl, Ext64, FIdx,
9036 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx));
9037 Chain = Store;
9038
9039 // Load the value as a double.
9040 Ld = DAG.getLoad(
9041 MVT::f64, dl, Chain, FIdx,
9042 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx));
9043 Chain = Ld.getValue(1);
9044 }
9045
9046 // FCFID it and return it.
9047 SDValue FP = convertIntToFP(Op, Ld, DAG, Subtarget, Chain);
9048 if (IsStrict)
9049 Chain = FP.getValue(1);
9050 if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) {
9051 if (IsStrict)
9052 FP = DAG.getNode(ISD::STRICT_FP_ROUND, dl,
9053 DAG.getVTList(MVT::f32, MVT::Other),
9054 {Chain, FP, DAG.getIntPtrConstant(0, dl)}, Flags);
9055 else
9056 FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP,
9057 DAG.getIntPtrConstant(0, dl, /*isTarget=*/true));
9058 }
9059 return FP;
9060}
9061
9062SDValue PPCTargetLowering::LowerGET_ROUNDING(SDValue Op,
9063 SelectionDAG &DAG) const {
9064 SDLoc dl(Op);
9065 /*
9066 The rounding mode is in bits 30:31 of FPSCR, and has the following
9067 settings:
9068 00 Round to nearest
9069 01 Round to 0
9070 10 Round to +inf
9071 11 Round to -inf
9072
9073 GET_ROUNDING, on the other hand, expects the following:
9074 -1 Undefined
9075 0 Round to 0
9076 1 Round to nearest
9077 2 Round to +inf
9078 3 Round to -inf
9079
9080 To perform the conversion, we do:
9081 ((FPSCR & 0x3) ^ ((~FPSCR & 0x3) >> 1))
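 For example, FPSCR bits 0b00 (round to nearest) give 0 ^ ((~0 & 0x3) >> 1) = 1,
 and 0b11 (round to -inf) give 3 ^ ((~3 & 0x3) >> 1) = 3, matching the
 GET_ROUNDING values above.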
9082 */
9083
9084 MachineFunction &MF = DAG.getMachineFunction();
9085 EVT VT = Op.getValueType();
9086 EVT PtrVT = getPointerTy(MF.getDataLayout());
9087
9088 // Save FP Control Word to register
9089 SDValue Chain = Op.getOperand(0);
9090 SDValue MFFS = DAG.getNode(PPCISD::MFFS, dl, {MVT::f64, MVT::Other}, Chain);
9091 Chain = MFFS.getValue(1);
9092
9093 SDValue CWD;
9094 if (isTypeLegal(MVT::i64)) {
9095 CWD = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32,
9096 DAG.getNode(ISD::BITCAST, dl, MVT::i64, MFFS));
9097 } else {
9098 // Save FP register to stack slot
9099 int SSFI = MF.getFrameInfo().CreateStackObject(8, Align(8), false);
9100 SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
9101 Chain = DAG.getStore(Chain, dl, MFFS, StackSlot, MachinePointerInfo());
9102
9103 // Load FP Control Word from low 32 bits of stack slot.
9105 "Stack slot adjustment is valid only on big endian subtargets!");
9106 SDValue Four = DAG.getConstant(4, dl, PtrVT);
9107 SDValue Addr = DAG.getNode(ISD::ADD, dl, PtrVT, StackSlot, Four);
9108 CWD = DAG.getLoad(MVT::i32, dl, Chain, Addr, MachinePointerInfo());
9109 Chain = CWD.getValue(1);
9110 }
9111
9112 // Transform as necessary
9113 SDValue CWD1 =
9114 DAG.getNode(ISD::AND, dl, MVT::i32,
9115 CWD, DAG.getConstant(3, dl, MVT::i32));
9116 SDValue CWD2 =
9117 DAG.getNode(ISD::SRL, dl, MVT::i32,
9118 DAG.getNode(ISD::AND, dl, MVT::i32,
9119 DAG.getNode(ISD::XOR, dl, MVT::i32,
9120 CWD, DAG.getConstant(3, dl, MVT::i32)),
9121 DAG.getConstant(3, dl, MVT::i32)),
9122 DAG.getConstant(1, dl, MVT::i32));
9123
9124 SDValue RetVal =
9125 DAG.getNode(ISD::XOR, dl, MVT::i32, CWD1, CWD2);
9126
9127 RetVal =
9128 DAG.getNode((VT.getSizeInBits() < 16 ? ISD::TRUNCATE : ISD::ZERO_EXTEND),
9129 dl, VT, RetVal);
9130
9131 return DAG.getMergeValues({RetVal, Chain}, dl);
9132}
9133
9134SDValue PPCTargetLowering::LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) const {
9135 EVT VT = Op.getValueType();
9136 unsigned BitWidth = VT.getSizeInBits();
9137 SDLoc dl(Op);
9138 assert(Op.getNumOperands() == 3 &&
9139 VT == Op.getOperand(1).getValueType() &&
9140 "Unexpected SHL!");
9141
9142 // Expand into a bunch of logical ops. Note that these ops
9143 // depend on the PPC behavior for oversized shift amounts.
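 // For example, with 32-bit halves and Amt == 40: Tmp2 and Tmp3 use oversized
 // shift amounts and therefore produce 0, while Tmp5 == 8, so OutHi == Lo << 8
 // and OutLo == 0, the correct result of shifting Hi:Lo left by 40.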
9144 SDValue Lo = Op.getOperand(0);
9145 SDValue Hi = Op.getOperand(1);
9146 SDValue Amt = Op.getOperand(2);
9147 EVT AmtVT = Amt.getValueType();
9148
9149 SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
9150 DAG.getConstant(BitWidth, dl, AmtVT), Amt);
9151 SDValue Tmp2 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Amt);
9152 SDValue Tmp3 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Tmp1);
9153 SDValue Tmp4 = DAG.getNode(ISD::OR , dl, VT, Tmp2, Tmp3);
9154 SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
9155 DAG.getConstant(-BitWidth, dl, AmtVT));
9156 SDValue Tmp6 = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Tmp5);
9157 SDValue OutHi = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6);
9158 SDValue OutLo = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Amt);
9159 SDValue OutOps[] = { OutLo, OutHi };
9160 return DAG.getMergeValues(OutOps, dl);
9161}
9162
9163SDValue PPCTargetLowering::LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) const {
9164 EVT VT = Op.getValueType();
9165 SDLoc dl(Op);
9166 unsigned BitWidth = VT.getSizeInBits();
9167 assert(Op.getNumOperands() == 3 &&
9168 VT == Op.getOperand(1).getValueType() &&
9169 "Unexpected SRL!");
9170
9171 // Expand into a bunch of logical ops. Note that these ops
9172 // depend on the PPC behavior for oversized shift amounts.
9173 SDValue Lo = Op.getOperand(0);
9174 SDValue Hi = Op.getOperand(1);
9175 SDValue Amt = Op.getOperand(2);
9176 EVT AmtVT = Amt.getValueType();
9177
9178 SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
9179 DAG.getConstant(BitWidth, dl, AmtVT), Amt);
9180 SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt);
9181 SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1);
9182 SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
9183 SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
9184 DAG.getConstant(-BitWidth, dl, AmtVT));
9185 SDValue Tmp6 = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Tmp5);
9186 SDValue OutLo = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6);
9187 SDValue OutHi = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Amt);
9188 SDValue OutOps[] = { OutLo, OutHi };
9189 return DAG.getMergeValues(OutOps, dl);
9190}
9191
9192SDValue PPCTargetLowering::LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) const {
9193 SDLoc dl(Op);
9194 EVT VT = Op.getValueType();
9195 unsigned BitWidth = VT.getSizeInBits();
9196 assert(Op.getNumOperands() == 3 &&
9197 VT == Op.getOperand(1).getValueType() &&
9198 "Unexpected SRA!");
9199
9200 // Expand into a bunch of logical ops, followed by a select_cc.
9201 SDValue Lo = Op.getOperand(0);
9202 SDValue Hi = Op.getOperand(1);
9203 SDValue Amt = Op.getOperand(2);
9204 EVT AmtVT = Amt.getValueType();
9205
9206 SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
9207 DAG.getConstant(BitWidth, dl, AmtVT), Amt);
9208 SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt);
9209 SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1);
9210 SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
9211 SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
9212 DAG.getConstant(-BitWidth, dl, AmtVT));
9213 SDValue Tmp6 = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Tmp5);
9214 SDValue OutHi = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Amt);
9215 SDValue OutLo = DAG.getSelectCC(dl, Tmp5, DAG.getConstant(0, dl, AmtVT),
9216 Tmp4, Tmp6, ISD::SETLE);
9217 SDValue OutOps[] = { OutLo, OutHi };
9218 return DAG.getMergeValues(OutOps, dl);
9219}
9220
9221SDValue PPCTargetLowering::LowerFunnelShift(SDValue Op,
9222 SelectionDAG &DAG) const {
9223 SDLoc dl(Op);
9224 EVT VT = Op.getValueType();
9225 unsigned BitWidth = VT.getSizeInBits();
9226
9227 bool IsFSHL = Op.getOpcode() == ISD::FSHL;
9228 SDValue X = Op.getOperand(0);
9229 SDValue Y = Op.getOperand(1);
9230 SDValue Z = Op.getOperand(2);
9231 EVT AmtVT = Z.getValueType();
9232
9233 // fshl: (X << (Z % BW)) | (Y >> (BW - (Z % BW)))
9234 // fshr: (X << (BW - (Z % BW))) | (Y >> (Z % BW))
9235 // This is simpler than TargetLowering::expandFunnelShift because we can rely
9236 // on PowerPC shift by BW being well defined.
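 // For example, an i64 fshl with Z == 0 gives SubZ == 64; the PPC shifts return
 // 0 for an amount equal to the bit width, so the result correctly reduces to X
 // (and to Y for fshr).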
9237 Z = DAG.getNode(ISD::AND, dl, AmtVT, Z,
9238 DAG.getConstant(BitWidth - 1, dl, AmtVT));
9239 SDValue SubZ =
9240 DAG.getNode(ISD::SUB, dl, AmtVT, DAG.getConstant(BitWidth, dl, AmtVT), Z);
9241 X = DAG.getNode(PPCISD::SHL, dl, VT, X, IsFSHL ? Z : SubZ);
9242 Y = DAG.getNode(PPCISD::SRL, dl, VT, Y, IsFSHL ? SubZ : Z);
9243 return DAG.getNode(ISD::OR, dl, VT, X, Y);
9244}
9245
9246//===----------------------------------------------------------------------===//
9247// Vector related lowering.
9248//
9249
9250/// getCanonicalConstSplat - Build a canonical splat immediate of Val with an
9251/// element size of SplatSize. Cast the result to VT.
9252static SDValue getCanonicalConstSplat(uint64_t Val, unsigned SplatSize, EVT VT,
9253 SelectionDAG &DAG, const SDLoc &dl) {
9254 static const MVT VTys[] = { // canonical VT to use for each size.
9255 MVT::v16i8, MVT::v8i16, MVT::Other, MVT::v4i32
9256 };
9257
9258 EVT ReqVT = VT != MVT::Other ? VT : VTys[SplatSize-1];
9259
9260 // For a splat with all ones, turn it to vspltisb 0xFF to canonicalize.
9261 if (Val == ((1LLU << (SplatSize * 8)) - 1)) {
9262 SplatSize = 1;
9263 Val = 0xFF;
9264 }
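 // e.g. a v8i16 splat of 0xFFFF and a v4i32 splat of 0xFFFFFFFF both canonicalize
 // to a v16i8 splat of 0xFF, so all-ones vectors share a single splat form.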
9265
9266 EVT CanonicalVT = VTys[SplatSize-1];
9267
9268 // Build a canonical splat for this value.
9269 return DAG.getBitcast(ReqVT, DAG.getConstant(Val, dl, CanonicalVT));
9270}
9271
9272/// BuildIntrinsicOp - Return a unary operator intrinsic node with the
9273/// specified intrinsic ID.
9274 static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op, SelectionDAG &DAG,
9275 const SDLoc &dl, EVT DestVT = MVT::Other) {
9276 if (DestVT == MVT::Other) DestVT = Op.getValueType();
9277 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
9278 DAG.getConstant(IID, dl, MVT::i32), Op);
9279}
9280
9281/// BuildIntrinsicOp - Return a binary operator intrinsic node with the
9282/// specified intrinsic ID.
9283static SDValue BuildIntrinsicOp(unsigned IID, SDValue LHS, SDValue RHS,
9284 SelectionDAG &DAG, const SDLoc &dl,
9285 EVT DestVT = MVT::Other) {
9286 if (DestVT == MVT::Other) DestVT = LHS.getValueType();
9287 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
9288 DAG.getConstant(IID, dl, MVT::i32), LHS, RHS);
9289}
9290
9291/// BuildIntrinsicOp - Return a ternary operator intrinsic node with the
9292/// specified intrinsic ID.
9293static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op0, SDValue Op1,
9294 SDValue Op2, SelectionDAG &DAG, const SDLoc &dl,
9295 EVT DestVT = MVT::Other) {
9296 if (DestVT == MVT::Other) DestVT = Op0.getValueType();
9297 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
9298 DAG.getConstant(IID, dl, MVT::i32), Op0, Op1, Op2);
9299}
9300
9301/// BuildVSLDOI - Return a VECTOR_SHUFFLE that is a vsldoi of the specified
9302/// amount. The result has the specified value type.
9303static SDValue BuildVSLDOI(SDValue LHS, SDValue RHS, unsigned Amt, EVT VT,
9304 SelectionDAG &DAG, const SDLoc &dl) {
9305 // Force LHS/RHS to be the right type.
9306 LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, LHS);
9307 RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, RHS);
9308
9309 int Ops[16];
9310 for (unsigned i = 0; i != 16; ++i)
9311 Ops[i] = i + Amt;
9312 SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, LHS, RHS, Ops);
9313 return DAG.getNode(ISD::BITCAST, dl, VT, T);
9314}
9315
9316/// Do we have an efficient pattern in a .td file for this node?
9317///
9318/// \param V - pointer to the BuildVectorSDNode being matched
9319/// \param HasDirectMove - does this subtarget have VSR <-> GPR direct moves?
9320///
9321/// There are some patterns where it is beneficial to keep a BUILD_VECTOR
9322/// node as a BUILD_VECTOR node rather than expanding it. The patterns where
9323/// the opposite is true (expansion is beneficial) are:
9324/// - The node builds a vector out of integers that are not 32 or 64-bits
9325/// - The node builds a vector out of constants
9326/// - The node is a "load-and-splat"
9327/// In all other cases, we will choose to keep the BUILD_VECTOR.
9328 static bool haveEfficientBuildVectorPattern(BuildVectorSDNode *V,
9329 bool HasDirectMove,
9330 bool HasP8Vector) {
9331 EVT VecVT = V->getValueType(0);
9332 bool RightType = VecVT == MVT::v2f64 ||
9333 (HasP8Vector && VecVT == MVT::v4f32) ||
9334 (HasDirectMove && (VecVT == MVT::v2i64 || VecVT == MVT::v4i32));
9335 if (!RightType)
9336 return false;
9337
9338 bool IsSplat = true;
9339 bool IsLoad = false;
9340 SDValue Op0 = V->getOperand(0);
9341
9342 // This function is called in a block that confirms the node is not a constant
9343 // splat. So a constant BUILD_VECTOR here means the vector is built out of
9344 // different constants.
9345 if (V->isConstant())
9346 return false;
9347 for (int i = 0, e = V->getNumOperands(); i < e; ++i) {
9348 if (V->getOperand(i).isUndef())
9349 return false;
9350 // We want to expand nodes that represent load-and-splat even if the
9351 // loaded value is a floating point truncation or conversion to int.
9352 if (V->getOperand(i).getOpcode() == ISD::LOAD ||
9353 (V->getOperand(i).getOpcode() == ISD::FP_ROUND &&
9354 V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD) ||
9355 (V->getOperand(i).getOpcode() == ISD::FP_TO_SINT &&
9356 V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD) ||
9357 (V->getOperand(i).getOpcode() == ISD::FP_TO_UINT &&
9358 V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD))
9359 IsLoad = true;
9360 // If the operands are different or the input is not a load and has more
9361 // uses than just this BV node, then it isn't a splat.
9362 if (V->getOperand(i) != Op0 ||
9363 (!IsLoad && !V->isOnlyUserOf(V->getOperand(i).getNode())))
9364 IsSplat = false;
9365 }
9366 return !(IsSplat && IsLoad);
9367}
9368
9369// Lower BITCAST(f128, (build_pair i64, i64)) to BUILD_FP128.
9370SDValue PPCTargetLowering::LowerBITCAST(SDValue Op, SelectionDAG &DAG) const {
9371
9372 SDLoc dl(Op);
9373 SDValue Op0 = Op->getOperand(0);
9374
9375 SDValue Lo = Op0.getOperand(0);
9376 SDValue Hi = Op0.getOperand(1);
9377
9378 if ((Op.getValueType() != MVT::f128) ||
9379 (Op0.getOpcode() != ISD::BUILD_PAIR) || (Lo.getValueType() != MVT::i64) ||
9380 (Hi.getValueType() != MVT::i64) || !Subtarget.isPPC64())
9381 return SDValue();
9382
9383 if (!Subtarget.isLittleEndian())
9384 std::swap(Lo, Hi);
9385
9386 return DAG.getNode(PPCISD::BUILD_FP128, dl, MVT::f128, Lo, Hi);
9387}
9388
9389static const SDValue *getNormalLoadInput(const SDValue &Op, bool &IsPermuted) {
9390 const SDValue *InputLoad = &Op;
9391 while (InputLoad->getOpcode() == ISD::BITCAST)
9392 InputLoad = &InputLoad->getOperand(0);
9393 if (InputLoad->getOpcode() == ISD::SCALAR_TO_VECTOR ||
9394 InputLoad->getOpcode() == PPCISD::SCALAR_TO_VECTOR_PERMUTED) {
9395 IsPermuted = InputLoad->getOpcode() == PPCISD::SCALAR_TO_VECTOR_PERMUTED;
9396 InputLoad = &InputLoad->getOperand(0);
9397 }
9398 if (InputLoad->getOpcode() != ISD::LOAD)
9399 return nullptr;
9400 LoadSDNode *LD = cast<LoadSDNode>(*InputLoad);
9401 return ISD::isNormalLoad(LD) ? InputLoad : nullptr;
9402}
9403
9404// Convert the argument APFloat to a single precision APFloat if there is no
9405// loss in information during the conversion to single precision APFloat and the
9406// resulting number is not a denormal number. Return true if successful.
9407 bool llvm::convertToNonDenormSingle(APFloat &ArgAPFloat) {
9408 APFloat APFloatToConvert = ArgAPFloat;
9409 bool LosesInfo = true;
9410 APFloatToConvert.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven,
9411 &LosesInfo);
9412 bool Success = (!LosesInfo && !APFloatToConvert.isDenormal());
9413 if (Success)
9414 ArgAPFloat = APFloatToConvert;
9415 return Success;
9416}
9417
9418// Bitcast the argument APInt to a double and convert it to a single precision
9419// APFloat, bitcast the APFloat to an APInt and assign it to the original
9420// argument if there is no loss in information during the conversion from
9421// double to single precision APFloat and the resulting number is not a denormal
9422// number. Return true if successful.
9423 bool llvm::convertToNonDenormSingle(APInt &ArgAPInt) {
9424 double DpValue = ArgAPInt.bitsToDouble();
9425 APFloat APFloatDp(DpValue);
9426 bool Success = convertToNonDenormSingle(APFloatDp);
9427 if (Success)
9428 ArgAPInt = APFloatDp.bitcastToAPInt();
9429 return Success;
9430}
9431
9432 // Nondestructive check for convertToNonDenormSingle.
9433 bool llvm::checkConvertToNonDenormSingle(APFloat &ArgAPFloat) {
9434 // Only convert if it loses info, since XXSPLTIDP should
9435 // handle the other case.
9436 APFloat APFloatToConvert = ArgAPFloat;
9437 bool LosesInfo = true;
9438 APFloatToConvert.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven,
9439 &LosesInfo);
9440
9441 return (!LosesInfo && !APFloatToConvert.isDenormal());
9442}
9443
9444static bool isValidSplatLoad(const PPCSubtarget &Subtarget, const SDValue &Op,
9445 unsigned &Opcode) {
9446 LoadSDNode *InputNode = dyn_cast<LoadSDNode>(Op.getOperand(0));
9447 if (!InputNode || !Subtarget.hasVSX() || !ISD::isUNINDEXEDLoad(InputNode))
9448 return false;
9449
9450 EVT Ty = Op->getValueType(0);
9451 // For v2f64, v4f32 and v4i32 types, we require the load to be non-extending
9452 // as we cannot handle extending loads for these types.
9453 if ((Ty == MVT::v2f64 || Ty == MVT::v4f32 || Ty == MVT::v4i32) &&
9454 ISD::isNON_EXTLoad(InputNode))
9455 return true;
9456
9457 EVT MemVT = InputNode->getMemoryVT();
9458 // For v8i16 and v16i8 types, extending loads can be handled as long as the
9459 // memory VT is the same vector element VT type.
9460 // The loads feeding into the v8i16 and v16i8 types will be extending because
9461 // scalar i8/i16 are not legal types.
9462 if ((Ty == MVT::v8i16 || Ty == MVT::v16i8) && ISD::isEXTLoad(InputNode) &&
9463 (MemVT == Ty.getVectorElementType()))
9464 return true;
9465
9466 if (Ty == MVT::v2i64) {
9467 // Check the extend type, when the input type is i32, and the output vector
9468 // type is v2i64.
9469 if (MemVT == MVT::i32) {
9470 if (ISD::isZEXTLoad(InputNode))
9471 Opcode = PPCISD::ZEXT_LD_SPLAT;
9472 if (ISD::isSEXTLoad(InputNode))
9473 Opcode = PPCISD::SEXT_LD_SPLAT;
9474 }
9475 return true;
9476 }
9477 return false;
9478}
9479
9480// If this is a case we can't handle, return null and let the default
9481// expansion code take care of it. If we CAN select this case, and if it
9482// selects to a single instruction, return Op. Otherwise, if we can codegen
9483// this case more efficiently than a constant pool load, lower it to the
9484// sequence of ops that should be used.
9485SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
9486 SelectionDAG &DAG) const {
9487 SDLoc dl(Op);
9488 BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
9489 assert(BVN && "Expected a BuildVectorSDNode in LowerBUILD_VECTOR");
9490
9491 // Check if this is a splat of a constant value.
9492 APInt APSplatBits, APSplatUndef;
9493 unsigned SplatBitSize;
9494 bool HasAnyUndefs;
9495 bool BVNIsConstantSplat =
9496 BVN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
9497 HasAnyUndefs, 0, !Subtarget.isLittleEndian());
9498
9499 // If it is a splat of a double, check if we can shrink it to a 32 bit
9500 // non-denormal float which when converted back to double gives us the same
9501 // double. This is to exploit the XXSPLTIDP instruction.
9502 // If we lose precision, we use XXSPLTI32DX.
9503 if (BVNIsConstantSplat && (SplatBitSize == 64) &&
9504 Subtarget.hasPrefixInstrs() && Subtarget.hasP10Vector()) {
9505 // Check the type first to short-circuit so we don't modify APSplatBits if
9506 // this block isn't executed.
9507 if ((Op->getValueType(0) == MVT::v2f64) &&
9508 convertToNonDenormSingle(APSplatBits)) {
9509 SDValue SplatNode = DAG.getNode(
9510 PPCISD::XXSPLTI_SP_TO_DP, dl, MVT::v2f64,
9511 DAG.getTargetConstant(APSplatBits.getZExtValue(), dl, MVT::i32));
9512 return DAG.getBitcast(Op.getValueType(), SplatNode);
9513 } else {
9514 // We may lose precision, so we have to use XXSPLTI32DX.
9515
9516 uint32_t Hi = Hi_32(APSplatBits.getZExtValue());
9517 uint32_t Lo = Lo_32(APSplatBits.getZExtValue());
9518 SDValue SplatNode = DAG.getUNDEF(MVT::v2i64);
9519
9520 if (!Hi || !Lo)
9521 // If either load is 0, then we should generate XXLXOR to set to 0.
9522 SplatNode = DAG.getTargetConstant(0, dl, MVT::v2i64);
9523
9524 if (Hi)
9525 SplatNode = DAG.getNode(
9526 PPCISD::XXSPLTI32DX, dl, MVT::v2i64, SplatNode,
9527 DAG.getTargetConstant(0, dl, MVT::i32),
9528 DAG.getTargetConstant(Hi, dl, MVT::i32));
9529
9530 if (Lo)
9531 SplatNode =
9532 DAG.getNode(PPCISD::XXSPLTI32DX, dl, MVT::v2i64, SplatNode,
9533 DAG.getTargetConstant(1, dl, MVT::i32),
9534 DAG.getTargetConstant(Lo, dl, MVT::i32));
9535
9536 return DAG.getBitcast(Op.getValueType(), SplatNode);
9537 }
9538 }
9539
9540 if (!BVNIsConstantSplat || SplatBitSize > 32) {
9541 unsigned NewOpcode = PPCISD::LD_SPLAT;
9542
9543 // Handle load-and-splat patterns as we have instructions that will do this
9544 // in one go.
9545 if (DAG.isSplatValue(Op, true) &&
9546 isValidSplatLoad(Subtarget, Op, NewOpcode)) {
9547 const SDValue *InputLoad = &Op.getOperand(0);
9548 LoadSDNode *LD = cast<LoadSDNode>(*InputLoad);
9549
9550 // If the input load is an extending load, it will be an i32 -> i64
9551 // extending load and isValidSplatLoad() will update NewOpcode.
9552 unsigned MemorySize = LD->getMemoryVT().getScalarSizeInBits();
9553 unsigned ElementSize =
9554 MemorySize * ((NewOpcode == PPCISD::LD_SPLAT) ? 1 : 2);
9555
9556 assert(((ElementSize == 2 * MemorySize)
9557 ? (NewOpcode == PPCISD::ZEXT_LD_SPLAT ||
9558 NewOpcode == PPCISD::SEXT_LD_SPLAT)
9559 : (NewOpcode == PPCISD::LD_SPLAT)) &&
9560 "Unmatched element size and opcode!\n");
9561
9562 // Checking for a single use of this load, we have to check for vector
9563 // width (128 bits) / ElementSize uses (since each operand of the
9564 // BUILD_VECTOR is a separate use of the value).
9565 unsigned NumUsesOfInputLD = 128 / ElementSize;
9566 for (SDValue BVInOp : Op->ops())
9567 if (BVInOp.isUndef())
9568 NumUsesOfInputLD--;
9569
 9570 // Exclude some cases where LD_SPLAT is worse than scalar_to_vector:
9571 // Below cases should also happen for "lfiwzx/lfiwax + LE target + index
9572 // 1" and "lxvrhx + BE target + index 7" and "lxvrbx + BE target + index
9573 // 15", but function IsValidSplatLoad() now will only return true when
9574 // the data at index 0 is not nullptr. So we will not get into trouble for
9575 // these cases.
9576 //
9577 // case 1 - lfiwzx/lfiwax
9578 // 1.1: load result is i32 and is sign/zero extend to i64;
9579 // 1.2: build a v2i64 vector type with above loaded value;
9580 // 1.3: the vector has only one value at index 0, others are all undef;
9581 // 1.4: on BE target, so that lfiwzx/lfiwax does not need any permute.
9582 if (NumUsesOfInputLD == 1 &&
9583 (Op->getValueType(0) == MVT::v2i64 && NewOpcode != PPCISD::LD_SPLAT &&
9584 !Subtarget.isLittleEndian() && Subtarget.hasVSX() &&
9585 Subtarget.hasLFIWAX()))
9586 return SDValue();
9587
9588 // case 2 - lxvr[hb]x
9589 // 2.1: load result is at most i16;
9590 // 2.2: build a vector with above loaded value;
9591 // 2.3: the vector has only one value at index 0, others are all undef;
9592 // 2.4: on LE target, so that lxvr[hb]x does not need any permute.
9593 if (NumUsesOfInputLD == 1 && Subtarget.isLittleEndian() &&
9594 Subtarget.isISA3_1() && ElementSize <= 16)
9595 return SDValue();
9596
9597 assert(NumUsesOfInputLD > 0 && "No uses of input LD of a build_vector?");
9598 if (InputLoad->getNode()->hasNUsesOfValue(NumUsesOfInputLD, 0) &&
9599 Subtarget.hasVSX()) {
9600 SDValue Ops[] = {
9601 LD->getChain(), // Chain
9602 LD->getBasePtr(), // Ptr
9603 DAG.getValueType(Op.getValueType()) // VT
9604 };
9605 SDValue LdSplt = DAG.getMemIntrinsicNode(
9606 NewOpcode, dl, DAG.getVTList(Op.getValueType(), MVT::Other), Ops,
9607 LD->getMemoryVT(), LD->getMemOperand());
9608 // Replace all uses of the output chain of the original load with the
9609 // output chain of the new load.
9610 DAG.ReplaceAllUsesOfValueWith(InputLoad->getValue(1),
9611 LdSplt.getValue(1));
9612 return LdSplt;
9613 }
9614 }
9615
9616 // In 64BIT mode BUILD_VECTOR nodes that are not constant splats of up to
9617 // 32-bits can be lowered to VSX instructions under certain conditions.
9618 // Without VSX, there is no pattern more efficient than expanding the node.
9619 if (Subtarget.hasVSX() && Subtarget.isPPC64() &&
9620 haveEfficientBuildVectorPattern(BVN, Subtarget.hasDirectMove(),
9621 Subtarget.hasP8Vector()))
9622 return Op;
9623 return SDValue();
9624 }
9625
9626 uint64_t SplatBits = APSplatBits.getZExtValue();
9627 uint64_t SplatUndef = APSplatUndef.getZExtValue();
9628 unsigned SplatSize = SplatBitSize / 8;
9629
9630 // First, handle single instruction cases.
9631
9632 // All zeros?
9633 if (SplatBits == 0) {
9634 // Canonicalize all zero vectors to be v4i32.
9635 if (Op.getValueType() != MVT::v4i32 || HasAnyUndefs) {
9636 SDValue Z = DAG.getConstant(0, dl, MVT::v4i32);
9637 Op = DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Z);
9638 }
9639 return Op;
9640 }
9641
9642 // We have XXSPLTIW for constant splats four bytes wide.
9643 // Given vector length is a multiple of 4, 2-byte splats can be replaced
9644 // with 4-byte splats. We replicate the SplatBits in case of 2-byte splat to
9645 // make a 4-byte splat element. For example: 2-byte splat of 0xABAB can be
9646 // turned into a 4-byte splat of 0xABABABAB.
9647 if (Subtarget.hasPrefixInstrs() && Subtarget.hasP10Vector() && SplatSize == 2)
9648 return getCanonicalConstSplat(SplatBits | (SplatBits << 16), SplatSize * 2,
9649 Op.getValueType(), DAG, dl);
9650
9651 if (Subtarget.hasPrefixInstrs() && Subtarget.hasP10Vector() && SplatSize == 4)
9652 return getCanonicalConstSplat(SplatBits, SplatSize, Op.getValueType(), DAG,
9653 dl);
9654
9655 // We have XXSPLTIB for constant splats one byte wide.
9656 if (Subtarget.hasP9Vector() && SplatSize == 1)
9657 return getCanonicalConstSplat(SplatBits, SplatSize, Op.getValueType(), DAG,
9658 dl);
9659
9660 // If the sign extended value is in the range [-16,15], use VSPLTI[bhw].
9661 int32_t SextVal = SignExtend32(SplatBits, SplatBitSize);
9662 if (SextVal >= -16 && SextVal <= 15)
9663 return getCanonicalConstSplat(SextVal, SplatSize, Op.getValueType(), DAG,
9664 dl);
9665
9666 // Two instruction sequences.
9667
9668 // If this value is in the range [-32,30] and is even, use:
9669 // VSPLTI[bhw](val/2) + VSPLTI[bhw](val/2)
9670 // If this value is in the range [17,31] and is odd, use:
9671 // VSPLTI[bhw](val-16) - VSPLTI[bhw](-16)
9672 // If this value is in the range [-31,-17] and is odd, use:
9673 // VSPLTI[bhw](val+16) + VSPLTI[bhw](-16)
9674 // Note the last two are three-instruction sequences.
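  // Illustrative examples (not in the original source): a splat of 30 can be
  // built as vspltisw 15 followed by vadduwm (15 + 15), and a splat of 27 as
  // vspltisw 11, vspltisw -16 and vsubuwm (11 - (-16)).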
9675 if (SextVal >= -32 && SextVal <= 31) {
9676 // To avoid having these optimizations undone by constant folding,
9677 // we convert to a pseudo that will be expanded later into one of
9678 // the above forms.
9679 SDValue Elt = DAG.getConstant(SextVal, dl, MVT::i32);
9680 EVT VT = (SplatSize == 1 ? MVT::v16i8 :
9681 (SplatSize == 2 ? MVT::v8i16 : MVT::v4i32));
9682 SDValue EltSize = DAG.getConstant(SplatSize, dl, MVT::i32);
9683 SDValue RetVal = DAG.getNode(PPCISD::VADD_SPLAT, dl, VT, Elt, EltSize);
9684 if (VT == Op.getValueType())
9685 return RetVal;
9686 else
9687 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), RetVal);
9688 }
9689
9690 // If this is 0x8000_0000 x 4, turn into vspltisw + vslw. If it is
9691 // 0x7FFF_FFFF x 4, turn it into not(0x8000_0000). This is important
9692 // for fneg/fabs.
9693 if (SplatSize == 4 && SplatBits == (0x7FFFFFFF&~SplatUndef)) {
9694 // Make -1 and vspltisw -1:
9695 SDValue OnesV = getCanonicalConstSplat(-1, 4, MVT::v4i32, DAG, dl);
9696
9697 // Make the VSLW intrinsic, computing 0x8000_0000.
9698 SDValue Res = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, OnesV,
9699 OnesV, DAG, dl);
9700
9701 // xor by OnesV to invert it.
9702 Res = DAG.getNode(ISD::XOR, dl, MVT::v4i32, Res, OnesV);
9703 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
9704 }
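  // Worked example (illustrative, not in the original source): vspltisw -1
  // puts 0xFFFFFFFF in every word; vslw shifts each word left by the low
  // 5 bits of the corresponding shift word (31 here), giving 0x80000000;
  // the final xor with 0xFFFFFFFF then produces 0x7FFFFFFF in every word.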
9705
9706 // Check to see if this is a wide variety of vsplti*, binop self cases.
9707 static const signed char SplatCsts[] = {
9708 -1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7,
9709 -8, 8, -9, 9, -10, 10, -11, 11, -12, 12, -13, 13, 14, -14, 15, -15, -16
9710 };
9711
9712 for (unsigned idx = 0; idx < std::size(SplatCsts); ++idx) {
9713 // Indirect through the SplatCsts array so that we favor 'vsplti -1' for
 9714 // cases which are ambiguous (e.g. formation of 0x8000_0000).
9715 int i = SplatCsts[idx];
9716
9717 // Figure out what shift amount will be used by altivec if shifted by i in
9718 // this splat size.
9719 unsigned TypeShiftAmt = i & (SplatBitSize-1);
9720
9721 // vsplti + shl self.
9722 if (SextVal == (int)((unsigned)i << TypeShiftAmt)) {
9723 SDValue Res = getCanonicalConstSplat(i, SplatSize, MVT::Other, DAG, dl);
9724 static const unsigned IIDs[] = { // Intrinsic to use for each size.
9725 Intrinsic::ppc_altivec_vslb, Intrinsic::ppc_altivec_vslh, 0,
9726 Intrinsic::ppc_altivec_vslw
9727 };
9728 Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
9729 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
9730 }
9731
9732 // vsplti + srl self.
9733 if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) {
9734 SDValue Res = getCanonicalConstSplat(i, SplatSize, MVT::Other, DAG, dl);
9735 static const unsigned IIDs[] = { // Intrinsic to use for each size.
9736 Intrinsic::ppc_altivec_vsrb, Intrinsic::ppc_altivec_vsrh, 0,
9737 Intrinsic::ppc_altivec_vsrw
9738 };
9739 Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
9740 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
9741 }
9742
9743 // vsplti + rol self.
9744 if (SextVal == (int)(((unsigned)i << TypeShiftAmt) |
9745 ((unsigned)i >> (SplatBitSize-TypeShiftAmt)))) {
9746 SDValue Res = getCanonicalConstSplat(i, SplatSize, MVT::Other, DAG, dl);
9747 static const unsigned IIDs[] = { // Intrinsic to use for each size.
9748 Intrinsic::ppc_altivec_vrlb, Intrinsic::ppc_altivec_vrlh, 0,
9749 Intrinsic::ppc_altivec_vrlw
9750 };
9751 Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
9752 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
9753 }
9754
9755 // t = vsplti c, result = vsldoi t, t, 1
9756 if (SextVal == (int)(((unsigned)i << 8) | (i < 0 ? 0xFF : 0))) {
9757 SDValue T = getCanonicalConstSplat(i, SplatSize, MVT::v16i8, DAG, dl);
9758 unsigned Amt = Subtarget.isLittleEndian() ? 15 : 1;
9759 return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
9760 }
9761 // t = vsplti c, result = vsldoi t, t, 2
9762 if (SextVal == (int)(((unsigned)i << 16) | (i < 0 ? 0xFFFF : 0))) {
9763 SDValue T = getCanonicalConstSplat(i, SplatSize, MVT::v16i8, DAG, dl);
9764 unsigned Amt = Subtarget.isLittleEndian() ? 14 : 2;
9765 return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
9766 }
9767 // t = vsplti c, result = vsldoi t, t, 3
9768 if (SextVal == (int)(((unsigned)i << 24) | (i < 0 ? 0xFFFFFF : 0))) {
9769 SDValue T = getCanonicalConstSplat(i, SplatSize, MVT::v16i8, DAG, dl);
9770 unsigned Amt = Subtarget.isLittleEndian() ? 13 : 3;
9771 return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
9772 }
9773 }
9774
9775 return SDValue();
9776}
9777
9778/// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
9779/// the specified operations to build the shuffle.
9780static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
9781 SDValue RHS, SelectionDAG &DAG,
9782 const SDLoc &dl) {
9783 unsigned OpNum = (PFEntry >> 26) & 0x0F;
9784 unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1);
9785 unsigned RHSID = (PFEntry >> 0) & ((1 << 13)-1);
9786
9787 enum {
9788 OP_COPY = 0, // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>
9789 OP_VMRGHW,
9790 OP_VMRGLW,
9791 OP_VSPLTISW0,
9792 OP_VSPLTISW1,
9793 OP_VSPLTISW2,
9794 OP_VSPLTISW3,
9795 OP_VSLDOI4,
9796 OP_VSLDOI8,
9797 OP_VSLDOI12
9798 };
9799
9800 if (OpNum == OP_COPY) {
9801 if (LHSID == (1*9+2)*9+3) return LHS;
9802 assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!");
9803 return RHS;
9804 }
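  // Illustrative note (not in the original source): LHSID/RHSID encode four
  // word indices as base-9 digits, where 0-7 select a word of the two
  // concatenated inputs and 8 means undef. The identity LHS <0,1,2,3>
  // encodes as ((0*9+1)*9+2)*9+3 == (1*9+2)*9+3 and the identity RHS
  // <4,5,6,7> as ((4*9+5)*9+6)*9+7, which is what the OP_COPY check tests.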
9805
9806 SDValue OpLHS, OpRHS;
9807 OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl);
9808 OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl);
9809
9810 int ShufIdxs[16];
9811 switch (OpNum) {
9812 default: llvm_unreachable("Unknown i32 permute!");
9813 case OP_VMRGHW:
9814 ShufIdxs[ 0] = 0; ShufIdxs[ 1] = 1; ShufIdxs[ 2] = 2; ShufIdxs[ 3] = 3;
9815 ShufIdxs[ 4] = 16; ShufIdxs[ 5] = 17; ShufIdxs[ 6] = 18; ShufIdxs[ 7] = 19;
9816 ShufIdxs[ 8] = 4; ShufIdxs[ 9] = 5; ShufIdxs[10] = 6; ShufIdxs[11] = 7;
9817 ShufIdxs[12] = 20; ShufIdxs[13] = 21; ShufIdxs[14] = 22; ShufIdxs[15] = 23;
9818 break;
9819 case OP_VMRGLW:
9820 ShufIdxs[ 0] = 8; ShufIdxs[ 1] = 9; ShufIdxs[ 2] = 10; ShufIdxs[ 3] = 11;
9821 ShufIdxs[ 4] = 24; ShufIdxs[ 5] = 25; ShufIdxs[ 6] = 26; ShufIdxs[ 7] = 27;
9822 ShufIdxs[ 8] = 12; ShufIdxs[ 9] = 13; ShufIdxs[10] = 14; ShufIdxs[11] = 15;
9823 ShufIdxs[12] = 28; ShufIdxs[13] = 29; ShufIdxs[14] = 30; ShufIdxs[15] = 31;
9824 break;
9825 case OP_VSPLTISW0:
9826 for (unsigned i = 0; i != 16; ++i)
9827 ShufIdxs[i] = (i&3)+0;
9828 break;
9829 case OP_VSPLTISW1:
9830 for (unsigned i = 0; i != 16; ++i)
9831 ShufIdxs[i] = (i&3)+4;
9832 break;
9833 case OP_VSPLTISW2:
9834 for (unsigned i = 0; i != 16; ++i)
9835 ShufIdxs[i] = (i&3)+8;
9836 break;
9837 case OP_VSPLTISW3:
9838 for (unsigned i = 0; i != 16; ++i)
9839 ShufIdxs[i] = (i&3)+12;
9840 break;
9841 case OP_VSLDOI4:
9842 return BuildVSLDOI(OpLHS, OpRHS, 4, OpLHS.getValueType(), DAG, dl);
9843 case OP_VSLDOI8:
9844 return BuildVSLDOI(OpLHS, OpRHS, 8, OpLHS.getValueType(), DAG, dl);
9845 case OP_VSLDOI12:
9846 return BuildVSLDOI(OpLHS, OpRHS, 12, OpLHS.getValueType(), DAG, dl);
9847 }
9848 EVT VT = OpLHS.getValueType();
9849 OpLHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpLHS);
9850 OpRHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpRHS);
9851 SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, OpLHS, OpRHS, ShufIdxs);
9852 return DAG.getNode(ISD::BITCAST, dl, VT, T);
9853}
9854
9855/// lowerToVINSERTB - Return the SDValue if this VECTOR_SHUFFLE can be handled
9856/// by the VINSERTB instruction introduced in ISA 3.0, else just return default
9857/// SDValue.
9858SDValue PPCTargetLowering::lowerToVINSERTB(ShuffleVectorSDNode *N,
9859 SelectionDAG &DAG) const {
9860 const unsigned BytesInVector = 16;
9861 bool IsLE = Subtarget.isLittleEndian();
9862 SDLoc dl(N);
9863 SDValue V1 = N->getOperand(0);
9864 SDValue V2 = N->getOperand(1);
9865 unsigned ShiftElts = 0, InsertAtByte = 0;
9866 bool Swap = false;
9867
9868 // Shifts required to get the byte we want at element 7.
9869 unsigned LittleEndianShifts[] = {8, 7, 6, 5, 4, 3, 2, 1,
9870 0, 15, 14, 13, 12, 11, 10, 9};
9871 unsigned BigEndianShifts[] = {9, 10, 11, 12, 13, 14, 15, 0,
9872 1, 2, 3, 4, 5, 6, 7, 8};
9873
9874 ArrayRef<int> Mask = N->getMask();
9875 int OriginalOrder[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
9876
9877 // For each mask element, find out if we're just inserting something
9878 // from V2 into V1 or vice versa.
9879 // Possible permutations inserting an element from V2 into V1:
9880 // X, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
9881 // 0, X, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
9882 // ...
9883 // 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, X
9884 // Inserting from V1 into V2 will be similar, except mask range will be
9885 // [16,31].
9886
9887 bool FoundCandidate = false;
9888 // If both vector operands for the shuffle are the same vector, the mask
9889 // will contain only elements from the first one and the second one will be
9890 // undef.
9891 unsigned VINSERTBSrcElem = IsLE ? 8 : 7;
 9892 // Go through the mask of bytes to find an element that's being moved
9893 // from one vector to the other.
9894 for (unsigned i = 0; i < BytesInVector; ++i) {
9895 unsigned CurrentElement = Mask[i];
 9896 // If the 2nd operand is undefined, we should only look for the source
 9897 // element (7 on BE, 8 on LE) in the Mask.
9898 if (V2.isUndef() && CurrentElement != VINSERTBSrcElem)
9899 continue;
9900
9901 bool OtherElementsInOrder = true;
9902 // Examine the other elements in the Mask to see if they're in original
9903 // order.
9904 for (unsigned j = 0; j < BytesInVector; ++j) {
9905 if (j == i)
9906 continue;
 9907 // If CurrentElement is from V1 [0,15], then we expect the rest of the Mask
 9908 // to be from V2 [16,31] and vice versa. Unless the 2nd operand is undefined,
 9909 // in which case we assume we're always picking from the 1st operand.
9910 int MaskOffset =
9911 (!V2.isUndef() && CurrentElement < BytesInVector) ? BytesInVector : 0;
9912 if (Mask[j] != OriginalOrder[j] + MaskOffset) {
9913 OtherElementsInOrder = false;
9914 break;
9915 }
9916 }
9917 // If other elements are in original order, we record the number of shifts
9918 // we need to get the element we want into element 7. Also record which byte
9919 // in the vector we should insert into.
9920 if (OtherElementsInOrder) {
9921 // If 2nd operand is undefined, we assume no shifts and no swapping.
9922 if (V2.isUndef()) {
9923 ShiftElts = 0;
9924 Swap = false;
9925 } else {
9926 // Only need the last 4-bits for shifts because operands will be swapped if CurrentElement is >= 2^4.
9927 ShiftElts = IsLE ? LittleEndianShifts[CurrentElement & 0xF]
9928 : BigEndianShifts[CurrentElement & 0xF];
9929 Swap = CurrentElement < BytesInVector;
9930 }
9931 InsertAtByte = IsLE ? BytesInVector - (i + 1) : i;
9932 FoundCandidate = true;
9933 break;
9934 }
9935 }
9936
9937 if (!FoundCandidate)
9938 return SDValue();
9939
9940 // Candidate found, construct the proper SDAG sequence with VINSERTB,
9941 // optionally with VECSHL if shift is required.
9942 if (Swap)
9943 std::swap(V1, V2);
9944 if (V2.isUndef())
9945 V2 = V1;
9946 if (ShiftElts) {
9947 SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v16i8, V2, V2,
9948 DAG.getConstant(ShiftElts, dl, MVT::i32));
9949 return DAG.getNode(PPCISD::VECINSERT, dl, MVT::v16i8, V1, Shl,
9950 DAG.getConstant(InsertAtByte, dl, MVT::i32));
9951 }
9952 return DAG.getNode(PPCISD::VECINSERT, dl, MVT::v16i8, V1, V2,
9953 DAG.getConstant(InsertAtByte, dl, MVT::i32));
9954}
9955
9956/// lowerToVINSERTH - Return the SDValue if this VECTOR_SHUFFLE can be handled
9957/// by the VINSERTH instruction introduced in ISA 3.0, else just return default
9958/// SDValue.
9959SDValue PPCTargetLowering::lowerToVINSERTH(ShuffleVectorSDNode *N,
9960 SelectionDAG &DAG) const {
9961 const unsigned NumHalfWords = 8;
9962 const unsigned BytesInVector = NumHalfWords * 2;
9963 // Check that the shuffle is on half-words.
9964 if (!isNByteElemShuffleMask(N, 2, 1))
9965 return SDValue();
9966
9967 bool IsLE = Subtarget.isLittleEndian();
9968 SDLoc dl(N);
9969 SDValue V1 = N->getOperand(0);
9970 SDValue V2 = N->getOperand(1);
9971 unsigned ShiftElts = 0, InsertAtByte = 0;
9972 bool Swap = false;
9973
9974 // Shifts required to get the half-word we want at element 3.
9975 unsigned LittleEndianShifts[] = {4, 3, 2, 1, 0, 7, 6, 5};
9976 unsigned BigEndianShifts[] = {5, 6, 7, 0, 1, 2, 3, 4};
9977
9978 uint32_t Mask = 0;
9979 uint32_t OriginalOrderLow = 0x1234567;
9980 uint32_t OriginalOrderHigh = 0x89ABCDEF;
9981 // Now we look at mask elements 0,2,4,6,8,10,12,14. Pack the mask into a
9982 // 32-bit space, only need 4-bit nibbles per element.
9983 for (unsigned i = 0; i < NumHalfWords; ++i) {
9984 unsigned MaskShift = (NumHalfWords - 1 - i) * 4;
9985 Mask |= ((uint32_t)(N->getMaskElt(i * 2) / 2) << MaskShift);
9986 }
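  // Illustrative example (not in the original source): an identity order of
  // the low half-words (0..7) packs to 0x01234567, i.e. OriginalOrderLow,
  // and half-words 8..15 pack to 0x89ABCDEF, i.e. OriginalOrderHigh.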
9987
9988 // For each mask element, find out if we're just inserting something
9989 // from V2 into V1 or vice versa. Possible permutations inserting an element
9990 // from V2 into V1:
9991 // X, 1, 2, 3, 4, 5, 6, 7
9992 // 0, X, 2, 3, 4, 5, 6, 7
9993 // 0, 1, X, 3, 4, 5, 6, 7
9994 // 0, 1, 2, X, 4, 5, 6, 7
9995 // 0, 1, 2, 3, X, 5, 6, 7
9996 // 0, 1, 2, 3, 4, X, 6, 7
9997 // 0, 1, 2, 3, 4, 5, X, 7
9998 // 0, 1, 2, 3, 4, 5, 6, X
9999 // Inserting from V1 into V2 will be similar, except mask range will be [8,15].
10000
10001 bool FoundCandidate = false;
10002 // Go through the mask of half-words to find an element that's being moved
10003 // from one vector to the other.
10004 for (unsigned i = 0; i < NumHalfWords; ++i) {
10005 unsigned MaskShift = (NumHalfWords - 1 - i) * 4;
10006 uint32_t MaskOneElt = (Mask >> MaskShift) & 0xF;
10007 uint32_t MaskOtherElts = ~(0xF << MaskShift);
10008 uint32_t TargetOrder = 0x0;
10009
10010 // If both vector operands for the shuffle are the same vector, the mask
10011 // will contain only elements from the first one and the second one will be
10012 // undef.
10013 if (V2.isUndef()) {
10014 ShiftElts = 0;
10015 unsigned VINSERTHSrcElem = IsLE ? 4 : 3;
10016 TargetOrder = OriginalOrderLow;
10017 Swap = false;
10018 // Skip if this is not the correct element or the mask of the other
10019 // elements doesn't match our expected order.
10020 if (MaskOneElt == VINSERTHSrcElem &&
10021 (Mask & MaskOtherElts) == (TargetOrder & MaskOtherElts)) {
10022 InsertAtByte = IsLE ? BytesInVector - (i + 1) * 2 : i * 2;
10023 FoundCandidate = true;
10024 break;
10025 }
10026 } else { // If both operands are defined.
10027 // Target order is [8,15] if the current mask is between [0,7].
10028 TargetOrder =
10029 (MaskOneElt < NumHalfWords) ? OriginalOrderHigh : OriginalOrderLow;
10030 // Skip if the mask of the other elements doesn't match our expected order.
10031 if ((Mask & MaskOtherElts) == (TargetOrder & MaskOtherElts)) {
10032 // We only need the last 3 bits for the number of shifts.
10033 ShiftElts = IsLE ? LittleEndianShifts[MaskOneElt & 0x7]
10034 : BigEndianShifts[MaskOneElt & 0x7];
10035 InsertAtByte = IsLE ? BytesInVector - (i + 1) * 2 : i * 2;
10036 Swap = MaskOneElt < NumHalfWords;
10037 FoundCandidate = true;
10038 break;
10039 }
10040 }
10041 }
10042
10043 if (!FoundCandidate)
10044 return SDValue();
10045
10046 // Candidate found, construct the proper SDAG sequence with VINSERTH,
10047 // optionally with VECSHL if shift is required.
10048 if (Swap)
10049 std::swap(V1, V2);
10050 if (V2.isUndef())
10051 V2 = V1;
10052 SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V1);
10053 if (ShiftElts) {
10054 // Double ShiftElts because we're left shifting on v16i8 type.
10055 SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v16i8, V2, V2,
10056 DAG.getConstant(2 * ShiftElts, dl, MVT::i32));
10057 SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, Shl);
10058 SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v8i16, Conv1, Conv2,
10059 DAG.getConstant(InsertAtByte, dl, MVT::i32));
10060 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
10061 }
10062 SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V2);
10063 SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v8i16, Conv1, Conv2,
10064 DAG.getConstant(InsertAtByte, dl, MVT::i32));
10065 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
10066}
10067
10068/// lowerToXXSPLTI32DX - Return the SDValue if this VECTOR_SHUFFLE can be
10069/// handled by the XXSPLTI32DX instruction introduced in ISA 3.1, otherwise
10070/// return the default SDValue.
10071SDValue PPCTargetLowering::lowerToXXSPLTI32DX(ShuffleVectorSDNode *SVN,
10072 SelectionDAG &DAG) const {
10073 // The LHS and RHS may be bitcasts to v16i8 as we canonicalize shuffles
10074 // to v16i8. Peek through the bitcasts to get the actual operands.
10075 SDValue LHS = peekThroughBitcasts(SVN->getOperand(0));
10076 SDValue RHS = peekThroughBitcasts(SVN->getOperand(1));
10077
10078 auto ShuffleMask = SVN->getMask();
10079 SDValue VecShuffle(SVN, 0);
10080 SDLoc DL(SVN);
10081
10082 // Check that we have a four byte shuffle.
10083 if (!isNByteElemShuffleMask(SVN, 4, 1))
10084 return SDValue();
10085
10086 // Canonicalize the RHS being a BUILD_VECTOR when lowering to xxsplti32dx.
10087 if (RHS->getOpcode() != ISD::BUILD_VECTOR) {
10088 std::swap(LHS, RHS);
10089 VecShuffle = peekThroughBitcasts(DAG.getCommutedVectorShuffle(*SVN));
10090 ShuffleVectorSDNode *CommutedSV = dyn_cast<ShuffleVectorSDNode>(VecShuffle);
10091 if (!CommutedSV)
10092 return SDValue();
10093 ShuffleMask = CommutedSV->getMask();
10094 }
10095
10096 // Ensure that the RHS is a vector of constants.
10097 BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(RHS.getNode());
10098 if (!BVN)
10099 return SDValue();
10100
10101 // Check if RHS is a splat of 4-bytes (or smaller).
10102 APInt APSplatValue, APSplatUndef;
10103 unsigned SplatBitSize;
10104 bool HasAnyUndefs;
10105 if (!BVN->isConstantSplat(APSplatValue, APSplatUndef, SplatBitSize,
10106 HasAnyUndefs, 0, !Subtarget.isLittleEndian()) ||
10107 SplatBitSize > 32)
10108 return SDValue();
10109
10110 // Check that the shuffle mask matches the semantics of XXSPLTI32DX.
10111 // The instruction splats a constant C into two words of the source vector
10112 // producing { C, Unchanged, C, Unchanged } or { Unchanged, C, Unchanged, C }.
10113 // Thus we check that the shuffle mask is the equivalent of
10114 // <0, [4-7], 2, [4-7]> or <[4-7], 1, [4-7], 3> respectively.
10115 // Note: the check above of isNByteElemShuffleMask() ensures that the bytes
10116 // within each word are consecutive, so we only need to check the first byte.
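  // Illustrative example (not in the original source): the byte mask
  //   <0,1,2,3, 16,17,18,19, 8,9,10,11, 16,17,18,19>
  // keeps words 0 and 2 of the LHS and takes the splatted constant for
  // words 1 and 3, so it matches the first pattern checked below.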
10117 SDValue Index;
10118 bool IsLE = Subtarget.isLittleEndian();
10119 if ((ShuffleMask[0] == 0 && ShuffleMask[8] == 8) &&
10120 (ShuffleMask[4] % 4 == 0 && ShuffleMask[12] % 4 == 0 &&
10121 ShuffleMask[4] > 15 && ShuffleMask[12] > 15))
10122 Index = DAG.getTargetConstant(IsLE ? 0 : 1, DL, MVT::i32);
10123 else if ((ShuffleMask[4] == 4 && ShuffleMask[12] == 12) &&
10124 (ShuffleMask[0] % 4 == 0 && ShuffleMask[8] % 4 == 0 &&
10125 ShuffleMask[0] > 15 && ShuffleMask[8] > 15))
10126 Index = DAG.getTargetConstant(IsLE ? 1 : 0, DL, MVT::i32);
10127 else
10128 return SDValue();
10129
10130 // If the splat is narrower than 32-bits, we need to get the 32-bit value
10131 // for XXSPLTI32DX.
10132 unsigned SplatVal = APSplatValue.getZExtValue();
10133 for (; SplatBitSize < 32; SplatBitSize <<= 1)
10134 SplatVal |= (SplatVal << SplatBitSize);
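  // E.g. (illustrative, not in the original source) an 8-bit splat value of
  // 0xAB is widened here to 0xABAB and then to 0xABABABAB for XXSPLTI32DX.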
10135
10136 SDValue SplatNode = DAG.getNode(
10137 PPCISD::XXSPLTI32DX, DL, MVT::v2i64, DAG.getBitcast(MVT::v2i64, LHS),
10138 Index, DAG.getTargetConstant(SplatVal, DL, MVT::i32));
10139 return DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, SplatNode);
10140}
10141
10142/// LowerROTL - Custom lowering for ROTL(v1i128) to vector_shuffle(v16i8).
10143/// We lower ROTL(v1i128) to vector_shuffle(v16i8) only if shift amount is
10144/// a multiple of 8. Otherwise convert it to a scalar rotation(i128)
10145/// i.e (or (shl x, C1), (srl x, 128-C1)).
10146SDValue PPCTargetLowering::LowerROTL(SDValue Op, SelectionDAG &DAG) const {
10147 assert(Op.getOpcode() == ISD::ROTL && "Should only be called for ISD::ROTL");
10148 assert(Op.getValueType() == MVT::v1i128 &&
10149 "Only set v1i128 as custom, other type shouldn't reach here!");
10150 SDLoc dl(Op);
10151 SDValue N0 = peekThroughBitcasts(Op.getOperand(0));
10152 SDValue N1 = peekThroughBitcasts(Op.getOperand(1));
10153 unsigned SHLAmt = N1.getConstantOperandVal(0);
10154 if (SHLAmt % 8 == 0) {
10155 std::array<int, 16> Mask;
10156 std::iota(Mask.begin(), Mask.end(), 0);
10157 std::rotate(Mask.begin(), Mask.begin() + SHLAmt / 8, Mask.end());
10158 if (SDValue Shuffle =
10159 DAG.getVectorShuffle(MVT::v16i8, dl, DAG.getBitcast(MVT::v16i8, N0),
10160 DAG.getUNDEF(MVT::v16i8), Mask))
10161 return DAG.getNode(ISD::BITCAST, dl, MVT::v1i128, Shuffle);
10162 }
10163 SDValue ArgVal = DAG.getBitcast(MVT::i128, N0);
10164 SDValue SHLOp = DAG.getNode(ISD::SHL, dl, MVT::i128, ArgVal,
10165 DAG.getConstant(SHLAmt, dl, MVT::i32));
10166 SDValue SRLOp = DAG.getNode(ISD::SRL, dl, MVT::i128, ArgVal,
10167 DAG.getConstant(128 - SHLAmt, dl, MVT::i32));
10168 SDValue OROp = DAG.getNode(ISD::OR, dl, MVT::i128, SHLOp, SRLOp);
10169 return DAG.getNode(ISD::BITCAST, dl, MVT::v1i128, OROp);
10170}
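// Illustrative example (not part of the original source): rotating a v1i128
// value left by 16 bits is a whole-byte rotation, so the code above emits a
// v16i8 shuffle with mask <2,3,...,15,0,1>; a rotate by 4 bits instead falls
// through to the (or (shl x, 4), (srl x, 124)) expansion on i128.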
10171
10172/// LowerVECTOR_SHUFFLE - Return the code we lower for VECTOR_SHUFFLE. If this
10173/// is a shuffle we can handle in a single instruction, return it. Otherwise,
10174/// return the code it can be lowered into. Worst case, it can always be
10175/// lowered into a vperm.
10176SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
10177 SelectionDAG &DAG) const {
10178 SDLoc dl(Op);
10179 SDValue V1 = Op.getOperand(0);
10180 SDValue V2 = Op.getOperand(1);
10181 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
10182
10183 // Any nodes that were combined in the target-independent combiner prior
10184 // to vector legalization will not be sent to the target combine. Try to
10185 // combine it here.
10186 if (SDValue NewShuffle = combineVectorShuffle(SVOp, DAG)) {
10187 if (!isa<ShuffleVectorSDNode>(NewShuffle))
10188 return NewShuffle;
10189 Op = NewShuffle;
10190 SVOp = cast<ShuffleVectorSDNode>(Op);
10191 V1 = Op.getOperand(0);
10192 V2 = Op.getOperand(1);
10193 }
10194 EVT VT = Op.getValueType();
10195 bool isLittleEndian = Subtarget.isLittleEndian();
10196
10197 unsigned ShiftElts, InsertAtByte;
10198 bool Swap = false;
10199
10200 // If this is a load-and-splat, we can do that with a single instruction
10201 // in some cases. However if the load has multiple uses, we don't want to
10202 // combine it because that will just produce multiple loads.
10203 bool IsPermutedLoad = false;
10204 const SDValue *InputLoad = getNormalLoadInput(V1, IsPermutedLoad);
10205 if (InputLoad && Subtarget.hasVSX() && V2.isUndef() &&
10206 (PPC::isSplatShuffleMask(SVOp, 4) || PPC::isSplatShuffleMask(SVOp, 8)) &&
10207 InputLoad->hasOneUse()) {
10208 bool IsFourByte = PPC::isSplatShuffleMask(SVOp, 4);
10209 int SplatIdx =
10210 PPC::getSplatIdxForPPCMnemonics(SVOp, IsFourByte ? 4 : 8, DAG);
10211
10212 // The splat index for permuted loads will be in the left half of the vector
10213 // which is strictly wider than the loaded value by 8 bytes. So we need to
10214 // adjust the splat index to point to the correct address in memory.
10215 if (IsPermutedLoad) {
10216 assert((isLittleEndian || IsFourByte) &&
10217 "Unexpected size for permuted load on big endian target");
10218 SplatIdx += IsFourByte ? 2 : 1;
10219 assert((SplatIdx < (IsFourByte ? 4 : 2)) &&
10220 "Splat of a value outside of the loaded memory");
10221 }
10222
10223 LoadSDNode *LD = cast<LoadSDNode>(*InputLoad);
10224 // For 4-byte load-and-splat, we need Power9.
10225 if ((IsFourByte && Subtarget.hasP9Vector()) || !IsFourByte) {
10226 uint64_t Offset = 0;
10227 if (IsFourByte)
10228 Offset = isLittleEndian ? (3 - SplatIdx) * 4 : SplatIdx * 4;
10229 else
10230 Offset = isLittleEndian ? (1 - SplatIdx) * 8 : SplatIdx * 8;
10231
10232 // If the width of the load is the same as the width of the splat,
10233 // loading with an offset would load the wrong memory.
10234 if (LD->getValueType(0).getSizeInBits() == (IsFourByte ? 32 : 64))
10235 Offset = 0;
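      // Illustrative example (not in the original source): on little-endian,
      // splatting word 1 of a 16-byte load gives Offset = (3 - 1) * 4 = 8,
      // so the narrower load-and-splat reads that word from BasePtr + 8.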
10236
10237 SDValue BasePtr = LD->getBasePtr();
10238 if (Offset != 0)
10239 BasePtr = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
10240 BasePtr, DAG.getIntPtrConstant(Offset, dl));
10241 SDValue Ops[] = {
10242 LD->getChain(), // Chain
10243 BasePtr, // BasePtr
10244 DAG.getValueType(Op.getValueType()) // VT
10245 };
10246 SDVTList VTL =
10247 DAG.getVTList(IsFourByte ? MVT::v4i32 : MVT::v2i64, MVT::Other);
10248 SDValue LdSplt =
10249 DAG.getMemIntrinsicNode(PPCISD::LD_SPLAT, dl, VTL,
10250 Ops, LD->getMemoryVT(), LD->getMemOperand());
10251 DAG.ReplaceAllUsesOfValueWith(InputLoad->getValue(1), LdSplt.getValue(1));
10252 if (LdSplt.getValueType() != SVOp->getValueType(0))
10253 LdSplt = DAG.getBitcast(SVOp->getValueType(0), LdSplt);
10254 return LdSplt;
10255 }
10256 }
10257
10258 // All v2i64 and v2f64 shuffles are legal
10259 if (VT == MVT::v2i64 || VT == MVT::v2f64)
10260 return Op;
10261
10262 if (Subtarget.hasP9Vector() &&
10263 PPC::isXXINSERTWMask(SVOp, ShiftElts, InsertAtByte, Swap,
10264 isLittleEndian)) {
10265 if (V2.isUndef())
10266 V2 = V1;
10267 else if (Swap)
10268 std::swap(V1, V2);
10269 SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
10270 SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V2);
10271 if (ShiftElts) {
10272 SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v4i32, Conv2, Conv2,
10273 DAG.getConstant(ShiftElts, dl, MVT::i32));
10274 SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v4i32, Conv1, Shl,
10275 DAG.getConstant(InsertAtByte, dl, MVT::i32));
10276 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
10277 }
10278 SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v4i32, Conv1, Conv2,
10279 DAG.getConstant(InsertAtByte, dl, MVT::i32));
10280 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
10281 }
10282
10283 if (Subtarget.hasPrefixInstrs() && Subtarget.hasP10Vector()) {
10284 SDValue SplatInsertNode;
10285 if ((SplatInsertNode = lowerToXXSPLTI32DX(SVOp, DAG)))
10286 return SplatInsertNode;
10287 }
10288
10289 if (Subtarget.hasP9Altivec()) {
10290 SDValue NewISDNode;
10291 if ((NewISDNode = lowerToVINSERTH(SVOp, DAG)))
10292 return NewISDNode;
10293
10294 if ((NewISDNode = lowerToVINSERTB(SVOp, DAG)))
10295 return NewISDNode;
10296 }
10297
10298 if (Subtarget.hasVSX() &&
10299 PPC::isXXSLDWIShuffleMask(SVOp, ShiftElts, Swap, isLittleEndian)) {
10300 if (Swap)
10301 std::swap(V1, V2);
10302 SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
10303 SDValue Conv2 =
10304 DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V2.isUndef() ? V1 : V2);
10305
10306 SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v4i32, Conv1, Conv2,
10307 DAG.getConstant(ShiftElts, dl, MVT::i32));
10308 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Shl);
10309 }
10310
10311 if (Subtarget.hasVSX() &&
10312 PPC::isXXPERMDIShuffleMask(SVOp, ShiftElts, Swap, isLittleEndian)) {
10313 if (Swap)
10314 std::swap(V1, V2);
10315 SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V1);
10316 SDValue Conv2 =
10317 DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V2.isUndef() ? V1 : V2);
10318
10319 SDValue PermDI = DAG.getNode(PPCISD::XXPERMDI, dl, MVT::v2i64, Conv1, Conv2,
10320 DAG.getConstant(ShiftElts, dl, MVT::i32));
10321 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, PermDI);
10322 }
10323
10324 if (Subtarget.hasP9Vector()) {
10325 if (PPC::isXXBRHShuffleMask(SVOp)) {
10326 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V1);
10327 SDValue ReveHWord = DAG.getNode(ISD::BSWAP, dl, MVT::v8i16, Conv);
10328 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveHWord);
10329 } else if (PPC::isXXBRWShuffleMask(SVOp)) {
10330 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
10331 SDValue ReveWord = DAG.getNode(ISD::BSWAP, dl, MVT::v4i32, Conv);
10332 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveWord);
10333 } else if (PPC::isXXBRDShuffleMask(SVOp)) {
10334 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V1);
10335 SDValue ReveDWord = DAG.getNode(ISD::BSWAP, dl, MVT::v2i64, Conv);
10336 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveDWord);
10337 } else if (PPC::isXXBRQShuffleMask(SVOp)) {
10338 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v1i128, V1);
10339 SDValue ReveQWord = DAG.getNode(ISD::BSWAP, dl, MVT::v1i128, Conv);
10340 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveQWord);
10341 }
10342 }
10343
10344 if (Subtarget.hasVSX()) {
10345 if (V2.isUndef() && PPC::isSplatShuffleMask(SVOp, 4)) {
10346 int SplatIdx = PPC::getSplatIdxForPPCMnemonics(SVOp, 4, DAG);
10347
10348 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
10349 SDValue Splat = DAG.getNode(PPCISD::XXSPLT, dl, MVT::v4i32, Conv,
10350 DAG.getConstant(SplatIdx, dl, MVT::i32));
10351 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Splat);
10352 }
10353
10354 // Left shifts of 8 bytes are actually swaps. Convert accordingly.
10355 if (V2.isUndef() && PPC::isVSLDOIShuffleMask(SVOp, 1, DAG) == 8) {
10356 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, V1);
10357 SDValue Swap = DAG.getNode(PPCISD::SWAP_NO_CHAIN, dl, MVT::v2f64, Conv);
10358 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Swap);
10359 }
10360 }
10361
10362 // Cases that are handled by instructions that take permute immediates
10363 // (such as vsplt*) should be left as VECTOR_SHUFFLE nodes so they can be
10364 // selected by the instruction selector.
10365 if (V2.isUndef()) {
10366 if (PPC::isSplatShuffleMask(SVOp, 1) ||
10367 PPC::isSplatShuffleMask(SVOp, 2) ||
10368 PPC::isSplatShuffleMask(SVOp, 4) ||
10369 PPC::isVPKUWUMShuffleMask(SVOp, 1, DAG) ||
10370 PPC::isVPKUHUMShuffleMask(SVOp, 1, DAG) ||
10371 PPC::isVSLDOIShuffleMask(SVOp, 1, DAG) != -1 ||
10372 PPC::isVMRGLShuffleMask(SVOp, 1, 1, DAG) ||
10373 PPC::isVMRGLShuffleMask(SVOp, 2, 1, DAG) ||
10374 PPC::isVMRGLShuffleMask(SVOp, 4, 1, DAG) ||
10375 PPC::isVMRGHShuffleMask(SVOp, 1, 1, DAG) ||
10376 PPC::isVMRGHShuffleMask(SVOp, 2, 1, DAG) ||
10377 PPC::isVMRGHShuffleMask(SVOp, 4, 1, DAG) ||
10378 (Subtarget.hasP8Altivec() && (
10379 PPC::isVPKUDUMShuffleMask(SVOp, 1, DAG) ||
10380 PPC::isVMRGEOShuffleMask(SVOp, true, 1, DAG) ||
10381 PPC::isVMRGEOShuffleMask(SVOp, false, 1, DAG)))) {
10382 return Op;
10383 }
10384 }
10385
10386 // Altivec has a variety of "shuffle immediates" that take two vector inputs
10387 // and produce a fixed permutation. If any of these match, do not lower to
10388 // VPERM.
10389 unsigned int ShuffleKind = isLittleEndian ? 2 : 0;
10390 if (PPC::isVPKUWUMShuffleMask(SVOp, ShuffleKind, DAG) ||
10391 PPC::isVPKUHUMShuffleMask(SVOp, ShuffleKind, DAG) ||
10392 PPC::isVSLDOIShuffleMask(SVOp, ShuffleKind, DAG) != -1 ||
10393 PPC::isVMRGLShuffleMask(SVOp, 1, ShuffleKind, DAG) ||
10394 PPC::isVMRGLShuffleMask(SVOp, 2, ShuffleKind, DAG) ||
10395 PPC::isVMRGLShuffleMask(SVOp, 4, ShuffleKind, DAG) ||
10396 PPC::isVMRGHShuffleMask(SVOp, 1, ShuffleKind, DAG) ||
10397 PPC::isVMRGHShuffleMask(SVOp, 2, ShuffleKind, DAG) ||
10398 PPC::isVMRGHShuffleMask(SVOp, 4, ShuffleKind, DAG) ||
10399 (Subtarget.hasP8Altivec() && (
10400 PPC::isVPKUDUMShuffleMask(SVOp, ShuffleKind, DAG) ||
10401 PPC::isVMRGEOShuffleMask(SVOp, true, ShuffleKind, DAG) ||
10402 PPC::isVMRGEOShuffleMask(SVOp, false, ShuffleKind, DAG))))
10403 return Op;
10404
10405 // Check to see if this is a shuffle of 4-byte values. If so, we can use our
10406 // perfect shuffle table to emit an optimal matching sequence.
10407 ArrayRef<int> PermMask = SVOp->getMask();
10408
10409 if (!DisablePerfectShuffle && !isLittleEndian) {
10410 unsigned PFIndexes[4];
10411 bool isFourElementShuffle = true;
10412 for (unsigned i = 0; i != 4 && isFourElementShuffle;
10413 ++i) { // Element number
10414 unsigned EltNo = 8; // Start out undef.
10415 for (unsigned j = 0; j != 4; ++j) { // Intra-element byte.
10416 if (PermMask[i * 4 + j] < 0)
10417 continue; // Undef, ignore it.
10418
10419 unsigned ByteSource = PermMask[i * 4 + j];
10420 if ((ByteSource & 3) != j) {
10421 isFourElementShuffle = false;
10422 break;
10423 }
10424
10425 if (EltNo == 8) {
10426 EltNo = ByteSource / 4;
10427 } else if (EltNo != ByteSource / 4) {
10428 isFourElementShuffle = false;
10429 break;
10430 }
10431 }
10432 PFIndexes[i] = EltNo;
10433 }
10434
10435 // If this shuffle can be expressed as a shuffle of 4-byte elements, use the
10436 // perfect shuffle vector to determine if it is cost effective to do this as
10437 // discrete instructions, or whether we should use a vperm.
10438 // For now, we skip this for little endian until such time as we have a
10439 // little-endian perfect shuffle table.
10440 if (isFourElementShuffle) {
10441 // Compute the index in the perfect shuffle table.
10442 unsigned PFTableIndex = PFIndexes[0] * 9 * 9 * 9 + PFIndexes[1] * 9 * 9 +
10443 PFIndexes[2] * 9 + PFIndexes[3];
10444
10445 unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
10446 unsigned Cost = (PFEntry >> 30);
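      // E.g. (illustrative, not in the original source) PFIndexes {0,1,2,3},
      // the identity shuffle of the first input, gives PFTableIndex
      // 0*729 + 1*81 + 2*9 + 3 = 102.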
10447
10448 // Determining when to avoid vperm is tricky. Many things affect the cost
10449 // of vperm, particularly how many times the perm mask needs to be
10450 // computed. For example, if the perm mask can be hoisted out of a loop or
10451 // is already used (perhaps because there are multiple permutes with the
10452 // same shuffle mask?) the vperm has a cost of 1. OTOH, hoisting the
10453 // permute mask out of the loop requires an extra register.
10454 //
10455 // As a compromise, we only emit discrete instructions if the shuffle can
10456 // be generated in 3 or fewer operations. When we have loop information
10457 // available, if this block is within a loop, we should avoid using vperm
10458 // for 3-operation perms and use a constant pool load instead.
10459 if (Cost < 3)
10460 return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl);
10461 }
10462 }
10463
10464 // Lower this to a VPERM(V1, V2, V3) expression, where V3 is a constant
10465 // vector that will get spilled to the constant pool.
10466 if (V2.isUndef()) V2 = V1;
10467
10468 return LowerVPERM(Op, DAG, PermMask, VT, V1, V2);
10469}
10470
10471SDValue PPCTargetLowering::LowerVPERM(SDValue Op, SelectionDAG &DAG,
10472 ArrayRef<int> PermMask, EVT VT,
10473 SDValue V1, SDValue V2) const {
10474 unsigned Opcode = PPCISD::VPERM;
10475 EVT ValType = V1.getValueType();
10476 SDLoc dl(Op);
10477 bool NeedSwap = false;
10478 bool isLittleEndian = Subtarget.isLittleEndian();
10479 bool isPPC64 = Subtarget.isPPC64();
10480
10481 if (Subtarget.hasVSX() && Subtarget.hasP9Vector() &&
10482 (V1->hasOneUse() || V2->hasOneUse())) {
10483 LLVM_DEBUG(dbgs() << "At least one of the two input vectors is dead - using "
10484 "XXPERM instead\n");
10485 Opcode = PPCISD::XXPERM;
10486
10487 // The second input to XXPERM is also an output so if the second input has
10488 // multiple uses then copying is necessary, as a result we want the
10489 // single-use operand to be used as the second input to prevent copying.
10490 if ((!isLittleEndian && !V2->hasOneUse() && V1->hasOneUse()) ||
10491 (isLittleEndian && !V1->hasOneUse() && V2->hasOneUse())) {
10492 std::swap(V1, V2);
10493 NeedSwap = !NeedSwap;
10494 }
10495 }
10496
10497 // The SHUFFLE_VECTOR mask is almost exactly what we want for vperm, except
10498 // that it is in input element units, not in bytes. Convert now.
10499
10500 // For little endian, the order of the input vectors is reversed, and
10501 // the permutation mask is complemented with respect to 31. This is
10502 // necessary to produce proper semantics with the big-endian-based vperm
10503 // instruction.
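  // Illustrative example (not in the original source): with 4-byte elements,
  // mask element 3 expands to bytes 12..15 for big endian; for little endian
  // those entries become 31-12 .. 31-15 (19, 18, 17, 16) and the two inputs
  // are swapped further below.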
10504 EVT EltVT = V1.getValueType().getVectorElementType();
10505 unsigned BytesPerElement = EltVT.getSizeInBits() / 8;
10506
10507 bool V1HasXXSWAPD = V1->getOperand(0)->getOpcode() == PPCISD::XXSWAPD;
10508 bool V2HasXXSWAPD = V2->getOperand(0)->getOpcode() == PPCISD::XXSWAPD;
10509
10510 /*
10511 Vectors will be appended like so: [ V1 | v2 ]
10512 XXSWAPD on V1:
10513 [ A | B | C | D ] -> [ C | D | A | B ]
10514 0-3 4-7 8-11 12-15 0-3 4-7 8-11 12-15
10515 i.e. index of A, B += 8, and index of C, D -= 8.
10516 XXSWAPD on V2:
10517 [ E | F | G | H ] -> [ G | H | E | F ]
10518 16-19 20-23 24-27 28-31 16-19 20-23 24-27 28-31
10519 i.e. index of E, F += 8, index of G, H -= 8
10520 Swap V1 and V2:
10521 [ V1 | V2 ] -> [ V2 | V1 ]
10522 0-15 16-31 0-15 16-31
10523 i.e. index of V1 += 16, index of V2 -= 16
10524 */
10525
10526 SmallVector<SDValue, 16> ResultMask;
10527 for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i) {
10528 unsigned SrcElt = PermMask[i] < 0 ? 0 : PermMask[i];
10529
10530 if (V1HasXXSWAPD) {
10531 if (SrcElt < 8)
10532 SrcElt += 8;
10533 else if (SrcElt < 16)
10534 SrcElt -= 8;
10535 }
10536 if (V2HasXXSWAPD) {
10537 if (SrcElt > 23)
10538 SrcElt -= 8;
10539 else if (SrcElt > 15)
10540 SrcElt += 8;
10541 }
10542 if (NeedSwap) {
10543 if (SrcElt < 16)
10544 SrcElt += 16;
10545 else
10546 SrcElt -= 16;
10547 }
10548 for (unsigned j = 0; j != BytesPerElement; ++j)
10549 if (isLittleEndian)
10550 ResultMask.push_back(
10551 DAG.getConstant(31 - (SrcElt * BytesPerElement + j), dl, MVT::i32));
10552 else
10553 ResultMask.push_back(
10554 DAG.getConstant(SrcElt * BytesPerElement + j, dl, MVT::i32));
10555 }
10556
10557 if (V1HasXXSWAPD) {
10558 dl = SDLoc(V1->getOperand(0));
10559 V1 = V1->getOperand(0)->getOperand(1);
10560 }
10561 if (V2HasXXSWAPD) {
10562 dl = SDLoc(V2->getOperand(0));
10563 V2 = V2->getOperand(0)->getOperand(1);
10564 }
10565
10566 if (isPPC64 && (V1HasXXSWAPD || V2HasXXSWAPD)) {
10567 if (ValType != MVT::v2f64)
10568 V1 = DAG.getBitcast(MVT::v2f64, V1);
10569 if (V2.getValueType() != MVT::v2f64)
10570 V2 = DAG.getBitcast(MVT::v2f64, V2);
10571 }
10572
10573 ShufflesHandledWithVPERM++;
10574 SDValue VPermMask = DAG.getBuildVector(MVT::v16i8, dl, ResultMask);
10575 LLVM_DEBUG({
10576 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
10577 if (Opcode == PPCISD::XXPERM) {
10578 dbgs() << "Emitting a XXPERM for the following shuffle:\n";
10579 } else {
10580 dbgs() << "Emitting a VPERM for the following shuffle:\n";
10581 }
10582 SVOp->dump();
10583 dbgs() << "With the following permute control vector:\n";
10584 VPermMask.dump();
10585 });
10586
10587 if (Opcode == PPCISD::XXPERM)
10588 VPermMask = DAG.getBitcast(MVT::v4i32, VPermMask);
10589
10590 // For little endian we only need to swap the two inputs here; the
10591 // permute mask above was already computed accordingly.
10592 if (isLittleEndian)
10593 std::swap(V1, V2);
10594
10595 SDValue VPERMNode =
10596 DAG.getNode(Opcode, dl, V1.getValueType(), V1, V2, VPermMask);
10597
10598 VPERMNode = DAG.getBitcast(ValType, VPERMNode);
10599 return VPERMNode;
10600}
10601
10602/// getVectorCompareInfo - Given an intrinsic, return false if it is not a
10603/// vector comparison. If it is, return true and fill in Opc/isDot with
10604/// information about the intrinsic.
10605static bool getVectorCompareInfo(SDValue Intrin, int &CompareOpc,
10606 bool &isDot, const PPCSubtarget &Subtarget) {
10607 unsigned IntrinsicID = Intrin.getConstantOperandVal(0);
10608 CompareOpc = -1;
10609 isDot = false;
10610 switch (IntrinsicID) {
10611 default:
10612 return false;
10613 // Comparison predicates.
10614 case Intrinsic::ppc_altivec_vcmpbfp_p:
10615 CompareOpc = 966;
10616 isDot = true;
10617 break;
10618 case Intrinsic::ppc_altivec_vcmpeqfp_p:
10619 CompareOpc = 198;
10620 isDot = true;
10621 break;
10622 case Intrinsic::ppc_altivec_vcmpequb_p:
10623 CompareOpc = 6;
10624 isDot = true;
10625 break;
10626 case Intrinsic::ppc_altivec_vcmpequh_p:
10627 CompareOpc = 70;
10628 isDot = true;
10629 break;
10630 case Intrinsic::ppc_altivec_vcmpequw_p:
10631 CompareOpc = 134;
10632 isDot = true;
10633 break;
10634 case Intrinsic::ppc_altivec_vcmpequd_p:
10635 if (Subtarget.hasVSX() || Subtarget.hasP8Altivec()) {
10636 CompareOpc = 199;
10637 isDot = true;
10638 } else
10639 return false;
10640 break;
10641 case Intrinsic::ppc_altivec_vcmpneb_p:
10642 case Intrinsic::ppc_altivec_vcmpneh_p:
10643 case Intrinsic::ppc_altivec_vcmpnew_p:
10644 case Intrinsic::ppc_altivec_vcmpnezb_p:
10645 case Intrinsic::ppc_altivec_vcmpnezh_p:
10646 case Intrinsic::ppc_altivec_vcmpnezw_p:
10647 if (Subtarget.hasP9Altivec()) {
10648 switch (IntrinsicID) {
10649 default:
10650 llvm_unreachable("Unknown comparison intrinsic.");
10651 case Intrinsic::ppc_altivec_vcmpneb_p:
10652 CompareOpc = 7;
10653 break;
10654 case Intrinsic::ppc_altivec_vcmpneh_p:
10655 CompareOpc = 71;
10656 break;
10657 case Intrinsic::ppc_altivec_vcmpnew_p:
10658 CompareOpc = 135;
10659 break;
10660 case Intrinsic::ppc_altivec_vcmpnezb_p:
10661 CompareOpc = 263;
10662 break;
10663 case Intrinsic::ppc_altivec_vcmpnezh_p:
10664 CompareOpc = 327;
10665 break;
10666 case Intrinsic::ppc_altivec_vcmpnezw_p:
10667 CompareOpc = 391;
10668 break;
10669 }
10670 isDot = true;
10671 } else
10672 return false;
10673 break;
10674 case Intrinsic::ppc_altivec_vcmpgefp_p:
10675 CompareOpc = 454;
10676 isDot = true;
10677 break;
10678 case Intrinsic::ppc_altivec_vcmpgtfp_p:
10679 CompareOpc = 710;
10680 isDot = true;
10681 break;
10682 case Intrinsic::ppc_altivec_vcmpgtsb_p:
10683 CompareOpc = 774;
10684 isDot = true;
10685 break;
10686 case Intrinsic::ppc_altivec_vcmpgtsh_p:
10687 CompareOpc = 838;
10688 isDot = true;
10689 break;
10690 case Intrinsic::ppc_altivec_vcmpgtsw_p:
10691 CompareOpc = 902;
10692 isDot = true;
10693 break;
10694 case Intrinsic::ppc_altivec_vcmpgtsd_p:
10695 if (Subtarget.hasVSX() || Subtarget.hasP8Altivec()) {
10696 CompareOpc = 967;
10697 isDot = true;
10698 } else
10699 return false;
10700 break;
10701 case Intrinsic::ppc_altivec_vcmpgtub_p:
10702 CompareOpc = 518;
10703 isDot = true;
10704 break;
10705 case Intrinsic::ppc_altivec_vcmpgtuh_p:
10706 CompareOpc = 582;
10707 isDot = true;
10708 break;
10709 case Intrinsic::ppc_altivec_vcmpgtuw_p:
10710 CompareOpc = 646;
10711 isDot = true;
10712 break;
10713 case Intrinsic::ppc_altivec_vcmpgtud_p:
10714 if (Subtarget.hasVSX() || Subtarget.hasP8Altivec()) {
10715 CompareOpc = 711;
10716 isDot = true;
10717 } else
10718 return false;
10719 break;
10720
10721 case Intrinsic::ppc_altivec_vcmpequq:
10722 case Intrinsic::ppc_altivec_vcmpgtsq:
10723 case Intrinsic::ppc_altivec_vcmpgtuq:
10724 if (!Subtarget.isISA3_1())
10725 return false;
10726 switch (IntrinsicID) {
10727 default:
10728 llvm_unreachable("Unknown comparison intrinsic.");
10729 case Intrinsic::ppc_altivec_vcmpequq:
10730 CompareOpc = 455;
10731 break;
10732 case Intrinsic::ppc_altivec_vcmpgtsq:
10733 CompareOpc = 903;
10734 break;
10735 case Intrinsic::ppc_altivec_vcmpgtuq:
10736 CompareOpc = 647;
10737 break;
10738 }
10739 break;
10740
10741 // VSX predicate comparisons use the same infrastructure
10742 case Intrinsic::ppc_vsx_xvcmpeqdp_p:
10743 case Intrinsic::ppc_vsx_xvcmpgedp_p:
10744 case Intrinsic::ppc_vsx_xvcmpgtdp_p:
10745 case Intrinsic::ppc_vsx_xvcmpeqsp_p:
10746 case Intrinsic::ppc_vsx_xvcmpgesp_p:
10747 case Intrinsic::ppc_vsx_xvcmpgtsp_p:
10748 if (Subtarget.hasVSX()) {
10749 switch (IntrinsicID) {
10750 case Intrinsic::ppc_vsx_xvcmpeqdp_p:
10751 CompareOpc = 99;
10752 break;
10753 case Intrinsic::ppc_vsx_xvcmpgedp_p:
10754 CompareOpc = 115;
10755 break;
10756 case Intrinsic::ppc_vsx_xvcmpgtdp_p:
10757 CompareOpc = 107;
10758 break;
10759 case Intrinsic::ppc_vsx_xvcmpeqsp_p:
10760 CompareOpc = 67;
10761 break;
10762 case Intrinsic::ppc_vsx_xvcmpgesp_p:
10763 CompareOpc = 83;
10764 break;
10765 case Intrinsic::ppc_vsx_xvcmpgtsp_p:
10766 CompareOpc = 75;
10767 break;
10768 }
10769 isDot = true;
10770 } else
10771 return false;
10772 break;
10773
10774 // Normal Comparisons.
10775 case Intrinsic::ppc_altivec_vcmpbfp:
10776 CompareOpc = 966;
10777 break;
10778 case Intrinsic::ppc_altivec_vcmpeqfp:
10779 CompareOpc = 198;
10780 break;
10781 case Intrinsic::ppc_altivec_vcmpequb:
10782 CompareOpc = 6;
10783 break;
10784 case Intrinsic::ppc_altivec_vcmpequh:
10785 CompareOpc = 70;
10786 break;
10787 case Intrinsic::ppc_altivec_vcmpequw:
10788 CompareOpc = 134;
10789 break;
10790 case Intrinsic::ppc_altivec_vcmpequd:
10791 if (Subtarget.hasP8Altivec())
10792 CompareOpc = 199;
10793 else
10794 return false;
10795 break;
10796 case Intrinsic::ppc_altivec_vcmpneb:
10797 case Intrinsic::ppc_altivec_vcmpneh:
10798 case Intrinsic::ppc_altivec_vcmpnew:
10799 case Intrinsic::ppc_altivec_vcmpnezb:
10800 case Intrinsic::ppc_altivec_vcmpnezh:
10801 case Intrinsic::ppc_altivec_vcmpnezw:
10802 if (Subtarget.hasP9Altivec())
10803 switch (IntrinsicID) {
10804 default:
10805 llvm_unreachable("Unknown comparison intrinsic.");
10806 case Intrinsic::ppc_altivec_vcmpneb:
10807 CompareOpc = 7;
10808 break;
10809 case Intrinsic::ppc_altivec_vcmpneh:
10810 CompareOpc = 71;
10811 break;
10812 case Intrinsic::ppc_altivec_vcmpnew:
10813 CompareOpc = 135;
10814 break;
10815 case Intrinsic::ppc_altivec_vcmpnezb:
10816 CompareOpc = 263;
10817 break;
10818 case Intrinsic::ppc_altivec_vcmpnezh:
10819 CompareOpc = 327;
10820 break;
10821 case Intrinsic::ppc_altivec_vcmpnezw:
10822 CompareOpc = 391;
10823 break;
10824 }
10825 else
10826 return false;
10827 break;
10828 case Intrinsic::ppc_altivec_vcmpgefp:
10829 CompareOpc = 454;
10830 break;
10831 case Intrinsic::ppc_altivec_vcmpgtfp:
10832 CompareOpc = 710;
10833 break;
10834 case Intrinsic::ppc_altivec_vcmpgtsb:
10835 CompareOpc = 774;
10836 break;
10837 case Intrinsic::ppc_altivec_vcmpgtsh:
10838 CompareOpc = 838;
10839 break;
10840 case Intrinsic::ppc_altivec_vcmpgtsw:
10841 CompareOpc = 902;
10842 break;
10843 case Intrinsic::ppc_altivec_vcmpgtsd:
10844 if (Subtarget.hasP8Altivec())
10845 CompareOpc = 967;
10846 else
10847 return false;
10848 break;
10849 case Intrinsic::ppc_altivec_vcmpgtub:
10850 CompareOpc = 518;
10851 break;
10852 case Intrinsic::ppc_altivec_vcmpgtuh:
10853 CompareOpc = 582;
10854 break;
10855 case Intrinsic::ppc_altivec_vcmpgtuw:
10856 CompareOpc = 646;
10857 break;
10858 case Intrinsic::ppc_altivec_vcmpgtud:
10859 if (Subtarget.hasP8Altivec())
10860 CompareOpc = 711;
10861 else
10862 return false;
10863 break;
10864 case Intrinsic::ppc_altivec_vcmpequq_p:
10865 case Intrinsic::ppc_altivec_vcmpgtsq_p:
10866 case Intrinsic::ppc_altivec_vcmpgtuq_p:
10867 if (!Subtarget.isISA3_1())
10868 return false;
10869 switch (IntrinsicID) {
10870 default:
10871 llvm_unreachable("Unknown comparison intrinsic.");
10872 case Intrinsic::ppc_altivec_vcmpequq_p:
10873 CompareOpc = 455;
10874 break;
10875 case Intrinsic::ppc_altivec_vcmpgtsq_p:
10876 CompareOpc = 903;
10877 break;
10878 case Intrinsic::ppc_altivec_vcmpgtuq_p:
10879 CompareOpc = 647;
10880 break;
10881 }
10882 isDot = true;
10883 break;
10884 }
10885 return true;
10886}
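// Illustrative note (not part of the original source): the CompareOpc values
// above are the extended opcodes of the corresponding vector compare
// instructions (e.g. 6 for vcmpequb, 199 for vcmpequd); when isDot is true
// the record form is used, which also sets CR6.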
10887
10888/// LowerINTRINSIC_WO_CHAIN - If this is an intrinsic that we want to custom
10889/// lower, do it, otherwise return null.
10890SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
10891 SelectionDAG &DAG) const {
10892 unsigned IntrinsicID = Op.getConstantOperandVal(0);
10893
10894 SDLoc dl(Op);
10895
10896 switch (IntrinsicID) {
10897 case Intrinsic::thread_pointer:
10898 // Reads the thread pointer register, used for __builtin_thread_pointer.
10899 if (Subtarget.isPPC64())
10900 return DAG.getRegister(PPC::X13, MVT::i64);
10901 return DAG.getRegister(PPC::R2, MVT::i32);
10902
10903 case Intrinsic::ppc_rldimi: {
10904 assert(Subtarget.isPPC64() && "rldimi is only available in 64-bit!");
10905 SDValue Src = Op.getOperand(1);
10906 APInt Mask = Op.getConstantOperandAPInt(4);
10907 if (Mask.isZero())
10908 return Op.getOperand(2);
10909 if (Mask.isAllOnes())
10910 return DAG.getNode(ISD::ROTL, dl, MVT::i64, Src, Op.getOperand(3));
10911 uint64_t SH = Op.getConstantOperandVal(3);
10912 unsigned MB = 0, ME = 0;
10913 if (!isRunOfOnes64(Mask.getZExtValue(), MB, ME))
10914 report_fatal_error("invalid rldimi mask!");
10915 // rldimi requires ME=63-SH, otherwise rotation is needed before rldimi.
10916 if (ME < 63 - SH) {
10917 Src = DAG.getNode(ISD::ROTL, dl, MVT::i64, Src,
10918 DAG.getConstant(ME + SH + 1, dl, MVT::i32));
10919 } else if (ME > 63 - SH) {
10920 Src = DAG.getNode(ISD::ROTL, dl, MVT::i64, Src,
10921 DAG.getConstant(ME + SH - 63, dl, MVT::i32));
10922 }
10923 return SDValue(
10924 DAG.getMachineNode(PPC::RLDIMI, dl, MVT::i64,
10925 {Op.getOperand(2), Src,
10926 DAG.getTargetConstant(63 - ME, dl, MVT::i32),
10927 DAG.getTargetConstant(MB, dl, MVT::i32)}),
10928 0);
10929 }
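  // Illustrative example (not part of the original source, operand names
  // hypothetical): for a call like
  //   @llvm.ppc.rldimi(%src, %base, 16, 0x0000FFFFFFFF0000)
  // the mask is the run of ones MB=16..ME=47, so ME == 63 - SH, no extra
  // rotation of %src is needed, and a single RLDIMI with SH=16 and MB=16
  // is emitted.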
10930
10931 case Intrinsic::ppc_rlwimi: {
10932 APInt Mask = Op.getConstantOperandAPInt(4);
10933 if (Mask.isZero())
10934 return Op.getOperand(2);
10935 if (Mask.isAllOnes())
10936 return DAG.getNode(ISD::ROTL, dl, MVT::i32, Op.getOperand(1),
10937 Op.getOperand(3));
10938 unsigned MB = 0, ME = 0;
10939 if (!isRunOfOnes(Mask.getZExtValue(), MB, ME))
10940 report_fatal_error("invalid rlwimi mask!");
10941 return SDValue(DAG.getMachineNode(
10942 PPC::RLWIMI, dl, MVT::i32,
10943 {Op.getOperand(2), Op.getOperand(1), Op.getOperand(3),
10944 DAG.getTargetConstant(MB, dl, MVT::i32),
10945 DAG.getTargetConstant(ME, dl, MVT::i32)}),
10946 0);
10947 }
10948
10949 case Intrinsic::ppc_rlwnm: {
10950 if (Op.getConstantOperandVal(3) == 0)
10951 return DAG.getConstant(0, dl, MVT::i32);
10952 unsigned MB = 0, ME = 0;
10953 if (!isRunOfOnes(Op.getConstantOperandVal(3), MB, ME))
10954 report_fatal_error("invalid rlwnm mask!");
10955 return SDValue(
10956 DAG.getMachineNode(PPC::RLWNM, dl, MVT::i32,
10957 {Op.getOperand(1), Op.getOperand(2),
10958 DAG.getTargetConstant(MB, dl, MVT::i32),
10959 DAG.getTargetConstant(ME, dl, MVT::i32)}),
10960 0);
10961 }
10962
10963 case Intrinsic::ppc_mma_disassemble_acc: {
10964 if (Subtarget.isISAFuture()) {
10965 EVT ReturnTypes[] = {MVT::v256i1, MVT::v256i1};
10966 SDValue WideVec =
10967 SDValue(DAG.getMachineNode(PPC::DMXXEXTFDMR512, dl, ReturnTypes,
10968 Op.getOperand(1)),
10969 0);
10970 SmallVector<SDValue, 4> RetOps;
10971 SDValue Value = SDValue(WideVec.getNode(), 0);
10972 SDValue Value2 = SDValue(WideVec.getNode(), 1);
10973
10974 SDValue Extract;
10975 Extract = DAG.getNode(
10976 PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8,
10977 Subtarget.isLittleEndian() ? Value2 : Value,
10978 DAG.getConstant(Subtarget.isLittleEndian() ? 1 : 0,
10979 dl, getPointerTy(DAG.getDataLayout())));
10980 RetOps.push_back(Extract);
10981 Extract = DAG.getNode(
10982 PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8,
10983 Subtarget.isLittleEndian() ? Value2 : Value,
10984 DAG.getConstant(Subtarget.isLittleEndian() ? 0 : 1,
10985 dl, getPointerTy(DAG.getDataLayout())));
10986 RetOps.push_back(Extract);
10987 Extract = DAG.getNode(
10988 PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8,
10989 Subtarget.isLittleEndian() ? Value : Value2,
10990 DAG.getConstant(Subtarget.isLittleEndian() ? 1 : 0,
10991 dl, getPointerTy(DAG.getDataLayout())));
10992 RetOps.push_back(Extract);
10993 Extract = DAG.getNode(
10994 PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8,
10995 Subtarget.isLittleEndian() ? Value : Value2,
10996 DAG.getConstant(Subtarget.isLittleEndian() ? 0 : 1,
10997 dl, getPointerTy(DAG.getDataLayout())));
10998 RetOps.push_back(Extract);
10999 return DAG.getMergeValues(RetOps, dl);
11000 }
11001 [[fallthrough]];
11002 }
11003 case Intrinsic::ppc_vsx_disassemble_pair: {
11004 int NumVecs = 2;
11005 SDValue WideVec = Op.getOperand(1);
11006 if (IntrinsicID == Intrinsic::ppc_mma_disassemble_acc) {
11007 NumVecs = 4;
11008 WideVec = DAG.getNode(PPCISD::XXMFACC, dl, MVT::v512i1, WideVec);
11009 }
11010 SmallVector<SDValue, 4> RetOps;
11011 for (int VecNo = 0; VecNo < NumVecs; VecNo++) {
11012 SDValue Extract = DAG.getNode(
11013 PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8, WideVec,
11014 DAG.getConstant(Subtarget.isLittleEndian() ? NumVecs - 1 - VecNo
11015 : VecNo,
11016 dl, getPointerTy(DAG.getDataLayout())));
11017 RetOps.push_back(Extract);
11018 }
11019 return DAG.getMergeValues(RetOps, dl);
11020 }
11021
11022 case Intrinsic::ppc_mma_xxmfacc:
11023 case Intrinsic::ppc_mma_xxmtacc: {
11024 // Allow pre-isa-future subtargets to lower as normal.
11025 if (!Subtarget.isISAFuture())
11026 return SDValue();
11027 // The intrinsics for xxmtacc and xxmfacc take one argument of
11028 // type v512i1, for future cpu the corresponding wacc instruction
11029 // dmxx[inst|extf]dmr512 is always generated for type v512i1, negating
11030 // the need to produce the xxm[t|f]acc.
11031 SDValue WideVec = Op.getOperand(1);
11032 DAG.ReplaceAllUsesWith(Op, WideVec);
11033 return SDValue();
11034 }
11035
11036 case Intrinsic::ppc_unpack_longdouble: {
11037 auto *Idx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
11038 assert(Idx && (Idx->getSExtValue() == 0 || Idx->getSExtValue() == 1) &&
11039 "Argument of long double unpack must be 0 or 1!");
11040 return DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::f64, Op.getOperand(1),
11041 DAG.getConstant(!!(Idx->getSExtValue()), dl,
11042 Idx->getValueType(0)));
11043 }
11044
11045 case Intrinsic::ppc_compare_exp_lt:
11046 case Intrinsic::ppc_compare_exp_gt:
11047 case Intrinsic::ppc_compare_exp_eq:
11048 case Intrinsic::ppc_compare_exp_uo: {
11049 unsigned Pred;
11050 switch (IntrinsicID) {
11051 case Intrinsic::ppc_compare_exp_lt:
11052 Pred = PPC::PRED_LT;
11053 break;
11054 case Intrinsic::ppc_compare_exp_gt:
11055 Pred = PPC::PRED_GT;
11056 break;
11057 case Intrinsic::ppc_compare_exp_eq:
11058 Pred = PPC::PRED_EQ;
11059 break;
11060 case Intrinsic::ppc_compare_exp_uo:
11061 Pred = PPC::PRED_UN;
11062 break;
11063 }
11064 return SDValue(
11065 DAG.getMachineNode(
11066 PPC::SELECT_CC_I4, dl, MVT::i32,
11067 {SDValue(DAG.getMachineNode(PPC::XSCMPEXPDP, dl, MVT::i32,
11068 Op.getOperand(1), Op.getOperand(2)),
11069 0),
11070 DAG.getConstant(1, dl, MVT::i32), DAG.getConstant(0, dl, MVT::i32),
11071 DAG.getTargetConstant(Pred, dl, MVT::i32)}),
11072 0);
11073 }
11074 case Intrinsic::ppc_test_data_class: {
11075 EVT OpVT = Op.getOperand(1).getValueType();
11076 unsigned CmprOpc = OpVT == MVT::f128 ? PPC::XSTSTDCQP
11077 : (OpVT == MVT::f64 ? PPC::XSTSTDCDP
11078 : PPC::XSTSTDCSP);
11079 return SDValue(
11080 DAG.getMachineNode(
11081 PPC::SELECT_CC_I4, dl, MVT::i32,
11082 {SDValue(DAG.getMachineNode(CmprOpc, dl, MVT::i32, Op.getOperand(2),
11083 Op.getOperand(1)),
11084 0),
11085 DAG.getConstant(1, dl, MVT::i32), DAG.getConstant(0, dl, MVT::i32),
11086 DAG.getTargetConstant(PPC::PRED_EQ, dl, MVT::i32)}),
11087 0);
11088 }
11089 case Intrinsic::ppc_fnmsub: {
11090 EVT VT = Op.getOperand(1).getValueType();
11091 if (!Subtarget.hasVSX() || (!Subtarget.hasFloat128() && VT == MVT::f128))
11092 return DAG.getNode(
11093 ISD::FNEG, dl, VT,
11094 DAG.getNode(ISD::FMA, dl, VT, Op.getOperand(1), Op.getOperand(2),
11095 DAG.getNode(ISD::FNEG, dl, VT, Op.getOperand(3))));
11096 return DAG.getNode(PPCISD::FNMSUB, dl, VT, Op.getOperand(1),
11097 Op.getOperand(2), Op.getOperand(3));
11098 }
11099 case Intrinsic::ppc_convert_f128_to_ppcf128:
11100 case Intrinsic::ppc_convert_ppcf128_to_f128: {
11101 RTLIB::Libcall LC = IntrinsicID == Intrinsic::ppc_convert_ppcf128_to_f128
11102 ? RTLIB::CONVERT_PPCF128_F128
11103 : RTLIB::CONVERT_F128_PPCF128;
11104 MakeLibCallOptions CallOptions;
11105 std::pair<SDValue, SDValue> Result =
11106 makeLibCall(DAG, LC, Op.getValueType(), Op.getOperand(1), CallOptions,
11107 dl, SDValue());
11108 return Result.first;
11109 }
11110 case Intrinsic::ppc_maxfe:
11111 case Intrinsic::ppc_maxfl:
11112 case Intrinsic::ppc_maxfs:
11113 case Intrinsic::ppc_minfe:
11114 case Intrinsic::ppc_minfl:
11115 case Intrinsic::ppc_minfs: {
11116 EVT VT = Op.getValueType();
11117 assert(
11118 all_of(Op->ops().drop_front(4),
11119 [VT](const SDUse &Use) { return Use.getValueType() == VT; }) &&
11120 "ppc_[max|min]f[e|l|s] must have uniform type arguments");
11121 (void)VT;
11122 ISD::CondCode CC = ISD::SETGT;
11123 if (IntrinsicID == Intrinsic::ppc_minfe ||
11124 IntrinsicID == Intrinsic::ppc_minfl ||
11125 IntrinsicID == Intrinsic::ppc_minfs)
11126 CC = ISD::SETLT;
11127 unsigned I = Op.getNumOperands() - 2, Cnt = I;
11128 SDValue Res = Op.getOperand(I);
11129 for (--I; Cnt != 0; --Cnt, I = (--I == 0 ? (Op.getNumOperands() - 1) : I)) {
11130 Res =
11131 DAG.getSelectCC(dl, Res, Op.getOperand(I), Res, Op.getOperand(I), CC);
11132 }
11133 return Res;
11134 }
11135 }
11136
11137 // If this is a lowered altivec predicate compare, CompareOpc is set to the
11138 // opcode number of the comparison.
11139 int CompareOpc;
11140 bool isDot;
11141 if (!getVectorCompareInfo(Op, CompareOpc, isDot, Subtarget))
11142 return SDValue(); // Don't custom lower most intrinsics.
11143
11144 // If this is a non-dot comparison, make the VCMP node and we are done.
11145 if (!isDot) {
11146 SDValue Tmp = DAG.getNode(PPCISD::VCMP, dl, Op.getOperand(2).getValueType(),
11147 Op.getOperand(1), Op.getOperand(2),
11148 DAG.getConstant(CompareOpc, dl, MVT::i32));
11149 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Tmp);
11150 }
11151
11152 // Create the PPCISD altivec 'dot' comparison node.
11153 SDValue Ops[] = {
11154 Op.getOperand(2), // LHS
11155 Op.getOperand(3), // RHS
11156 DAG.getConstant(CompareOpc, dl, MVT::i32)
11157 };
11158 EVT VTs[] = { Op.getOperand(2).getValueType(), MVT::Glue };
11159 SDValue CompNode = DAG.getNode(PPCISD::VCMP_rec, dl, VTs, Ops);
11160
11161 // Now that we have the comparison, emit a copy from the CR to a GPR.
11162 // This is flagged to the above dot comparison.
11163 SDValue Flags = DAG.getNode(PPCISD::MFOCRF, dl, MVT::i32,
11164 DAG.getRegister(PPC::CR6, MVT::i32),
11165 CompNode.getValue(1));
11166
11167 // Unpack the result based on how the target uses it.
11168 unsigned BitNo; // Bit # of CR6.
11169 bool InvertBit; // Invert result?
11170 switch (Op.getConstantOperandVal(1)) {
11171 default: // Can't happen, don't crash on invalid number though.
11172 case 0: // Return the value of the EQ bit of CR6.
11173 BitNo = 0; InvertBit = false;
11174 break;
11175 case 1: // Return the inverted value of the EQ bit of CR6.
11176 BitNo = 0; InvertBit = true;
11177 break;
11178 case 2: // Return the value of the LT bit of CR6.
11179 BitNo = 2; InvertBit = false;
11180 break;
11181 case 3: // Return the inverted value of the LT bit of CR6.
11182 BitNo = 2; InvertBit = true;
11183 break;
11184 }
11185
11186 // Shift the bit into the low position.
11187 Flags = DAG.getNode(ISD::SRL, dl, MVT::i32, Flags,
11188 DAG.getConstant(8 - (3 - BitNo), dl, MVT::i32));
11189 // Isolate the bit.
11190 Flags = DAG.getNode(ISD::AND, dl, MVT::i32, Flags,
11191 DAG.getConstant(1, dl, MVT::i32));
11192
11193 // If we are supposed to, toggle the bit.
11194 if (InvertBit)
11195 Flags = DAG.getNode(ISD::XOR, dl, MVT::i32, Flags,
11196 DAG.getConstant(1, dl, MVT::i32));
11197 return Flags;
11198}
11199
11200SDValue PPCTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
11201 SelectionDAG &DAG) const {
11202 // SelectionDAGBuilder::visitTargetIntrinsic may insert one extra chain to
11203 // the beginning of the argument list.
11204 int ArgStart = isa<ConstantSDNode>(Op.getOperand(0)) ? 0 : 1;
11205 SDLoc DL(Op);
11206 switch (Op.getConstantOperandVal(ArgStart)) {
11207 case Intrinsic::ppc_cfence: {
11208 assert(ArgStart == 1 && "llvm.ppc.cfence must carry a chain argument.");
11209 SDValue Val = Op.getOperand(ArgStart + 1);
11210 EVT Ty = Val.getValueType();
11211 if (Ty == MVT::i128) {
11212 // FIXME: Testing one of two paired registers is sufficient to guarantee
11213 // ordering?
11214 Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::i64, Val);
11215 }
11216 unsigned Opcode = Subtarget.isPPC64() ? PPC::CFENCE8 : PPC::CFENCE;
11217 EVT FTy = Subtarget.isPPC64() ? MVT::i64 : MVT::i32;
11218 return SDValue(
11219 DAG.getMachineNode(Opcode, DL, MVT::Other,
11220 DAG.getNode(ISD::ANY_EXTEND, DL, FTy, Val),
11221 Op.getOperand(0)),
11222 0);
11223 }
11224 default:
11225 break;
11226 }
11227 return SDValue();
11228}
11229
11230// Lower scalar BSWAP64 to xxbrd.
11231SDValue PPCTargetLowering::LowerBSWAP(SDValue Op, SelectionDAG &DAG) const {
11232 SDLoc dl(Op);
11233 if (!Subtarget.isPPC64())
11234 return Op;
11235 // MTVSRDD
11236 Op = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, Op.getOperand(0),
11237 Op.getOperand(0));
11238 // XXBRD
11239 Op = DAG.getNode(ISD::BSWAP, dl, MVT::v2i64, Op);
11240 // MFVSRD
11241 int VectorIndex = 0;
11242 if (Subtarget.isLittleEndian())
11243 VectorIndex = 1;
11244 Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, Op,
11245 DAG.getTargetConstant(VectorIndex, dl, MVT::i32));
11246 return Op;
11247}
11248
11249// ATOMIC_CMP_SWAP for i8/i16 needs to zero-extend its input since it will be
11250// compared to a value that is atomically loaded (atomic loads zero-extend).
11251SDValue PPCTargetLowering::LowerATOMIC_CMP_SWAP(SDValue Op,
11252 SelectionDAG &DAG) const {
11253 assert(Op.getOpcode() == ISD::ATOMIC_CMP_SWAP &&
11254 "Expecting an atomic compare-and-swap here.");
11255 SDLoc dl(Op);
11256 auto *AtomicNode = cast<AtomicSDNode>(Op.getNode());
11257 EVT MemVT = AtomicNode->getMemoryVT();
11258 if (MemVT.getSizeInBits() >= 32)
11259 return Op;
11260
11261 SDValue CmpOp = Op.getOperand(2);
11262 // If this is already correctly zero-extended, leave it alone.
11263 auto HighBits = APInt::getHighBitsSet(32, 32 - MemVT.getSizeInBits());
11264 if (DAG.MaskedValueIsZero(CmpOp, HighBits))
11265 return Op;
11266
11267 // Clear the high bits of the compare operand.
11268 unsigned MaskVal = (1 << MemVT.getSizeInBits()) - 1;
11269 SDValue NewCmpOp =
11270 DAG.getNode(ISD::AND, dl, MVT::i32, CmpOp,
11271 DAG.getConstant(MaskVal, dl, MVT::i32));
11272
11273 // Replace the existing compare operand with the properly zero-extended one.
11274 SmallVector<SDValue, 4> Ops;
11275 for (int i = 0, e = AtomicNode->getNumOperands(); i < e; i++)
11276 Ops.push_back(AtomicNode->getOperand(i));
11277 Ops[2] = NewCmpOp;
11278 MachineMemOperand *MMO = AtomicNode->getMemOperand();
11279 SDVTList Tys = DAG.getVTList(MVT::i32, MVT::Other);
11280 auto NodeTy =
11281 (MemVT == MVT::i8) ? PPCISD::ATOMIC_CMP_SWAP_8 : PPCISD::ATOMIC_CMP_SWAP_16;
11282 return DAG.getMemIntrinsicNode(NodeTy, dl, Tys, Ops, MemVT, MMO);
11283}
11284
11285SDValue PPCTargetLowering::LowerATOMIC_LOAD_STORE(SDValue Op,
11286 SelectionDAG &DAG) const {
11287 AtomicSDNode *N = cast<AtomicSDNode>(Op.getNode());
11288 EVT MemVT = N->getMemoryVT();
11289 assert(MemVT.getSimpleVT() == MVT::i128 &&
11290 "Expect quadword atomic operations");
11291 SDLoc dl(N);
11292 unsigned Opc = N->getOpcode();
11293 switch (Opc) {
11294 case ISD::ATOMIC_LOAD: {
11295 // Lower quadword atomic load to int_ppc_atomic_load_i128 which will be
11296 // lowered to ppc instructions by pattern matching instruction selector.
11297 SDVTList Tys = DAG.getVTList(MVT::i64, MVT::i64, MVT::Other);
11298 SmallVector<SDValue, 4> Ops{
11299 N->getOperand(0),
11300 DAG.getConstant(Intrinsic::ppc_atomic_load_i128, dl, MVT::i32)};
11301 for (int I = 1, E = N->getNumOperands(); I < E; ++I)
11302 Ops.push_back(N->getOperand(I));
11303 SDValue LoadedVal = DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl, Tys,
11304 Ops, MemVT, N->getMemOperand());
11305 SDValue ValLo = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i128, LoadedVal);
11306 SDValue ValHi =
11307 DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i128, LoadedVal.getValue(1));
11308 ValHi = DAG.getNode(ISD::SHL, dl, MVT::i128, ValHi,
11309 DAG.getConstant(64, dl, MVT::i32));
11310 SDValue Val =
11311 DAG.getNode(ISD::OR, dl, {MVT::i128, MVT::Other}, {ValLo, ValHi});
11312 return DAG.getNode(ISD::MERGE_VALUES, dl, {MVT::i128, MVT::Other},
11313 {Val, LoadedVal.getValue(2)});
11314 }
11315 case ISD::ATOMIC_STORE: {
11316 // Lower quadword atomic store to int_ppc_atomic_store_i128 which will be
11317 // lowered to ppc instructions by pattern matching instruction selector.
11318 SDVTList Tys = DAG.getVTList(MVT::Other);
11319 SmallVector<SDValue, 4> Ops{
11320 N->getOperand(0),
11321 DAG.getConstant(Intrinsic::ppc_atomic_store_i128, dl, MVT::i32)};
11322 SDValue Val = N->getOperand(1);
11323 SDValue ValLo = DAG.getNode(ISD::TRUNCATE, dl, MVT::i64, Val);
11324 SDValue ValHi = DAG.getNode(ISD::SRL, dl, MVT::i128, Val,
11325 DAG.getConstant(64, dl, MVT::i32));
11326 ValHi = DAG.getNode(ISD::TRUNCATE, dl, MVT::i64, ValHi);
11327 Ops.push_back(ValLo);
11328 Ops.push_back(ValHi);
11329 Ops.push_back(N->getOperand(2));
11330 return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, dl, Tys, Ops, MemVT,
11331 N->getMemOperand());
11332 }
11333 default:
11334 llvm_unreachable("Unexpected atomic opcode");
11335 }
11336}
11337
11338 static SDValue getDataClassTest(SDValue Op, FPClassTest Mask, const SDLoc &Dl,
11339 SelectionDAG &DAG,
11340 const PPCSubtarget &Subtarget) {
11341 assert(Mask <= fcAllFlags && "Invalid fp_class flags!");
11342
11343 enum DataClassMask {
11344 DC_NAN = 1 << 6,
11345 DC_NEG_INF = 1 << 4,
11346 DC_POS_INF = 1 << 5,
11347 DC_NEG_ZERO = 1 << 2,
11348 DC_POS_ZERO = 1 << 3,
11349 DC_NEG_SUBNORM = 1,
11350 DC_POS_SUBNORM = 1 << 1,
11351 };
11352
11353 EVT VT = Op.getValueType();
11354
11355 unsigned TestOp = VT == MVT::f128 ? PPC::XSTSTDCQP
11356 : VT == MVT::f64 ? PPC::XSTSTDCDP
11357 : PPC::XSTSTDCSP;
11358
11359 if (Mask == fcAllFlags)
11360 return DAG.getBoolConstant(true, Dl, MVT::i1, VT);
11361 if (Mask == 0)
11362 return DAG.getBoolConstant(false, Dl, MVT::i1, VT);
11363
11364 // When it's cheaper or necessary to test reverse flags.
11365 if ((Mask & fcNormal) == fcNormal || Mask == ~fcQNan || Mask == ~fcSNan) {
11366 SDValue Rev = getDataClassTest(Op, ~Mask, Dl, DAG, Subtarget);
11367 return DAG.getNOT(Dl, Rev, MVT::i1);
11368 }
11369
11370 // Power doesn't support testing whether a value is 'normal'. Test the rest
11371 // first, and test if it's 'not not-normal' with expected sign.
11372 if (Mask & fcNormal) {
11373 SDValue Rev(DAG.getMachineNode(
11374 TestOp, Dl, MVT::i32,
11375 DAG.getTargetConstant(DC_NAN | DC_NEG_INF | DC_POS_INF |
11376 DC_NEG_ZERO | DC_POS_ZERO |
11377 DC_NEG_SUBNORM | DC_POS_SUBNORM,
11378 Dl, MVT::i32),
11379 Op),
11380 0);
11381 // Sign is stored in CR bit 0, result is in CR bit 2.
11382 SDValue Sign(
11383 DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, Dl, MVT::i1, Rev,
11384 DAG.getTargetConstant(PPC::sub_lt, Dl, MVT::i32)),
11385 0);
11386 SDValue Normal(DAG.getNOT(
11387 Dl,
11388 SDValue(DAG.getMachineNode(
11389 TargetOpcode::EXTRACT_SUBREG, Dl, MVT::i1, Rev,
11390 DAG.getTargetConstant(PPC::sub_eq, Dl, MVT::i32)),
11391 0),
11392 MVT::i1));
11393 if (Mask & fcPosNormal)
11394 Sign = DAG.getNOT(Dl, Sign, MVT::i1);
11395 SDValue Result = DAG.getNode(ISD::AND, Dl, MVT::i1, Sign, Normal);
11396 if (Mask == fcPosNormal || Mask == fcNegNormal)
11397 return Result;
11398
11399 return DAG.getNode(
11400 ISD::OR, Dl, MVT::i1,
11401 getDataClassTest(Op, Mask & ~fcNormal, Dl, DAG, Subtarget), Result);
11402 }
11403
11404 // The instruction doesn't differentiate between signaling and quiet NaNs. Test
11405 // the rest first, and test if it 'is NaN and is signaling/quiet'.
11406 if ((Mask & fcNan) == fcQNan || (Mask & fcNan) == fcSNan) {
11407 bool IsQuiet = Mask & fcQNan;
11408 SDValue NanCheck = getDataClassTest(Op, fcNan, Dl, DAG, Subtarget);
11409
11410 // Quietness is determined by the first bit in the fraction field.
11411 uint64_t QuietMask = 0;
11412 SDValue HighWord;
11413 if (VT == MVT::f128) {
11414 HighWord = DAG.getNode(
11415 ISD::EXTRACT_VECTOR_ELT, Dl, MVT::i32, DAG.getBitcast(MVT::v4i32, Op),
11416 DAG.getVectorIdxConstant(Subtarget.isLittleEndian() ? 3 : 0, Dl));
11417 QuietMask = 0x8000;
11418 } else if (VT == MVT::f64) {
11419 if (Subtarget.isPPC64()) {
11420 HighWord = DAG.getNode(ISD::EXTRACT_ELEMENT, Dl, MVT::i32,
11421 DAG.getBitcast(MVT::i64, Op),
11422 DAG.getConstant(1, Dl, MVT::i32));
11423 } else {
11424 SDValue Vec = DAG.getBitcast(
11425 MVT::v4i32, DAG.getNode(ISD::SCALAR_TO_VECTOR, Dl, MVT::v2f64, Op));
11426 HighWord = DAG.getNode(
11427 ISD::EXTRACT_VECTOR_ELT, Dl, MVT::i32, Vec,
11428 DAG.getVectorIdxConstant(Subtarget.isLittleEndian() ? 1 : 0, Dl));
11429 }
11430 QuietMask = 0x80000;
11431 } else if (VT == MVT::f32) {
11432 HighWord = DAG.getBitcast(MVT::i32, Op);
11433 QuietMask = 0x400000;
11434 }
11435 SDValue NanRes = DAG.getSetCC(
11436 Dl, MVT::i1,
11437 DAG.getNode(ISD::AND, Dl, MVT::i32, HighWord,
11438 DAG.getConstant(QuietMask, Dl, MVT::i32)),
11439 DAG.getConstant(0, Dl, MVT::i32), IsQuiet ? ISD::SETNE : ISD::SETEQ);
11440 NanRes = DAG.getNode(ISD::AND, Dl, MVT::i1, NanCheck, NanRes);
11441 if (Mask == fcQNan || Mask == fcSNan)
11442 return NanRes;
11443
11444 return DAG.getNode(ISD::OR, Dl, MVT::i1,
11445 getDataClassTest(Op, Mask & ~fcNan, Dl, DAG, Subtarget),
11446 NanRes);
11447 }
11448
11449 unsigned NativeMask = 0;
11450 if ((Mask & fcNan) == fcNan)
11451 NativeMask |= DC_NAN;
11452 if (Mask & fcNegInf)
11453 NativeMask |= DC_NEG_INF;
11454 if (Mask & fcPosInf)
11455 NativeMask |= DC_POS_INF;
11456 if (Mask & fcNegZero)
11457 NativeMask |= DC_NEG_ZERO;
11458 if (Mask & fcPosZero)
11459 NativeMask |= DC_POS_ZERO;
11460 if (Mask & fcNegSubnormal)
11461 NativeMask |= DC_NEG_SUBNORM;
11462 if (Mask & fcPosSubnormal)
11463 NativeMask |= DC_POS_SUBNORM;
11464 return SDValue(
11465 DAG.getMachineNode(
11466 TargetOpcode::EXTRACT_SUBREG, Dl, MVT::i1,
11467 SDValue(DAG.getMachineNode(
11468 TestOp, Dl, MVT::i32,
11469 DAG.getTargetConstant(NativeMask, Dl, MVT::i32), Op),
11470 0),
11471 DAG.getTargetConstant(PPC::sub_eq, Dl, MVT::i32)),
11472 0);
11473}
11474
11475SDValue PPCTargetLowering::LowerIS_FPCLASS(SDValue Op,
11476 SelectionDAG &DAG) const {
11477 assert(Subtarget.hasP9Vector() && "Test data class requires Power9");
11478 SDValue LHS = Op.getOperand(0);
11479 uint64_t RHSC = Op.getConstantOperandVal(1);
11480 SDLoc Dl(Op);
11481 FPClassTest Category = static_cast<FPClassTest>(RHSC);
11482 return getDataClassTest(LHS, Category, Dl, DAG, Subtarget);
11483}
11484
11485SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op,
11486 SelectionDAG &DAG) const {
11487 SDLoc dl(Op);
11488 // Create a stack slot that is 16-byte aligned.
11489 MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
11490 int FrameIdx = MFI.CreateStackObject(16, Align(16), false);
11491 EVT PtrVT = getPointerTy(DAG.getDataLayout());
11492 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
11493
11494 SDValue Val = Op.getOperand(0);
11495 EVT ValVT = Val.getValueType();
11496 // P10 hardware store forwarding requires that a single store contains all
11497 // the data for the load. P10 is able to merge a pair of adjacent stores. Try
11498 // to avoid load hit store on P10 when running binaries compiled for older
11499 // processors by generating two mergeable scalar stores to forward with the
11500 // vector load.
11501 if (!DisableP10StoreForward && Subtarget.isPPC64() &&
11502 !Subtarget.isLittleEndian() && ValVT.isInteger() &&
11503 ValVT.getSizeInBits() <= 64) {
11504 Val = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, Val);
11505 EVT ShiftAmountTy = getShiftAmountTy(MVT::i64, DAG.getDataLayout());
11506 SDValue ShiftBy = DAG.getConstant(
11507 64 - Op.getValueType().getScalarSizeInBits(), dl, ShiftAmountTy);
11508 Val = DAG.getNode(ISD::SHL, dl, MVT::i64, Val, ShiftBy);
11509 SDValue Plus8 =
11510 DAG.getNode(ISD::ADD, dl, PtrVT, FIdx, DAG.getConstant(8, dl, PtrVT));
11511 SDValue Store2 =
11512 DAG.getStore(DAG.getEntryNode(), dl, Val, Plus8, MachinePointerInfo());
11513 SDValue Store = DAG.getStore(Store2, dl, Val, FIdx, MachinePointerInfo());
11514 return DAG.getLoad(Op.getValueType(), dl, Store, FIdx,
11515 MachinePointerInfo());
11516 }
11517
11518 // Store the input value into Value#0 of the stack slot.
11519 SDValue Store =
11520 DAG.getStore(DAG.getEntryNode(), dl, Val, FIdx, MachinePointerInfo());
11521 // Load it out.
11522 return DAG.getLoad(Op.getValueType(), dl, Store, FIdx, MachinePointerInfo());
11523}
11524
11525SDValue PPCTargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
11526 SelectionDAG &DAG) const {
11527 assert(Op.getOpcode() == ISD::INSERT_VECTOR_ELT &&
11528 "Should only be called for ISD::INSERT_VECTOR_ELT");
11529
11530 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(2));
11531
11532 EVT VT = Op.getValueType();
11533 SDLoc dl(Op);
11534 SDValue V1 = Op.getOperand(0);
11535 SDValue V2 = Op.getOperand(1);
11536
11537 if (VT == MVT::v2f64 && C)
11538 return Op;
11539
11540 if (Subtarget.hasP9Vector()) {
11541 // A f32 load feeding into a v4f32 insert_vector_elt is handled in this way
11542 // because on P10, it allows this specific insert_vector_elt load pattern to
11543 // utilize the refactored load and store infrastructure in order to exploit
11544 // prefixed loads.
11545 // On targets with inexpensive direct moves (Power9 and up), a
11546 // (insert_vector_elt v4f32:$vec, (f32 load)) is always better as an integer
11547 // load since a single precision load will involve conversion to double
11548 // precision on the load followed by another conversion to single precision.
11549 if ((VT == MVT::v4f32) && (V2.getValueType() == MVT::f32) &&
11550 (isa<LoadSDNode>(V2))) {
11551 SDValue BitcastVector = DAG.getBitcast(MVT::v4i32, V1);
11552 SDValue BitcastLoad = DAG.getBitcast(MVT::i32, V2);
11553 SDValue InsVecElt =
11554 DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v4i32, BitcastVector,
11555 BitcastLoad, Op.getOperand(2));
11556 return DAG.getBitcast(MVT::v4f32, InsVecElt);
11557 }
11558 }
11559
11560 if (Subtarget.isISA3_1()) {
11561 if ((VT == MVT::v2i64 || VT == MVT::v2f64) && !Subtarget.isPPC64())
11562 return SDValue();
11563 // On P10, we have legal lowering for constant and variable indices for
11564 // all vectors.
11565 if (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32 ||
11566 VT == MVT::v2i64 || VT == MVT::v4f32 || VT == MVT::v2f64)
11567 return Op;
11568 }
11569
11570 // Before P10, we have legal lowering for constant indices but not for
11571 // variable ones.
11572 if (!C)
11573 return SDValue();
11574
11575 // We can use MTVSRZ + VECINSERT for v8i16 and v16i8 types.
11576 if (VT == MVT::v8i16 || VT == MVT::v16i8) {
11577 SDValue Mtvsrz = DAG.getNode(PPCISD::MTVSRZ, dl, VT, V2);
11578 unsigned BytesInEachElement = VT.getVectorElementType().getSizeInBits() / 8;
11579 unsigned InsertAtElement = C->getZExtValue();
11580 unsigned InsertAtByte = InsertAtElement * BytesInEachElement;
11581 if (Subtarget.isLittleEndian()) {
11582 InsertAtByte = (16 - BytesInEachElement) - InsertAtByte;
11583 }
11584 return DAG.getNode(PPCISD::VECINSERT, dl, VT, V1, Mtvsrz,
11585 DAG.getConstant(InsertAtByte, dl, MVT::i32));
11586 }
11587 return Op;
11588}
11589
11590SDValue PPCTargetLowering::LowerVectorLoad(SDValue Op,
11591 SelectionDAG &DAG) const {
11592 SDLoc dl(Op);
11593 LoadSDNode *LN = cast<LoadSDNode>(Op.getNode());
11594 SDValue LoadChain = LN->getChain();
11595 SDValue BasePtr = LN->getBasePtr();
11596 EVT VT = Op.getValueType();
11597
11598 if (VT != MVT::v256i1 && VT != MVT::v512i1)
11599 return Op;
11600
11601 // Type v256i1 is used for pairs and v512i1 is used for accumulators.
11602 // Here we create 2 or 4 v16i8 loads to load the pair or accumulator value in
11603 // 2 or 4 vsx registers.
11604 assert((VT != MVT::v512i1 || Subtarget.hasMMA()) &&
11605 "Type unsupported without MMA");
11606 assert((VT != MVT::v256i1 || Subtarget.pairedVectorMemops()) &&
11607 "Type unsupported without paired vector support");
11608 Align Alignment = LN->getAlign();
11609 SmallVector<SDValue, 4> Loads;
11610 SmallVector<SDValue, 4> LoadChains;
11611 unsigned NumVecs = VT.getSizeInBits() / 128;
11612 for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
11613 SDValue Load =
11614 DAG.getLoad(MVT::v16i8, dl, LoadChain, BasePtr,
11615 LN->getPointerInfo().getWithOffset(Idx * 16),
11616 commonAlignment(Alignment, Idx * 16),
11617 LN->getMemOperand()->getFlags(), LN->getAAInfo());
11618 BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
11619 DAG.getConstant(16, dl, BasePtr.getValueType()));
11620 Loads.push_back(Load);
11621 LoadChains.push_back(Load.getValue(1));
11622 }
11623 if (Subtarget.isLittleEndian()) {
11624 std::reverse(Loads.begin(), Loads.end());
11625 std::reverse(LoadChains.begin(), LoadChains.end());
11626 }
11627 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains);
11628 SDValue Value =
11629 DAG.getNode(VT == MVT::v512i1 ? PPCISD::ACC_BUILD : PPCISD::PAIR_BUILD,
11630 dl, VT, Loads);
11631 SDValue RetOps[] = {Value, TF};
11632 return DAG.getMergeValues(RetOps, dl);
11633}
11634
11635SDValue PPCTargetLowering::LowerVectorStore(SDValue Op,
11636 SelectionDAG &DAG) const {
11637 SDLoc dl(Op);
11638 StoreSDNode *SN = cast<StoreSDNode>(Op.getNode());
11639 SDValue StoreChain = SN->getChain();
11640 SDValue BasePtr = SN->getBasePtr();
11641 SDValue Value = SN->getValue();
11642 SDValue Value2 = SN->getValue();
11643 EVT StoreVT = Value.getValueType();
11644
11645 if (StoreVT != MVT::v256i1 && StoreVT != MVT::v512i1)
11646 return Op;
11647
11648 // Type v256i1 is used for pairs and v512i1 is used for accumulators.
11649 // Here we create 2 or 4 v16i8 stores to store the pair or accumulator
11650 // underlying registers individually.
11651 assert((StoreVT != MVT::v512i1 || Subtarget.hasMMA()) &&
11652 "Type unsupported without MMA");
11653 assert((StoreVT != MVT::v256i1 || Subtarget.pairedVectorMemops()) &&
11654 "Type unsupported without paired vector support");
11655 Align Alignment = SN->getAlign();
11656 SmallVector<SDValue, 4> Stores;
11657 unsigned NumVecs = 2;
11658 if (StoreVT == MVT::v512i1) {
11659 if (Subtarget.isISAFuture()) {
11660 EVT ReturnTypes[] = {MVT::v256i1, MVT::v256i1};
11661 MachineSDNode *ExtNode = DAG.getMachineNode(
11662 PPC::DMXXEXTFDMR512, dl, ReturnTypes, Op.getOperand(1));
11663
11664 Value = SDValue(ExtNode, 0);
11665 Value2 = SDValue(ExtNode, 1);
11666 } else
11667 Value = DAG.getNode(PPCISD::XXMFACC, dl, MVT::v512i1, Value);
11668 NumVecs = 4;
11669 }
11670 for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
11671 unsigned VecNum = Subtarget.isLittleEndian() ? NumVecs - 1 - Idx : Idx;
11672 SDValue Elt;
11673 if (Subtarget.isISAFuture()) {
11674 VecNum = Subtarget.isLittleEndian() ? 1 - (Idx % 2) : (Idx % 2);
11675 Elt = DAG.getNode(PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8,
11676 Idx > 1 ? Value2 : Value,
11677 DAG.getConstant(VecNum, dl, getPointerTy(DAG.getDataLayout())));
11678 } else
11679 Elt = DAG.getNode(PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8, Value,
11680 DAG.getConstant(VecNum, dl, getPointerTy(DAG.getDataLayout())));
11681
11682 SDValue Store =
11683 DAG.getStore(StoreChain, dl, Elt, BasePtr,
11684 SN->getPointerInfo().getWithOffset(Idx * 16),
11685 commonAlignment(Alignment, Idx * 16),
11686 SN->getMemOperand()->getFlags(), SN->getAAInfo());
11687 BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
11688 DAG.getConstant(16, dl, BasePtr.getValueType()));
11689 Stores.push_back(Store);
11690 }
11691 SDValue TF = DAG.getTokenFactor(dl, Stores);
11692 return TF;
11693}
11694
11695SDValue PPCTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const {
11696 SDLoc dl(Op);
11697 if (Op.getValueType() == MVT::v4i32) {
11698 SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
11699
11700 SDValue Zero = getCanonicalConstSplat(0, 1, MVT::v4i32, DAG, dl);
11701 // +16 as shift amt.
11702 SDValue Neg16 = getCanonicalConstSplat(-16, 4, MVT::v4i32, DAG, dl);
11703 SDValue RHSSwap = // = vrlw RHS, 16
11704 BuildIntrinsicOp(Intrinsic::ppc_altivec_vrlw, RHS, Neg16, DAG, dl);
11705
11706 // Shrinkify inputs to v8i16.
11707 LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, LHS);
11708 RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHS);
11709 RHSSwap = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHSSwap);
11710
11711 // Low parts multiplied together, generating 32-bit results (we ignore the
11712 // top parts).
11713 SDValue LoProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmulouh,
11714 LHS, RHS, DAG, dl, MVT::v4i32);
11715
11716 SDValue HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmsumuhm,
11717 LHS, RHSSwap, Zero, DAG, dl, MVT::v4i32);
11718 // Shift the high parts up 16 bits.
11719 HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, HiProd,
11720 Neg16, DAG, dl);
11721 return DAG.getNode(ISD::ADD, dl, MVT::v4i32, LoProd, HiProd);
11722 } else if (Op.getValueType() == MVT::v16i8) {
11723 SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
11724 bool isLittleEndian = Subtarget.isLittleEndian();
11725
11726 // Multiply the even 8-bit parts, producing 16-bit sums.
11727 SDValue EvenParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuleub,
11728 LHS, RHS, DAG, dl, MVT::v8i16);
11729 EvenParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, EvenParts);
11730
11731 // Multiply the odd 8-bit parts, producing 16-bit sums.
11732 SDValue OddParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuloub,
11733 LHS, RHS, DAG, dl, MVT::v8i16);
11734 OddParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OddParts);
11735
11736 // Merge the results together. Because vmuleub and vmuloub are
11737 // instructions with a big-endian bias, we must reverse the
11738 // element numbering and reverse the meaning of "odd" and "even"
11739 // when generating little endian code.
11740 int Ops[16];
11741 for (unsigned i = 0; i != 8; ++i) {
11742 if (isLittleEndian) {
11743 Ops[i*2 ] = 2*i;
11744 Ops[i*2+1] = 2*i+16;
11745 } else {
11746 Ops[i*2 ] = 2*i+1;
11747 Ops[i*2+1] = 2*i+1+16;
11748 }
11749 }
11750 if (isLittleEndian)
11751 return DAG.getVectorShuffle(MVT::v16i8, dl, OddParts, EvenParts, Ops);
11752 else
11753 return DAG.getVectorShuffle(MVT::v16i8, dl, EvenParts, OddParts, Ops);
11754 } else {
11755 llvm_unreachable("Unknown mul to lower!");
11756 }
11757}
11758
11759SDValue PPCTargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const {
11760 bool IsStrict = Op->isStrictFPOpcode();
11761 if (Op.getOperand(IsStrict ? 1 : 0).getValueType() == MVT::f128 &&
11762 !Subtarget.hasP9Vector())
11763 return SDValue();
11764
11765 return Op;
11766}
11767
11768 // Custom lowering for fpext v2f32 to v2f64
11769SDValue PPCTargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const {
11770
11771 assert(Op.getOpcode() == ISD::FP_EXTEND &&
11772 "Should only be called for ISD::FP_EXTEND");
11773
11774 // FIXME: handle extends from half precision float vectors on P9.
11775 // We only want to custom lower an extend from v2f32 to v2f64.
11776 if (Op.getValueType() != MVT::v2f64 ||
11777 Op.getOperand(0).getValueType() != MVT::v2f32)
11778 return SDValue();
11779
11780 SDLoc dl(Op);
11781 SDValue Op0 = Op.getOperand(0);
11782
11783 switch (Op0.getOpcode()) {
11784 default:
11785 return SDValue();
11786 case ISD::EXTRACT_SUBVECTOR: {
11787 assert(Op0.getNumOperands() == 2 &&
11788 isa<ConstantSDNode>(Op0->getOperand(1)) &&
11789 "Node should have 2 operands with second one being a constant!");
11790
11791 if (Op0.getOperand(0).getValueType() != MVT::v4f32)
11792 return SDValue();
11793
11794 // Custom lower is only done for high or low doubleword.
11795 int Idx = Op0.getConstantOperandVal(1);
11796 if (Idx % 2 != 0)
11797 return SDValue();
11798
11799 // Since input is v4f32, at this point Idx is either 0 or 2.
11800 // Shift to get the doubleword position we want.
11801 int DWord = Idx >> 1;
11802
11803 // High and low word positions are different on little endian.
11804 if (Subtarget.isLittleEndian())
11805 DWord ^= 0x1;
11806
11807 return DAG.getNode(PPCISD::FP_EXTEND_HALF, dl, MVT::v2f64,
11808 Op0.getOperand(0), DAG.getConstant(DWord, dl, MVT::i32));
11809 }
11810 case ISD::FADD:
11811 case ISD::FMUL:
11812 case ISD::FSUB: {
11813 SDValue NewLoad[2];
11814 for (unsigned i = 0, ie = Op0.getNumOperands(); i != ie; ++i) {
11815 // Ensure both input are loads.
11816 SDValue LdOp = Op0.getOperand(i);
11817 if (LdOp.getOpcode() != ISD::LOAD)
11818 return SDValue();
11819 // Generate new load node.
11820 LoadSDNode *LD = cast<LoadSDNode>(LdOp);
11821 SDValue LoadOps[] = {LD->getChain(), LD->getBasePtr()};
11822 NewLoad[i] = DAG.getMemIntrinsicNode(
11823 PPCISD::LD_VSX_LH, dl, DAG.getVTList(MVT::v4f32, MVT::Other), LoadOps,
11824 LD->getMemoryVT(), LD->getMemOperand());
11825 }
11826 SDValue NewOp =
11827 DAG.getNode(Op0.getOpcode(), SDLoc(Op0), MVT::v4f32, NewLoad[0],
11828 NewLoad[1], Op0.getNode()->getFlags());
11829 return DAG.getNode(PPCISD::FP_EXTEND_HALF, dl, MVT::v2f64, NewOp,
11830 DAG.getConstant(0, dl, MVT::i32));
11831 }
11832 case ISD::LOAD: {
11833 LoadSDNode *LD = cast<LoadSDNode>(Op0);
11834 SDValue LoadOps[] = {LD->getChain(), LD->getBasePtr()};
11835 SDValue NewLd = DAG.getMemIntrinsicNode(
11836 PPCISD::LD_VSX_LH, dl, DAG.getVTList(MVT::v4f32, MVT::Other), LoadOps,
11837 LD->getMemoryVT(), LD->getMemOperand());
11838 return DAG.getNode(PPCISD::FP_EXTEND_HALF, dl, MVT::v2f64, NewLd,
11839 DAG.getConstant(0, dl, MVT::i32));
11840 }
11841 }
11842 llvm_unreachable("ERROR: Should return for all cases within switch.");
11843}
11844
11845/// LowerOperation - Provide custom lowering hooks for some operations.
11846///
11847 SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
11848 switch (Op.getOpcode()) {
11849 default: llvm_unreachable("Wasn't expecting to be able to lower this!");
11850 case ISD::FPOW: return lowerPow(Op, DAG);
11851 case ISD::FSIN: return lowerSin(Op, DAG);
11852 case ISD::FCOS: return lowerCos(Op, DAG);
11853 case ISD::FLOG: return lowerLog(Op, DAG);
11854 case ISD::FLOG10: return lowerLog10(Op, DAG);
11855 case ISD::FEXP: return lowerExp(Op, DAG);
11856 case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
11857 case ISD::BlockAddress: return LowerBlockAddress(Op, DAG);
11858 case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG);
11859 case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
11860 case ISD::JumpTable: return LowerJumpTable(Op, DAG);
11861 case ISD::STRICT_FSETCC:
11862 case ISD::STRICT_FSETCCS:
11863 case ISD::SETCC: return LowerSETCC(Op, DAG);
11864 case ISD::INIT_TRAMPOLINE: return LowerINIT_TRAMPOLINE(Op, DAG);
11865 case ISD::ADJUST_TRAMPOLINE: return LowerADJUST_TRAMPOLINE(Op, DAG);
11866
11867 case ISD::INLINEASM:
11868 case ISD::INLINEASM_BR: return LowerINLINEASM(Op, DAG);
11869 // Variable argument lowering.
11870 case ISD::VASTART: return LowerVASTART(Op, DAG);
11871 case ISD::VAARG: return LowerVAARG(Op, DAG);
11872 case ISD::VACOPY: return LowerVACOPY(Op, DAG);
11873
11874 case ISD::STACKRESTORE: return LowerSTACKRESTORE(Op, DAG);
11875 case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG);
11876 case ISD::GET_DYNAMIC_AREA_OFFSET:
11877 return LowerGET_DYNAMIC_AREA_OFFSET(Op, DAG);
11878
11879 // Exception handling lowering.
11880 case ISD::EH_DWARF_CFA: return LowerEH_DWARF_CFA(Op, DAG);
11881 case ISD::EH_SJLJ_SETJMP: return lowerEH_SJLJ_SETJMP(Op, DAG);
11882 case ISD::EH_SJLJ_LONGJMP: return lowerEH_SJLJ_LONGJMP(Op, DAG);
11883
11884 case ISD::LOAD: return LowerLOAD(Op, DAG);
11885 case ISD::STORE: return LowerSTORE(Op, DAG);
11886 case ISD::TRUNCATE: return LowerTRUNCATE(Op, DAG);
11887 case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
11888 case ISD::STRICT_FP_TO_UINT:
11889 case ISD::STRICT_FP_TO_SINT:
11890 case ISD::FP_TO_UINT:
11891 case ISD::FP_TO_SINT: return LowerFP_TO_INT(Op, DAG, SDLoc(Op));
11892 case ISD::STRICT_UINT_TO_FP:
11893 case ISD::STRICT_SINT_TO_FP:
11894 case ISD::UINT_TO_FP:
11895 case ISD::SINT_TO_FP: return LowerINT_TO_FP(Op, DAG);
11896 case ISD::GET_ROUNDING: return LowerGET_ROUNDING(Op, DAG);
11897
11898 // Lower 64-bit shifts.
11899 case ISD::SHL_PARTS: return LowerSHL_PARTS(Op, DAG);
11900 case ISD::SRL_PARTS: return LowerSRL_PARTS(Op, DAG);
11901 case ISD::SRA_PARTS: return LowerSRA_PARTS(Op, DAG);
11902
11903 case ISD::FSHL: return LowerFunnelShift(Op, DAG);
11904 case ISD::FSHR: return LowerFunnelShift(Op, DAG);
11905
11906 // Vector-related lowering.
11907 case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG);
11908 case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG);
11909 case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
11910 case ISD::SCALAR_TO_VECTOR: return LowerSCALAR_TO_VECTOR(Op, DAG);
11911 case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
11912 case ISD::MUL: return LowerMUL(Op, DAG);
11913 case ISD::FP_EXTEND: return LowerFP_EXTEND(Op, DAG);
11914 case ISD::STRICT_FP_ROUND:
11915 case ISD::FP_ROUND:
11916 return LowerFP_ROUND(Op, DAG);
11917 case ISD::ROTL: return LowerROTL(Op, DAG);
11918
11919 // For counter-based loop handling.
11920 case ISD::INTRINSIC_W_CHAIN: return SDValue();
11921
11922 case ISD::BITCAST: return LowerBITCAST(Op, DAG);
11923
11924 // Frame & Return address.
11925 case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG);
11926 case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
11927
11928 case ISD::INTRINSIC_VOID:
11929 return LowerINTRINSIC_VOID(Op, DAG);
11930 case ISD::BSWAP:
11931 return LowerBSWAP(Op, DAG);
11932 case ISD::ATOMIC_CMP_SWAP:
11933 return LowerATOMIC_CMP_SWAP(Op, DAG);
11934 case ISD::ATOMIC_STORE:
11935 return LowerATOMIC_LOAD_STORE(Op, DAG);
11936 case ISD::IS_FPCLASS:
11937 return LowerIS_FPCLASS(Op, DAG);
11938 }
11939}
11940
11941 void PPCTargetLowering::ReplaceNodeResults(SDNode *N,
11942 SmallVectorImpl<SDValue> &Results,
11943 SelectionDAG &DAG) const {
11944 SDLoc dl(N);
11945 switch (N->getOpcode()) {
11946 default:
11947 llvm_unreachable("Do not know how to custom type legalize this operation!");
11948 case ISD::ATOMIC_LOAD: {
11949 SDValue Res = LowerATOMIC_LOAD_STORE(SDValue(N, 0), DAG);
11950 Results.push_back(Res);
11951 Results.push_back(Res.getValue(1));
11952 break;
11953 }
11954 case ISD::READCYCLECOUNTER: {
11955 SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
11956 SDValue RTB = DAG.getNode(PPCISD::READ_TIME_BASE, dl, VTs, N->getOperand(0));
11957
11958 Results.push_back(
11959 DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, RTB, RTB.getValue(1)));
11960 Results.push_back(RTB.getValue(2));
11961 break;
11962 }
11963 case ISD::INTRINSIC_W_CHAIN: {
11964 if (N->getConstantOperandVal(1) != Intrinsic::loop_decrement)
11965 break;
11966
11967 assert(N->getValueType(0) == MVT::i1 &&
11968 "Unexpected result type for CTR decrement intrinsic");
11969 EVT SVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
11970 N->getValueType(0));
11971 SDVTList VTs = DAG.getVTList(SVT, MVT::Other);
11972 SDValue NewInt = DAG.getNode(N->getOpcode(), dl, VTs, N->getOperand(0),
11973 N->getOperand(1));
11974
11975 Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, NewInt));
11976 Results.push_back(NewInt.getValue(1));
11977 break;
11978 }
11979 case ISD::INTRINSIC_WO_CHAIN: {
11980 switch (N->getConstantOperandVal(0)) {
11981 case Intrinsic::ppc_pack_longdouble:
11982 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::ppcf128,
11983 N->getOperand(2), N->getOperand(1)));
11984 break;
11985 case Intrinsic::ppc_maxfe:
11986 case Intrinsic::ppc_minfe:
11987 case Intrinsic::ppc_fnmsub:
11988 case Intrinsic::ppc_convert_f128_to_ppcf128:
11989 Results.push_back(LowerINTRINSIC_WO_CHAIN(SDValue(N, 0), DAG));
11990 break;
11991 }
11992 break;
11993 }
11994 case ISD::VAARG: {
11995 if (!Subtarget.isSVR4ABI() || Subtarget.isPPC64())
11996 return;
11997
11998 EVT VT = N->getValueType(0);
11999
12000 if (VT == MVT::i64) {
12001 SDValue NewNode = LowerVAARG(SDValue(N, 1), DAG);
12002
12003 Results.push_back(NewNode);
12004 Results.push_back(NewNode.getValue(1));
12005 }
12006 return;
12007 }
12008 case ISD::STRICT_FP_TO_SINT:
12009 case ISD::STRICT_FP_TO_UINT:
12010 case ISD::FP_TO_SINT:
12011 case ISD::FP_TO_UINT: {
12012 // LowerFP_TO_INT() can only handle f32 and f64.
12013 if (N->getOperand(N->isStrictFPOpcode() ? 1 : 0).getValueType() ==
12014 MVT::ppcf128)
12015 return;
12016 SDValue LoweredValue = LowerFP_TO_INT(SDValue(N, 0), DAG, dl);
12017 Results.push_back(LoweredValue);
12018 if (N->isStrictFPOpcode())
12019 Results.push_back(LoweredValue.getValue(1));
12020 return;
12021 }
12022 case ISD::TRUNCATE: {
12023 if (!N->getValueType(0).isVector())
12024 return;
12025 SDValue Lowered = LowerTRUNCATEVector(SDValue(N, 0), DAG);
12026 if (Lowered)
12027 Results.push_back(Lowered);
12028 return;
12029 }
12030 case ISD::FSHL:
12031 case ISD::FSHR:
12032 // Don't handle funnel shifts here.
12033 return;
12034 case ISD::BITCAST:
12035 // Don't handle bitcast here.
12036 return;
12037 case ISD::FP_EXTEND:
12038 SDValue Lowered = LowerFP_EXTEND(SDValue(N, 0), DAG);
12039 if (Lowered)
12040 Results.push_back(Lowered);
12041 return;
12042 }
12043}
12044
12045//===----------------------------------------------------------------------===//
12046// Other Lowering Code
12047//===----------------------------------------------------------------------===//
12048
12049 static Instruction *callIntrinsic(IRBuilderBase &Builder, Intrinsic::ID Id) {
12050 Module *M = Builder.GetInsertBlock()->getParent()->getParent();
12051 Function *Func = Intrinsic::getDeclaration(M, Id);
12052 return Builder.CreateCall(Func, {});
12053}
12054
12055 // The mappings for emitLeading/TrailingFence are taken from
12056// http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html
12057 Instruction *PPCTargetLowering::emitLeadingFence(IRBuilderBase &Builder,
12058 Instruction *Inst,
12059 AtomicOrdering Ord) const {
12060 if (Ord == AtomicOrdering::SequentiallyConsistent)
12061 return callIntrinsic(Builder, Intrinsic::ppc_sync);
12062 if (isReleaseOrStronger(Ord))
12063 return callIntrinsic(Builder, Intrinsic::ppc_lwsync);
12064 return nullptr;
12065}
12066
12067 Instruction *PPCTargetLowering::emitTrailingFence(IRBuilderBase &Builder,
12068 Instruction *Inst,
12069 AtomicOrdering Ord) const {
12070 if (Inst->hasAtomicLoad() && isAcquireOrStronger(Ord)) {
12071 // See http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html and
12072 // http://www.rdrop.com/users/paulmck/scalability/paper/N2745r.2011.03.04a.html
12073 // and http://www.cl.cam.ac.uk/~pes20/cppppc/ for justification.
12074 if (isa<LoadInst>(Inst))
12075 return Builder.CreateCall(
12076 Intrinsic::getDeclaration(
12077 Builder.GetInsertBlock()->getParent()->getParent(),
12078 Intrinsic::ppc_cfence, {Inst->getType()}),
12079 {Inst});
12080 // FIXME: Can use isync for rmw operation.
12081 return callIntrinsic(Builder, Intrinsic::ppc_lwsync);
12082 }
12083 return nullptr;
12084}
12085
12086 MachineBasicBlock *PPCTargetLowering::EmitAtomicBinary(MachineInstr &MI,
12087 MachineBasicBlock *BB,
12088 unsigned AtomicSize,
12089 unsigned BinOpcode,
12090 unsigned CmpOpcode,
12091 unsigned CmpPred) const {
12092 // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
12093 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
12094
12095 auto LoadMnemonic = PPC::LDARX;
12096 auto StoreMnemonic = PPC::STDCX;
12097 switch (AtomicSize) {
12098 default:
12099 llvm_unreachable("Unexpected size of atomic entity");
12100 case 1:
12101 LoadMnemonic = PPC::LBARX;
12102 StoreMnemonic = PPC::STBCX;
12103 assert(Subtarget.hasPartwordAtomics() && "Call this only with size >=4");
12104 break;
12105 case 2:
12106 LoadMnemonic = PPC::LHARX;
12107 StoreMnemonic = PPC::STHCX;
12108 assert(Subtarget.hasPartwordAtomics() && "Call this only with size >=4");
12109 break;
12110 case 4:
12111 LoadMnemonic = PPC::LWARX;
12112 StoreMnemonic = PPC::STWCX;
12113 break;
12114 case 8:
12115 LoadMnemonic = PPC::LDARX;
12116 StoreMnemonic = PPC::STDCX;
12117 break;
12118 }
12119
12120 const BasicBlock *LLVM_BB = BB->getBasicBlock();
12121 MachineFunction *F = BB->getParent();
12122 MachineFunction::iterator It = ++BB->getIterator();
12123
12124 Register dest = MI.getOperand(0).getReg();
12125 Register ptrA = MI.getOperand(1).getReg();
12126 Register ptrB = MI.getOperand(2).getReg();
12127 Register incr = MI.getOperand(3).getReg();
12128 DebugLoc dl = MI.getDebugLoc();
12129
12130 MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
12131 MachineBasicBlock *loop2MBB =
12132 CmpOpcode ? F->CreateMachineBasicBlock(LLVM_BB) : nullptr;
12133 MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
12134 F->insert(It, loopMBB);
12135 if (CmpOpcode)
12136 F->insert(It, loop2MBB);
12137 F->insert(It, exitMBB);
12138 exitMBB->splice(exitMBB->begin(), BB,
12139 std::next(MachineBasicBlock::iterator(MI)), BB->end());
12140 exitMBB->transferSuccessorsAndUpdatePHIs(BB);
12141
12142 MachineRegisterInfo &RegInfo = F->getRegInfo();
12143 Register TmpReg = (!BinOpcode) ? incr :
12144 RegInfo.createVirtualRegister( AtomicSize == 8 ? &PPC::G8RCRegClass
12145 : &PPC::GPRCRegClass);
12146
12147 // thisMBB:
12148 // ...
12149 // fallthrough --> loopMBB
12150 BB->addSuccessor(loopMBB);
12151
12152 // loopMBB:
12153 // l[wd]arx dest, ptr
12154 // add r0, dest, incr
12155 // st[wd]cx. r0, ptr
12156 // bne- loopMBB
12157 // fallthrough --> exitMBB
12158
12159 // For max/min...
12160 // loopMBB:
12161 // l[wd]arx dest, ptr
12162 // cmpl?[wd] dest, incr
12163 // bgt exitMBB
12164 // loop2MBB:
12165 // st[wd]cx. dest, ptr
12166 // bne- loopMBB
12167 // fallthrough --> exitMBB
12168
12169 BB = loopMBB;
12170 BuildMI(BB, dl, TII->get(LoadMnemonic), dest)
12171 .addReg(ptrA).addReg(ptrB);
12172 if (BinOpcode)
12173 BuildMI(BB, dl, TII->get(BinOpcode), TmpReg).addReg(incr).addReg(dest);
12174 if (CmpOpcode) {
12175 Register CrReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
12176 // Signed comparisons of byte or halfword values must be sign-extended.
12177 if (CmpOpcode == PPC::CMPW && AtomicSize < 4) {
12178 Register ExtReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
12179 BuildMI(BB, dl, TII->get(AtomicSize == 1 ? PPC::EXTSB : PPC::EXTSH),
12180 ExtReg).addReg(dest);
12181 BuildMI(BB, dl, TII->get(CmpOpcode), CrReg).addReg(ExtReg).addReg(incr);
12182 } else
12183 BuildMI(BB, dl, TII->get(CmpOpcode), CrReg).addReg(dest).addReg(incr);
12184
12185 BuildMI(BB, dl, TII->get(PPC::BCC))
12186 .addImm(CmpPred)
12187 .addReg(CrReg)
12188 .addMBB(exitMBB);
12189 BB->addSuccessor(loop2MBB);
12190 BB->addSuccessor(exitMBB);
12191 BB = loop2MBB;
12192 }
12193 BuildMI(BB, dl, TII->get(StoreMnemonic))
12194 .addReg(TmpReg).addReg(ptrA).addReg(ptrB);
12195 BuildMI(BB, dl, TII->get(PPC::BCC))
12196 .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loopMBB);
12197 BB->addSuccessor(loopMBB);
12198 BB->addSuccessor(exitMBB);
12199
12200 // exitMBB:
12201 // ...
12202 BB = exitMBB;
12203 return BB;
12204}
12205
12206 static bool isSignExtended(MachineInstr &MI, const PPCInstrInfo *TII) {
12207 switch(MI.getOpcode()) {
12208 default:
12209 return false;
12210 case PPC::COPY:
12211 return TII->isSignExtended(MI.getOperand(1).getReg(),
12212 &MI.getMF()->getRegInfo());
12213 case PPC::LHA:
12214 case PPC::LHA8:
12215 case PPC::LHAU:
12216 case PPC::LHAU8:
12217 case PPC::LHAUX:
12218 case PPC::LHAUX8:
12219 case PPC::LHAX:
12220 case PPC::LHAX8:
12221 case PPC::LWA:
12222 case PPC::LWAUX:
12223 case PPC::LWAX:
12224 case PPC::LWAX_32:
12225 case PPC::LWA_32:
12226 case PPC::PLHA:
12227 case PPC::PLHA8:
12228 case PPC::PLHA8pc:
12229 case PPC::PLHApc:
12230 case PPC::PLWA:
12231 case PPC::PLWA8:
12232 case PPC::PLWA8pc:
12233 case PPC::PLWApc:
12234 case PPC::EXTSB:
12235 case PPC::EXTSB8:
12236 case PPC::EXTSB8_32_64:
12237 case PPC::EXTSB8_rec:
12238 case PPC::EXTSB_rec:
12239 case PPC::EXTSH:
12240 case PPC::EXTSH8:
12241 case PPC::EXTSH8_32_64:
12242 case PPC::EXTSH8_rec:
12243 case PPC::EXTSH_rec:
12244 case PPC::EXTSW:
12245 case PPC::EXTSWSLI:
12246 case PPC::EXTSWSLI_32_64:
12247 case PPC::EXTSWSLI_32_64_rec:
12248 case PPC::EXTSWSLI_rec:
12249 case PPC::EXTSW_32:
12250 case PPC::EXTSW_32_64:
12251 case PPC::EXTSW_32_64_rec:
12252 case PPC::EXTSW_rec:
12253 case PPC::SRAW:
12254 case PPC::SRAWI:
12255 case PPC::SRAWI_rec:
12256 case PPC::SRAW_rec:
12257 return true;
12258 }
12259 return false;
12260}
12261
12262 MachineBasicBlock *PPCTargetLowering::EmitPartwordAtomicBinary(
12263 MachineInstr &MI, MachineBasicBlock *BB,
12264 bool is8bit, // operation
12265 unsigned BinOpcode, unsigned CmpOpcode, unsigned CmpPred) const {
12266 // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
12267 const PPCInstrInfo *TII = Subtarget.getInstrInfo();
12268
12269 // If this is a signed comparison and the value being compared is not known
12270 // to be sign extended, sign extend it here.
12271 DebugLoc dl = MI.getDebugLoc();
12272 MachineFunction *F = BB->getParent();
12273 MachineRegisterInfo &RegInfo = F->getRegInfo();
12274 Register incr = MI.getOperand(3).getReg();
12275 bool IsSignExtended =
12276 incr.isVirtual() && isSignExtended(*RegInfo.getVRegDef(incr), TII);
12277
12278 if (CmpOpcode == PPC::CMPW && !IsSignExtended) {
12279 Register ValueReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
12280 BuildMI(*BB, MI, dl, TII->get(is8bit ? PPC::EXTSB : PPC::EXTSH), ValueReg)
12281 .addReg(MI.getOperand(3).getReg());
12282 MI.getOperand(3).setReg(ValueReg);
12283 incr = ValueReg;
12284 }
12285 // If we support part-word atomic mnemonics, just use them
12286 if (Subtarget.hasPartwordAtomics())
12287 return EmitAtomicBinary(MI, BB, is8bit ? 1 : 2, BinOpcode, CmpOpcode,
12288 CmpPred);
12289
12290 // In 64 bit mode we have to use 64 bits for addresses, even though the
12291 // lwarx/stwcx are 32 bits. With the 32-bit atomics we can use address
12292 // registers without caring whether they're 32 or 64, but here we're
12293 // doing actual arithmetic on the addresses.
12294 bool is64bit = Subtarget.isPPC64();
12295 bool isLittleEndian = Subtarget.isLittleEndian();
12296 unsigned ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;
12297
12298 const BasicBlock *LLVM_BB = BB->getBasicBlock();
12299 MachineFunction::iterator It = ++BB->getIterator();
12300
12301 Register dest = MI.getOperand(0).getReg();
12302 Register ptrA = MI.getOperand(1).getReg();
12303 Register ptrB = MI.getOperand(2).getReg();
12304
12305 MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
12306 MachineBasicBlock *loop2MBB =
12307 CmpOpcode ? F->CreateMachineBasicBlock(LLVM_BB) : nullptr;
12308 MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
12309 F->insert(It, loopMBB);
12310 if (CmpOpcode)
12311 F->insert(It, loop2MBB);
12312 F->insert(It, exitMBB);
12313 exitMBB->splice(exitMBB->begin(), BB,
12314 std::next(MachineBasicBlock::iterator(MI)), BB->end());
12315 exitMBB->transferSuccessorsAndUpdatePHIs(BB);
12316
12317 const TargetRegisterClass *RC =
12318 is64bit ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
12319 const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
12320
12321 Register PtrReg = RegInfo.createVirtualRegister(RC);
12322 Register Shift1Reg = RegInfo.createVirtualRegister(GPRC);
12323 Register ShiftReg =
12324 isLittleEndian ? Shift1Reg : RegInfo.createVirtualRegister(GPRC);
12325 Register Incr2Reg = RegInfo.createVirtualRegister(GPRC);
12326 Register MaskReg = RegInfo.createVirtualRegister(GPRC);
12327 Register Mask2Reg = RegInfo.createVirtualRegister(GPRC);
12328 Register Mask3Reg = RegInfo.createVirtualRegister(GPRC);
12329 Register Tmp2Reg = RegInfo.createVirtualRegister(GPRC);
12330 Register Tmp3Reg = RegInfo.createVirtualRegister(GPRC);
12331 Register Tmp4Reg = RegInfo.createVirtualRegister(GPRC);
12332 Register TmpDestReg = RegInfo.createVirtualRegister(GPRC);
12333 Register SrwDestReg = RegInfo.createVirtualRegister(GPRC);
12334 Register Ptr1Reg;
12335 Register TmpReg =
12336 (!BinOpcode) ? Incr2Reg : RegInfo.createVirtualRegister(GPRC);
12337
12338 // thisMBB:
12339 // ...
12340 // fallthrough --> loopMBB
12341 BB->addSuccessor(loopMBB);
12342
12343 // The 4-byte load must be aligned, while a char or short may be
12344 // anywhere in the word. Hence all this nasty bookkeeping code.
12345 // add ptr1, ptrA, ptrB [copy if ptrA==0]
12346 // rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27]
12347 // xori shift, shift1, 24 [16]
12348 // rlwinm ptr, ptr1, 0, 0, 29
12349 // slw incr2, incr, shift
12350 // li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535]
12351 // slw mask, mask2, shift
12352 // loopMBB:
12353 // lwarx tmpDest, ptr
12354 // add tmp, tmpDest, incr2
12355 // andc tmp2, tmpDest, mask
12356 // and tmp3, tmp, mask
12357 // or tmp4, tmp3, tmp2
12358 // stwcx. tmp4, ptr
12359 // bne- loopMBB
12360 // fallthrough --> exitMBB
12361 // srw SrwDest, tmpDest, shift
12362 // rlwinm SrwDest, SrwDest, 0, 24 [16], 31
12363 if (ptrA != ZeroReg) {
12364 Ptr1Reg = RegInfo.createVirtualRegister(RC);
12365 BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)
12366 .addReg(ptrA)
12367 .addReg(ptrB);
12368 } else {
12369 Ptr1Reg = ptrB;
12370 }
12371 // We need to use a 32-bit subregister to avoid a register class mismatch in
12372 // 64-bit mode.
12373 BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg)
12374 .addReg(Ptr1Reg, 0, is64bit ? PPC::sub_32 : 0)
12375 .addImm(3)
12376 .addImm(27)
12377 .addImm(is8bit ? 28 : 27);
12378 if (!isLittleEndian)
12379 BuildMI(BB, dl, TII->get(PPC::XORI), ShiftReg)
12380 .addReg(Shift1Reg)
12381 .addImm(is8bit ? 24 : 16);
12382 if (is64bit)
12383 BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg)
12384 .addReg(Ptr1Reg)
12385 .addImm(0)
12386 .addImm(61);
12387 else
12388 BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg)
12389 .addReg(Ptr1Reg)
12390 .addImm(0)
12391 .addImm(0)
12392 .addImm(29);
12393 BuildMI(BB, dl, TII->get(PPC::SLW), Incr2Reg).addReg(incr).addReg(ShiftReg);
12394 if (is8bit)
12395 BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255);
12396 else {
12397 BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0);
12398 BuildMI(BB, dl, TII->get(PPC::ORI), Mask2Reg)
12399 .addReg(Mask3Reg)
12400 .addImm(65535);
12401 }
12402 BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg)
12403 .addReg(Mask2Reg)
12404 .addReg(ShiftReg);
12405
12406 BB = loopMBB;
12407 BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg)
12408 .addReg(ZeroReg)
12409 .addReg(PtrReg);
12410 if (BinOpcode)
12411 BuildMI(BB, dl, TII->get(BinOpcode), TmpReg)
12412 .addReg(Incr2Reg)
12413 .addReg(TmpDestReg);
12414 BuildMI(BB, dl, TII->get(PPC::ANDC), Tmp2Reg)
12415 .addReg(TmpDestReg)
12416 .addReg(MaskReg);
12417 BuildMI(BB, dl, TII->get(PPC::AND), Tmp3Reg).addReg(TmpReg).addReg(MaskReg);
12418 if (CmpOpcode) {
12419 // For unsigned comparisons, we can directly compare the shifted values.
12420 // For signed comparisons we shift and sign extend.
12421 Register SReg = RegInfo.createVirtualRegister(GPRC);
12422 Register CrReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
12423 BuildMI(BB, dl, TII->get(PPC::AND), SReg)
12424 .addReg(TmpDestReg)
12425 .addReg(MaskReg);
12426 unsigned ValueReg = SReg;
12427 unsigned CmpReg = Incr2Reg;
12428 if (CmpOpcode == PPC::CMPW) {
12429 ValueReg = RegInfo.createVirtualRegister(GPRC);
12430 BuildMI(BB, dl, TII->get(PPC::SRW), ValueReg)
12431 .addReg(SReg)
12432 .addReg(ShiftReg);
12433 Register ValueSReg = RegInfo.createVirtualRegister(GPRC);
12434 BuildMI(BB, dl, TII->get(is8bit ? PPC::EXTSB : PPC::EXTSH), ValueSReg)
12435 .addReg(ValueReg);
12436 ValueReg = ValueSReg;
12437 CmpReg = incr;
12438 }
12439 BuildMI(BB, dl, TII->get(CmpOpcode), CrReg).addReg(ValueReg).addReg(CmpReg);
12440 BuildMI(BB, dl, TII->get(PPC::BCC))
12441 .addImm(CmpPred)
12442 .addReg(CrReg)
12443 .addMBB(exitMBB);
12444 BB->addSuccessor(loop2MBB);
12445 BB->addSuccessor(exitMBB);
12446 BB = loop2MBB;
12447 }
12448 BuildMI(BB, dl, TII->get(PPC::OR), Tmp4Reg).addReg(Tmp3Reg).addReg(Tmp2Reg);
12449 BuildMI(BB, dl, TII->get(PPC::STWCX))
12450 .addReg(Tmp4Reg)
12451 .addReg(ZeroReg)
12452 .addReg(PtrReg);
12453 BuildMI(BB, dl, TII->get(PPC::BCC))
12454 .addImm(PPC::PRED_NE)
12455 .addReg(PPC::CR0)
12456 .addMBB(loopMBB);
12457 BB->addSuccessor(loopMBB);
12458 BB->addSuccessor(exitMBB);
12459
12460 // exitMBB:
12461 // ...
12462 BB = exitMBB;
12463 // Since the shift amount is not a constant, we need to clear
12464 // the upper bits with a separate RLWINM.
12465 BuildMI(*BB, BB->begin(), dl, TII->get(PPC::RLWINM), dest)
12466 .addReg(SrwDestReg)
12467 .addImm(0)
12468 .addImm(is8bit ? 24 : 16)
12469 .addImm(31);
12470 BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), SrwDestReg)
12471 .addReg(TmpDestReg)
12472 .addReg(ShiftReg);
12473 return BB;
12474}
12475
12476 MachineBasicBlock *
12477 PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr &MI,
12478 MachineBasicBlock *MBB) const {
12479 DebugLoc DL = MI.getDebugLoc();
12480 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
12481 const PPCRegisterInfo *TRI = Subtarget.getRegisterInfo();
12482
12483 MachineFunction *MF = MBB->getParent();
12484 MachineRegisterInfo &MRI = MF->getRegInfo();
12485
12486 const BasicBlock *BB = MBB->getBasicBlock();
12487 MachineFunction::iterator I = ++MBB->getIterator();
12488
12489 Register DstReg = MI.getOperand(0).getReg();
12490 const TargetRegisterClass *RC = MRI.getRegClass(DstReg);
12491 assert(TRI->isTypeLegalForClass(*RC, MVT::i32) && "Invalid destination!");
12492 Register mainDstReg = MRI.createVirtualRegister(RC);
12493 Register restoreDstReg = MRI.createVirtualRegister(RC);
12494
12495 MVT PVT = getPointerTy(MF->getDataLayout());
12496 assert((PVT == MVT::i64 || PVT == MVT::i32) &&
12497 "Invalid Pointer Size!");
12498 // For v = setjmp(buf), we generate
12499 //
12500 // thisMBB:
12501 // SjLjSetup mainMBB
12502 // bl mainMBB
12503 // v_restore = 1
12504 // b sinkMBB
12505 //
12506 // mainMBB:
12507 // buf[LabelOffset] = LR
12508 // v_main = 0
12509 //
12510 // sinkMBB:
12511 // v = phi(main, restore)
12512 //
12513
12514 MachineBasicBlock *thisMBB = MBB;
12515 MachineBasicBlock *mainMBB = MF->CreateMachineBasicBlock(BB);
12516 MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(BB);
12517 MF->insert(I, mainMBB);
12518 MF->insert(I, sinkMBB);
12519
12520 MachineInstrBuilder MIB;
12521
12522 // Transfer the remainder of BB and its successor edges to sinkMBB.
12523 sinkMBB->splice(sinkMBB->begin(), MBB,
12524 std::next(MachineBasicBlock::iterator(MI)), MBB->end());
12525 sinkMBB->transferSuccessorsAndUpdatePHIs(MBB);
12526
12527 // Note that the structure of the jmp_buf used here is not compatible
12528 // with that used by libc, and is not designed to be. Specifically, it
12529 // stores only those 'reserved' registers that LLVM does not otherwise
12530 // understand how to spill. Also, by convention, by the time this
12531 // intrinsic is called, Clang has already stored the frame address in the
12532 // first slot of the buffer and stack address in the third. Following the
12533 // X86 target code, we'll store the jump address in the second slot. We also
12534 // need to save the TOC pointer (R2) to handle jumps between shared
12535 // libraries, and that will be stored in the fourth slot. The thread
12536 // identifier (R13) is not affected.
12537
12538 // thisMBB:
12539 const int64_t LabelOffset = 1 * PVT.getStoreSize();
12540 const int64_t TOCOffset = 3 * PVT.getStoreSize();
12541 const int64_t BPOffset = 4 * PVT.getStoreSize();
12542
12543 // Prepare the IP in a register.
12544 const TargetRegisterClass *PtrRC = getRegClassFor(PVT);
12545 Register LabelReg = MRI.createVirtualRegister(PtrRC);
12546 Register BufReg = MI.getOperand(1).getReg();
12547
12548 if (Subtarget.is64BitELFABI()) {
12549 setUsesTOCBasePtr(*MBB->getParent());
12550 MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::STD))
12551 .addReg(PPC::X2)
12552 .addImm(TOCOffset)
12553 .addReg(BufReg)
12554 .cloneMemRefs(MI);
12555 }
12556
12557 // Naked functions never have a base pointer, and so we use r1. For all
12558 // other functions, this decision must be delayed until PEI.
12559 unsigned BaseReg;
12560 if (MF->getFunction().hasFnAttribute(Attribute::Naked))
12561 BaseReg = Subtarget.isPPC64() ? PPC::X1 : PPC::R1;
12562 else
12563 BaseReg = Subtarget.isPPC64() ? PPC::BP8 : PPC::BP;
12564
12565 MIB = BuildMI(*thisMBB, MI, DL,
12566 TII->get(Subtarget.isPPC64() ? PPC::STD : PPC::STW))
12567 .addReg(BaseReg)
12568 .addImm(BPOffset)
12569 .addReg(BufReg)
12570 .cloneMemRefs(MI);
12571
12572 // Setup
12573 MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::BCLalways)).addMBB(mainMBB);
12574 MIB.addRegMask(TRI->getNoPreservedMask());
12575
12576 BuildMI(*thisMBB, MI, DL, TII->get(PPC::LI), restoreDstReg).addImm(1);
12577
12578 MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::EH_SjLj_Setup))
12579 .addMBB(mainMBB);
12580 MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::B)).addMBB(sinkMBB);
12581
12582 thisMBB->addSuccessor(mainMBB, BranchProbability::getZero());
12583 thisMBB->addSuccessor(sinkMBB, BranchProbability::getOne());
12584
12585 // mainMBB:
12586 // mainDstReg = 0
12587 MIB =
12588 BuildMI(mainMBB, DL,
12589 TII->get(Subtarget.isPPC64() ? PPC::MFLR8 : PPC::MFLR), LabelReg);
12590
12591 // Store IP
12592 if (Subtarget.isPPC64()) {
12593 MIB = BuildMI(mainMBB, DL, TII->get(PPC::STD))
12594 .addReg(LabelReg)
12595 .addImm(LabelOffset)
12596 .addReg(BufReg);
12597 } else {
12598 MIB = BuildMI(mainMBB, DL, TII->get(PPC::STW))
12599 .addReg(LabelReg)
12600 .addImm(LabelOffset)
12601 .addReg(BufReg);
12602 }
12603 MIB.cloneMemRefs(MI);
12604
12605 BuildMI(mainMBB, DL, TII->get(PPC::LI), mainDstReg).addImm(0);
12606 mainMBB->addSuccessor(sinkMBB);
12607
12608 // sinkMBB:
12609 BuildMI(*sinkMBB, sinkMBB->begin(), DL,
12610 TII->get(PPC::PHI), DstReg)
12611 .addReg(mainDstReg).addMBB(mainMBB)
12612 .addReg(restoreDstReg).addMBB(thisMBB);
12613
12614 MI.eraseFromParent();
12615 return sinkMBB;
12616}
12617
12618MachineBasicBlock *
12619PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr &MI,
12620 MachineBasicBlock *MBB) const {
12621 DebugLoc DL = MI.getDebugLoc();
12622 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
12623
12624 MachineFunction *MF = MBB->getParent();
12625 MachineRegisterInfo &MRI = MF->getRegInfo();
12626
12627 MVT PVT = getPointerTy(MF->getDataLayout());
12628 assert((PVT == MVT::i64 || PVT == MVT::i32) &&
12629 "Invalid Pointer Size!");
12630
12631 const TargetRegisterClass *RC =
12632 (PVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
12633 Register Tmp = MRI.createVirtualRegister(RC);
12634 // Since FP is only updated here but NOT referenced, it's treated as GPR.
12635 unsigned FP = (PVT == MVT::i64) ? PPC::X31 : PPC::R31;
12636 unsigned SP = (PVT == MVT::i64) ? PPC::X1 : PPC::R1;
12637 unsigned BP =
12638 (PVT == MVT::i64)
12639 ? PPC::X30
12640 : (Subtarget.isSVR4ABI() && isPositionIndependent() ? PPC::R29
12641 : PPC::R30);
12642
12643 MachineInstrBuilder MIB;
12644
12645 const int64_t LabelOffset = 1 * PVT.getStoreSize();
12646 const int64_t SPOffset = 2 * PVT.getStoreSize();
12647 const int64_t TOCOffset = 3 * PVT.getStoreSize();
12648 const int64_t BPOffset = 4 * PVT.getStoreSize();
12649
12650 Register BufReg = MI.getOperand(0).getReg();
12651
12652 // Reload FP (the jumped-to function may not have had a
12653 // frame pointer, and if so, then its r31 will be restored
12654 // as necessary).
12655 if (PVT == MVT::i64) {
12656 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), FP)
12657 .addImm(0)
12658 .addReg(BufReg);
12659 } else {
12660 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), FP)
12661 .addImm(0)
12662 .addReg(BufReg);
12663 }
12664 MIB.cloneMemRefs(MI);
12665
12666 // Reload IP
12667 if (PVT == MVT::i64) {
12668 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), Tmp)
12669 .addImm(LabelOffset)
12670 .addReg(BufReg);
12671 } else {
12672 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), Tmp)
12673 .addImm(LabelOffset)
12674 .addReg(BufReg);
12675 }
12676 MIB.cloneMemRefs(MI);
12677
12678 // Reload SP
12679 if (PVT == MVT::i64) {
12680 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), SP)
12681 .addImm(SPOffset)
12682 .addReg(BufReg);
12683 } else {
12684 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), SP)
12685 .addImm(SPOffset)
12686 .addReg(BufReg);
12687 }
12688 MIB.cloneMemRefs(MI);
12689
12690 // Reload BP
12691 if (PVT == MVT::i64) {
12692 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), BP)
12693 .addImm(BPOffset)
12694 .addReg(BufReg);
12695 } else {
12696 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), BP)
12697 .addImm(BPOffset)
12698 .addReg(BufReg);
12699 }
12700 MIB.cloneMemRefs(MI);
12701
12702 // Reload TOC
12703 if (PVT == MVT::i64 && Subtarget.isSVR4ABI()) {
12704 setUsesTOCBasePtr(*MBB->getParent());
12705 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), PPC::X2)
12706 .addImm(TOCOffset)
12707 .addReg(BufReg)
12708 .cloneMemRefs(MI);
12709 }
12710
12711 // Jump
12712 BuildMI(*MBB, MI, DL,
12713 TII->get(PVT == MVT::i64 ? PPC::MTCTR8 : PPC::MTCTR)).addReg(Tmp);
12714 BuildMI(*MBB, MI, DL, TII->get(PVT == MVT::i64 ? PPC::BCTR8 : PPC::BCTR));
12715
12716 MI.eraseFromParent();
12717 return MBB;
12718}
12719
12720bool PPCTargetLowering::hasInlineStackProbe(const MachineFunction &MF) const {
12721 // If the function specifically requests inline stack probes, emit them.
12722 if (MF.getFunction().hasFnAttribute("probe-stack"))
12723 return MF.getFunction().getFnAttribute("probe-stack").getValueAsString() ==
12724 "inline-asm";
12725 return false;
12726}
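// Illustrative note (not from the original source): "probe-stack" is a
// plain IR function attribute, so a function opting into inline probing
// would look roughly like this hypothetical sketch:
//   define void @f() "probe-stack"="inline-asm" {
//     %buf = alloca i8, i64 70000
//     ret void
//   }
// Any other attribute value, or no attribute at all, leaves this hook
// returning false.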
12727
12728unsigned PPCTargetLowering::getStackProbeSize(const MachineFunction &MF) const {
12729 const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
12730 unsigned StackAlign = TFI->getStackAlignment();
12731 assert(StackAlign >= 1 && isPowerOf2_32(StackAlign) &&
12732 "Unexpected stack alignment");
12733 // The default stack probe size is 4096 if the function has no
12734 // stack-probe-size attribute.
12735 const Function &Fn = MF.getFunction();
12736 unsigned StackProbeSize =
12737 Fn.getFnAttributeAsParsedInteger("stack-probe-size", 4096);
12738 // Round down to the stack alignment.
12739 StackProbeSize &= ~(StackAlign - 1);
12740 return StackProbeSize ? StackProbeSize : StackAlign;
12741}
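// Worked example (editorial sketch, hypothetical numbers): with a 16-byte
// stack alignment and a "stack-probe-size"="4100" attribute, the rounding
// above yields 4100 & ~(16 - 1) == 4096. If the requested size rounds down
// to zero (say, an attribute value of 8 with 16-byte alignment), the
// function falls back to returning StackAlign itself.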
12742
12743// Lower dynamic stack allocation with probing. `emitProbedAlloca` is split
12744// into three phases. In the first phase, it uses the pseudo instruction
12745// PREPARE_PROBED_ALLOCA to get the future result of the actual FramePointer and
12746// FinalStackPtr. In the second phase, it generates a loop that probes blocks.
12747// Finally, it uses the pseudo instruction DYNAREAOFFSET to get the future result
12748// of MaxCallFrameSize so that it can compute the correct data area pointer.
12749MachineBasicBlock *
12750PPCTargetLowering::emitProbedAlloca(MachineInstr &MI,
12751 MachineBasicBlock *MBB) const {
12752 const bool isPPC64 = Subtarget.isPPC64();
12753 MachineFunction *MF = MBB->getParent();
12754 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
12755 DebugLoc DL = MI.getDebugLoc();
12756 const unsigned ProbeSize = getStackProbeSize(*MF);
12757 const BasicBlock *ProbedBB = MBB->getBasicBlock();
12758 MachineRegisterInfo &MRI = MF->getRegInfo();
12759 // The CFG of the stack probe looks as follows:
12760 // +-----+
12761 // | MBB |
12762 // +--+--+
12763 // |
12764 // +----v----+
12765 // +--->+ TestMBB +---+
12766 // | +----+----+ |
12767 // | | |
12768 // | +-----v----+ |
12769 // +---+ BlockMBB | |
12770 // +----------+ |
12771 // |
12772 // +---------+ |
12773 // | TailMBB +<--+
12774 // +---------+
12775 // In MBB, calculate previous frame pointer and final stack pointer.
12776 // In TestMBB, test if sp is equal to final stack pointer, if so, jump to
12777 // TailMBB. In BlockMBB, update the sp atomically and jump back to TestMBB.
12778 // TailMBB is spliced via \p MI.
12779 MachineBasicBlock *TestMBB = MF->CreateMachineBasicBlock(ProbedBB);
12780 MachineBasicBlock *TailMBB = MF->CreateMachineBasicBlock(ProbedBB);
12781 MachineBasicBlock *BlockMBB = MF->CreateMachineBasicBlock(ProbedBB);
12782
12783 MachineFunction::iterator MBBIter = ++MBB->getIterator();
12784 MF->insert(MBBIter, TestMBB);
12785 MF->insert(MBBIter, BlockMBB);
12786 MF->insert(MBBIter, TailMBB);
12787
12788 const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
12789 const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
12790
12791 Register DstReg = MI.getOperand(0).getReg();
12792 Register NegSizeReg = MI.getOperand(1).getReg();
12793 Register SPReg = isPPC64 ? PPC::X1 : PPC::R1;
12794 Register FinalStackPtr = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
12795 Register FramePointer = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
12796 Register ActualNegSizeReg = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
12797
12798 // Since the value of NegSizeReg might be realigned during prologue/epilogue
12799 // insertion, insert a PREPARE_PROBED_ALLOCA pseudo instruction to get the
12800 // actual FramePointer and NegSize.
12801 unsigned ProbeOpc;
12802 if (!MRI.hasOneNonDBGUse(NegSizeReg))
12803 ProbeOpc =
12804 isPPC64 ? PPC::PREPARE_PROBED_ALLOCA_64 : PPC::PREPARE_PROBED_ALLOCA_32;
12805 else
12806 // By using PREPARE_PROBED_ALLOCA_NEGSIZE_SAME_REG, ActualNegSizeReg and
12807 // NegSizeReg will be allocated in the same physical register, avoiding a
12808 // redundant copy when the only use of NegSizeReg is the current MI, which
12809 // will then be replaced by PREPARE_PROBED_ALLOCA.
12810 ProbeOpc = isPPC64 ? PPC::PREPARE_PROBED_ALLOCA_NEGSIZE_SAME_REG_64
12811 : PPC::PREPARE_PROBED_ALLOCA_NEGSIZE_SAME_REG_32;
12812 BuildMI(*MBB, {MI}, DL, TII->get(ProbeOpc), FramePointer)
12813 .addDef(ActualNegSizeReg)
12814 .addReg(NegSizeReg)
12815 .add(MI.getOperand(2))
12816 .add(MI.getOperand(3));
12817
12818 // Calculate the final stack pointer, which equals SP + ActualNegSize.
12819 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::ADD8 : PPC::ADD4),
12820 FinalStackPtr)
12821 .addReg(SPReg)
12822 .addReg(ActualNegSizeReg);
12823
12824 // Materialize a scratch register for update.
12825 int64_t NegProbeSize = -(int64_t)ProbeSize;
12826 assert(isInt<32>(NegProbeSize) && "Unhandled probe size!");
12827 Register ScratchReg = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
12828 if (!isInt<16>(NegProbeSize)) {
12829 Register TempReg = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
12830 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::LIS8 : PPC::LIS), TempReg)
12831 .addImm(NegProbeSize >> 16);
12832 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::ORI8 : PPC::ORI),
12833 ScratchReg)
12834 .addReg(TempReg)
12835 .addImm(NegProbeSize & 0xFFFF);
12836 } else
12837 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::LI8 : PPC::LI), ScratchReg)
12838 .addImm(NegProbeSize);
12839
12840 {
12841 // Probing leading residual part.
12842 Register Div = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
12843 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::DIVD : PPC::DIVW), Div)
12844 .addReg(ActualNegSizeReg)
12845 .addReg(ScratchReg);
12846 Register Mul = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
12847 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::MULLD : PPC::MULLW), Mul)
12848 .addReg(Div)
12849 .addReg(ScratchReg);
12850 Register NegMod = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
12851 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::SUBF8 : PPC::SUBF), NegMod)
12852 .addReg(Mul)
12853 .addReg(ActualNegSizeReg);
12854 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::STDUX : PPC::STWUX), SPReg)
12855 .addReg(FramePointer)
12856 .addReg(SPReg)
12857 .addReg(NegMod);
12858 }
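// Worked example for the residual probe above (editorial sketch, assuming
// ProbeSize == 4096 and an allocation of 10000 bytes, i.e.
// ActualNegSize == -10000 and ScratchReg == -4096):
//   Div    = -10000 / -4096  = 2
//   Mul    = 2 * -4096       = -8192
//   NegMod = -10000 - -8192  = -1808
// so the stdux/stwux above first bumps SP by the 1808-byte remainder, and
// the loop in BlockMBB then performs two full 4096-byte probes.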
12859
12860 {
12861 // The remaining part should be a multiple of ProbeSize.
12862 Register CmpResult = MRI.createVirtualRegister(&PPC::CRRCRegClass);
12863 BuildMI(TestMBB, DL, TII->get(isPPC64 ? PPC::CMPD : PPC::CMPW), CmpResult)
12864 .addReg(SPReg)
12865 .addReg(FinalStackPtr);
12866 BuildMI(TestMBB, DL, TII->get(PPC::BCC))
12867 .addImm(PPC::PRED_EQ)
12868 .addReg(CmpResult)
12869 .addMBB(TailMBB);
12870 TestMBB->addSuccessor(BlockMBB);
12871 TestMBB->addSuccessor(TailMBB);
12872 }
12873
12874 {
12875 // Touch the block.
12876 // |P...|P...|P...
12877 BuildMI(BlockMBB, DL, TII->get(isPPC64 ? PPC::STDUX : PPC::STWUX), SPReg)
12878 .addReg(FramePointer)
12879 .addReg(SPReg)
12880 .addReg(ScratchReg);
12881 BuildMI(BlockMBB, DL, TII->get(PPC::B)).addMBB(TestMBB);
12882 BlockMBB->addSuccessor(TestMBB);
12883 }
12884
12885 // Calculation of MaxCallFrameSize is deferred to prologue/epilogue insertion;
12886 // use the DYNAREAOFFSET pseudo instruction to get the future result.
12887 Register MaxCallFrameSizeReg =
12888 MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
12889 BuildMI(TailMBB, DL,
12890 TII->get(isPPC64 ? PPC::DYNAREAOFFSET8 : PPC::DYNAREAOFFSET),
12891 MaxCallFrameSizeReg)
12892 .add(MI.getOperand(2))
12893 .add(MI.getOperand(3));
12894 BuildMI(TailMBB, DL, TII->get(isPPC64 ? PPC::ADD8 : PPC::ADD4), DstReg)
12895 .addReg(SPReg)
12896 .addReg(MaxCallFrameSizeReg);
12897
12898 // Splice instructions after MI to TailMBB.
12899 TailMBB->splice(TailMBB->end(), MBB,
12900 std::next(MachineBasicBlock::iterator(MI)), MBB->end());
12901 TailMBB->transferSuccessorsAndUpdatePHIs(MBB);
12902 MBB->addSuccessor(TestMBB);
12903
12904 // Delete the pseudo instruction.
12905 MI.eraseFromParent();
12906
12907 ++NumDynamicAllocaProbed;
12908 return TailMBB;
12909}
12910
12911static bool IsSelectCC(MachineInstr &MI) {
12912 switch (MI.getOpcode()) {
12913 case PPC::SELECT_CC_I4:
12914 case PPC::SELECT_CC_I8:
12915 case PPC::SELECT_CC_F4:
12916 case PPC::SELECT_CC_F8:
12917 case PPC::SELECT_CC_F16:
12918 case PPC::SELECT_CC_VRRC:
12919 case PPC::SELECT_CC_VSFRC:
12920 case PPC::SELECT_CC_VSSRC:
12921 case PPC::SELECT_CC_VSRC:
12922 case PPC::SELECT_CC_SPE4:
12923 case PPC::SELECT_CC_SPE:
12924 return true;
12925 default:
12926 return false;
12927 }
12928}
12929
12930static bool IsSelect(MachineInstr &MI) {
12931 switch (MI.getOpcode()) {
12932 case PPC::SELECT_I4:
12933 case PPC::SELECT_I8:
12934 case PPC::SELECT_F4:
12935 case PPC::SELECT_F8:
12936 case PPC::SELECT_F16:
12937 case PPC::SELECT_SPE:
12938 case PPC::SELECT_SPE4:
12939 case PPC::SELECT_VRRC:
12940 case PPC::SELECT_VSFRC:
12941 case PPC::SELECT_VSSRC:
12942 case PPC::SELECT_VSRC:
12943 return true;
12944 default:
12945 return false;
12946 }
12947}
12948
12949MachineBasicBlock *
12950PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
12951 MachineBasicBlock *BB) const {
12952 if (MI.getOpcode() == TargetOpcode::STACKMAP ||
12953 MI.getOpcode() == TargetOpcode::PATCHPOINT) {
12954 if (Subtarget.is64BitELFABI() &&
12955 MI.getOpcode() == TargetOpcode::PATCHPOINT &&
12956 !Subtarget.isUsingPCRelativeCalls()) {
12957 // Call lowering should have added an r2 operand to indicate a dependence
12958 // on the TOC base pointer value. It can't however, because there is no
12959 // way to mark the dependence as implicit there, and so the stackmap code
12960 // will confuse it with a regular operand. Instead, add the dependence
12961 // here.
12962 MI.addOperand(MachineOperand::CreateReg(PPC::X2, false, true));
12963 }
12964
12965 return emitPatchPoint(MI, BB);
12966 }
12967
12968 if (MI.getOpcode() == PPC::EH_SjLj_SetJmp32 ||
12969 MI.getOpcode() == PPC::EH_SjLj_SetJmp64) {
12970 return emitEHSjLjSetJmp(MI, BB);
12971 } else if (MI.getOpcode() == PPC::EH_SjLj_LongJmp32 ||
12972 MI.getOpcode() == PPC::EH_SjLj_LongJmp64) {
12973 return emitEHSjLjLongJmp(MI, BB);
12974 }
12975
12976 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
12977
12978 // To "insert" these instructions we actually have to insert their
12979 // control-flow patterns.
12980 const BasicBlock *LLVM_BB = BB->getBasicBlock();
12981 MachineFunction::iterator It = ++BB->getIterator();
12982
12983 MachineFunction *F = BB->getParent();
12984 MachineRegisterInfo &MRI = F->getRegInfo();
12985
12986 if (Subtarget.hasISEL() &&
12987 (MI.getOpcode() == PPC::SELECT_CC_I4 ||
12988 MI.getOpcode() == PPC::SELECT_CC_I8 ||
12989 MI.getOpcode() == PPC::SELECT_I4 || MI.getOpcode() == PPC::SELECT_I8)) {
12990 SmallVector<MachineOperand, 2> Cond;
12991 if (MI.getOpcode() == PPC::SELECT_CC_I4 ||
12992 MI.getOpcode() == PPC::SELECT_CC_I8)
12993 Cond.push_back(MI.getOperand(4));
12994 else
12995 Cond.push_back(MachineOperand::CreateImm(PPC::PRED_BIT_SET));
12996 Cond.push_back(MI.getOperand(1));
12997
12998 DebugLoc dl = MI.getDebugLoc();
12999 TII->insertSelect(*BB, MI, dl, MI.getOperand(0).getReg(), Cond,
13000 MI.getOperand(2).getReg(), MI.getOperand(3).getReg());
13001 } else if (IsSelectCC(MI) || IsSelect(MI)) {
13002 // The incoming instruction knows the destination vreg to set, the
13003 // condition code register to branch on, the true/false values to
13004 // select between, and a branch opcode to use.
13005
13006 // thisMBB:
13007 // ...
13008 // TrueVal = ...
13009 // cmpTY ccX, r1, r2
13010 // bCC sinkMBB
13011 // fallthrough --> copy0MBB
13012 MachineBasicBlock *thisMBB = BB;
13013 MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
13014 MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
13015 DebugLoc dl = MI.getDebugLoc();
13016 F->insert(It, copy0MBB);
13017 F->insert(It, sinkMBB);
13018
13019 // Set the call frame size on entry to the new basic blocks.
13020 // See https://reviews.llvm.org/D156113.
13021 unsigned CallFrameSize = TII->getCallFrameSizeAt(MI);
13022 copy0MBB->setCallFrameSize(CallFrameSize);
13023 sinkMBB->setCallFrameSize(CallFrameSize);
13024
13025 // Transfer the remainder of BB and its successor edges to sinkMBB.
13026 sinkMBB->splice(sinkMBB->begin(), BB,
13027 std::next(MachineBasicBlock::iterator(MI)), BB->end());
13028 sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
13029
13030 // Next, add the true and fallthrough blocks as its successors.
13031 BB->addSuccessor(copy0MBB);
13032 BB->addSuccessor(sinkMBB);
13033
13034 if (IsSelect(MI)) {
13035 BuildMI(BB, dl, TII->get(PPC::BC))
13036 .addReg(MI.getOperand(1).getReg())
13037 .addMBB(sinkMBB);
13038 } else {
13039 unsigned SelectPred = MI.getOperand(4).getImm();
13040 BuildMI(BB, dl, TII->get(PPC::BCC))
13041 .addImm(SelectPred)
13042 .addReg(MI.getOperand(1).getReg())
13043 .addMBB(sinkMBB);
13044 }
13045
13046 // copy0MBB:
13047 // %FalseValue = ...
13048 // # fallthrough to sinkMBB
13049 BB = copy0MBB;
13050
13051 // Update machine-CFG edges
13052 BB->addSuccessor(sinkMBB);
13053
13054 // sinkMBB:
13055 // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
13056 // ...
13057 BB = sinkMBB;
13058 BuildMI(*BB, BB->begin(), dl, TII->get(PPC::PHI), MI.getOperand(0).getReg())
13059 .addReg(MI.getOperand(3).getReg())
13060 .addMBB(copy0MBB)
13061 .addReg(MI.getOperand(2).getReg())
13062 .addMBB(thisMBB);
13063 } else if (MI.getOpcode() == PPC::ReadTB) {
13064 // To read the 64-bit time-base register on a 32-bit target, we read the
13065 // two halves. Should the counter have wrapped while it was being read, we
13066 // need to try again.
13067 // ...
13068 // readLoop:
13069 // mfspr Rx,TBU # load from TBU
13070 // mfspr Ry,TB # load from TB
13071 // mfspr Rz,TBU # load from TBU
13072 // cmpw crX,Rx,Rz # check if 'old'='new'
13073 // bne readLoop # branch if they're not equal
13074 // ...
13075
13076 MachineBasicBlock *readMBB = F->CreateMachineBasicBlock(LLVM_BB);
13077 MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
13078 DebugLoc dl = MI.getDebugLoc();
13079 F->insert(It, readMBB);
13080 F->insert(It, sinkMBB);
13081
13082 // Transfer the remainder of BB and its successor edges to sinkMBB.
13083 sinkMBB->splice(sinkMBB->begin(), BB,
13084 std::next(MachineBasicBlock::iterator(MI)), BB->end());
13085 sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
13086
13087 BB->addSuccessor(readMBB);
13088 BB = readMBB;
13089
13090 MachineRegisterInfo &RegInfo = F->getRegInfo();
13091 Register ReadAgainReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
13092 Register LoReg = MI.getOperand(0).getReg();
13093 Register HiReg = MI.getOperand(1).getReg();
13094
13095 BuildMI(BB, dl, TII->get(PPC::MFSPR), HiReg).addImm(269);
13096 BuildMI(BB, dl, TII->get(PPC::MFSPR), LoReg).addImm(268);
13097 BuildMI(BB, dl, TII->get(PPC::MFSPR), ReadAgainReg).addImm(269);
13098
13099 Register CmpReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
13100
13101 BuildMI(BB, dl, TII->get(PPC::CMPW), CmpReg)
13102 .addReg(HiReg)
13103 .addReg(ReadAgainReg);
13104 BuildMI(BB, dl, TII->get(PPC::BCC))
13105 .addImm(PPC::PRED_NE)
13106 .addReg(CmpReg)
13107 .addMBB(readMBB);
13108
13109 BB->addSuccessor(readMBB);
13110 BB->addSuccessor(sinkMBB);
13111 } else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I8)
13112 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::ADD4);
13113 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I16)
13114 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::ADD4);
13115 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I32)
13116 BB = EmitAtomicBinary(MI, BB, 4, PPC::ADD4);
13117 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I64)
13118 BB = EmitAtomicBinary(MI, BB, 8, PPC::ADD8);
13119
13120 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I8)
13121 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::AND);
13122 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I16)
13123 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::AND);
13124 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I32)
13125 BB = EmitAtomicBinary(MI, BB, 4, PPC::AND);
13126 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I64)
13127 BB = EmitAtomicBinary(MI, BB, 8, PPC::AND8);
13128
13129 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I8)
13130 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::OR);
13131 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I16)
13132 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::OR);
13133 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I32)
13134 BB = EmitAtomicBinary(MI, BB, 4, PPC::OR);
13135 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I64)
13136 BB = EmitAtomicBinary(MI, BB, 8, PPC::OR8);
13137
13138 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I8)
13139 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::XOR);
13140 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I16)
13141 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::XOR);
13142 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I32)
13143 BB = EmitAtomicBinary(MI, BB, 4, PPC::XOR);
13144 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I64)
13145 BB = EmitAtomicBinary(MI, BB, 8, PPC::XOR8);
13146
13147 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I8)
13148 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::NAND);
13149 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I16)
13150 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::NAND);
13151 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I32)
13152 BB = EmitAtomicBinary(MI, BB, 4, PPC::NAND);
13153 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I64)
13154 BB = EmitAtomicBinary(MI, BB, 8, PPC::NAND8);
13155
13156 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I8)
13157 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::SUBF);
13158 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I16)
13159 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::SUBF);
13160 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I32)
13161 BB = EmitAtomicBinary(MI, BB, 4, PPC::SUBF);
13162 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I64)
13163 BB = EmitAtomicBinary(MI, BB, 8, PPC::SUBF8);
13164
13165 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I8)
13166 BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPW, PPC::PRED_LT);
13167 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I16)
13168 BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPW, PPC::PRED_LT);
13169 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I32)
13170 BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPW, PPC::PRED_LT);
13171 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I64)
13172 BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPD, PPC::PRED_LT);
13173
13174 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I8)
13175 BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPW, PPC::PRED_GT);
13176 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I16)
13177 BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPW, PPC::PRED_GT);
13178 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I32)
13179 BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPW, PPC::PRED_GT);
13180 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I64)
13181 BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPD, PPC::PRED_GT);
13182
13183 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I8)
13184 BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPLW, PPC::PRED_LT);
13185 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I16)
13186 BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPLW, PPC::PRED_LT);
13187 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I32)
13188 BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPLW, PPC::PRED_LT);
13189 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I64)
13190 BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPLD, PPC::PRED_LT);
13191
13192 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I8)
13193 BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPLW, PPC::PRED_GT);
13194 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I16)
13195 BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPLW, PPC::PRED_GT);
13196 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I32)
13197 BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPLW, PPC::PRED_GT);
13198 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I64)
13199 BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPLD, PPC::PRED_GT);
13200
13201 else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I8)
13202 BB = EmitPartwordAtomicBinary(MI, BB, true, 0);
13203 else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I16)
13204 BB = EmitPartwordAtomicBinary(MI, BB, false, 0);
13205 else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I32)
13206 BB = EmitAtomicBinary(MI, BB, 4, 0);
13207 else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I64)
13208 BB = EmitAtomicBinary(MI, BB, 8, 0);
13209 else if (MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I32 ||
13210 MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I64 ||
13211 (Subtarget.hasPartwordAtomics() &&
13212 MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8) ||
13213 (Subtarget.hasPartwordAtomics() &&
13214 MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I16)) {
13215 bool is64bit = MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I64;
13216
13217 auto LoadMnemonic = PPC::LDARX;
13218 auto StoreMnemonic = PPC::STDCX;
13219 switch (MI.getOpcode()) {
13220 default:
13221 llvm_unreachable("Compare and swap of unknown size");
13222 case PPC::ATOMIC_CMP_SWAP_I8:
13223 LoadMnemonic = PPC::LBARX;
13224 StoreMnemonic = PPC::STBCX;
13225 assert(Subtarget.hasPartwordAtomics() && "No support partword atomics.");
13226 break;
13227 case PPC::ATOMIC_CMP_SWAP_I16:
13228 LoadMnemonic = PPC::LHARX;
13229 StoreMnemonic = PPC::STHCX;
13230 assert(Subtarget.hasPartwordAtomics() && "No support partword atomics.");
13231 break;
13232 case PPC::ATOMIC_CMP_SWAP_I32:
13233 LoadMnemonic = PPC::LWARX;
13234 StoreMnemonic = PPC::STWCX;
13235 break;
13236 case PPC::ATOMIC_CMP_SWAP_I64:
13237 LoadMnemonic = PPC::LDARX;
13238 StoreMnemonic = PPC::STDCX;
13239 break;
13240 }
13241 MachineRegisterInfo &RegInfo = F->getRegInfo();
13242 Register dest = MI.getOperand(0).getReg();
13243 Register ptrA = MI.getOperand(1).getReg();
13244 Register ptrB = MI.getOperand(2).getReg();
13245 Register CrReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
13246 Register oldval = MI.getOperand(3).getReg();
13247 Register newval = MI.getOperand(4).getReg();
13248 DebugLoc dl = MI.getDebugLoc();
13249
13250 MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB);
13251 MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB);
13252 MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
13253 F->insert(It, loop1MBB);
13254 F->insert(It, loop2MBB);
13255 F->insert(It, exitMBB);
13256 exitMBB->splice(exitMBB->begin(), BB,
13257 std::next(MachineBasicBlock::iterator(MI)), BB->end());
13258 exitMBB->transferSuccessorsAndUpdatePHIs(BB);
13259
13260 // thisMBB:
13261 // ...
13262 // fallthrough --> loopMBB
13263 BB->addSuccessor(loop1MBB);
13264
13265 // loop1MBB:
13266 // l[bhwd]arx dest, ptr
13267 // cmp[wd] dest, oldval
13268 // bne- exitBB
13269 // loop2MBB:
13270 // st[bhwd]cx. newval, ptr
13271 // bne- loopMBB
13272 // b exitBB
13273 // exitBB:
13274 BB = loop1MBB;
13275 BuildMI(BB, dl, TII->get(LoadMnemonic), dest).addReg(ptrA).addReg(ptrB);
13276 BuildMI(BB, dl, TII->get(is64bit ? PPC::CMPD : PPC::CMPW), CrReg)
13277 .addReg(dest)
13278 .addReg(oldval);
13279 BuildMI(BB, dl, TII->get(PPC::BCC))
13280 .addImm(PPC::PRED_NE)
13281 .addReg(CrReg)
13282 .addMBB(exitMBB);
13283 BB->addSuccessor(loop2MBB);
13284 BB->addSuccessor(exitMBB);
13285
13286 BB = loop2MBB;
13287 BuildMI(BB, dl, TII->get(StoreMnemonic))
13288 .addReg(newval)
13289 .addReg(ptrA)
13290 .addReg(ptrB);
13291 BuildMI(BB, dl, TII->get(PPC::BCC))
13292 .addImm(PPC::PRED_NE)
13293 .addReg(PPC::CR0)
13294 .addMBB(loop1MBB);
13295 BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
13296 BB->addSuccessor(loop1MBB);
13297 BB->addSuccessor(exitMBB);
13298
13299 // exitMBB:
13300 // ...
13301 BB = exitMBB;
13302 } else if (MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8 ||
13303 MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I16) {
13304 // We must use 64-bit registers for addresses when targeting 64-bit,
13305 // since we're actually doing arithmetic on them. Other registers
13306 // can be 32-bit.
13307 bool is64bit = Subtarget.isPPC64();
13308 bool isLittleEndian = Subtarget.isLittleEndian();
13309 bool is8bit = MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8;
13310
13311 Register dest = MI.getOperand(0).getReg();
13312 Register ptrA = MI.getOperand(1).getReg();
13313 Register ptrB = MI.getOperand(2).getReg();
13314 Register oldval = MI.getOperand(3).getReg();
13315 Register newval = MI.getOperand(4).getReg();
13316 DebugLoc dl = MI.getDebugLoc();
13317
13318 MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB);
13319 MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB);
13320 MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
13321 F->insert(It, loop1MBB);
13322 F->insert(It, loop2MBB);
13323 F->insert(It, exitMBB);
13324 exitMBB->splice(exitMBB->begin(), BB,
13325 std::next(MachineBasicBlock::iterator(MI)), BB->end());
13326 exitMBB->transferSuccessorsAndUpdatePHIs(BB);
13327
13328 MachineRegisterInfo &RegInfo = F->getRegInfo();
13329 const TargetRegisterClass *RC =
13330 is64bit ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
13331 const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
13332
13333 Register PtrReg = RegInfo.createVirtualRegister(RC);
13334 Register Shift1Reg = RegInfo.createVirtualRegister(GPRC);
13335 Register ShiftReg =
13336 isLittleEndian ? Shift1Reg : RegInfo.createVirtualRegister(GPRC);
13337 Register NewVal2Reg = RegInfo.createVirtualRegister(GPRC);
13338 Register NewVal3Reg = RegInfo.createVirtualRegister(GPRC);
13339 Register OldVal2Reg = RegInfo.createVirtualRegister(GPRC);
13340 Register OldVal3Reg = RegInfo.createVirtualRegister(GPRC);
13341 Register MaskReg = RegInfo.createVirtualRegister(GPRC);
13342 Register Mask2Reg = RegInfo.createVirtualRegister(GPRC);
13343 Register Mask3Reg = RegInfo.createVirtualRegister(GPRC);
13344 Register Tmp2Reg = RegInfo.createVirtualRegister(GPRC);
13345 Register Tmp4Reg = RegInfo.createVirtualRegister(GPRC);
13346 Register TmpDestReg = RegInfo.createVirtualRegister(GPRC);
13347 Register Ptr1Reg;
13348 Register TmpReg = RegInfo.createVirtualRegister(GPRC);
13349 Register ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;
13350 Register CrReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
13351 // thisMBB:
13352 // ...
13353 // fallthrough --> loopMBB
13354 BB->addSuccessor(loop1MBB);
13355
13356 // The 4-byte load must be aligned, while a char or short may be
13357 // anywhere in the word. Hence all this nasty bookkeeping code.
13358 // add ptr1, ptrA, ptrB [copy if ptrA==0]
13359 // rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27]
13360 // xori shift, shift1, 24 [16]
13361 // rlwinm ptr, ptr1, 0, 0, 29
13362 // slw newval2, newval, shift
13363 // slw oldval2, oldval,shift
13364 // li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535]
13365 // slw mask, mask2, shift
13366 // and newval3, newval2, mask
13367 // and oldval3, oldval2, mask
13368 // loop1MBB:
13369 // lwarx tmpDest, ptr
13370 // and tmp, tmpDest, mask
13371 // cmpw tmp, oldval3
13372 // bne- exitBB
13373 // loop2MBB:
13374 // andc tmp2, tmpDest, mask
13375 // or tmp4, tmp2, newval3
13376 // stwcx. tmp4, ptr
13377 // bne- loop1MBB
13378 // b exitBB
13379 // exitBB:
13380 // srw dest, tmpDest, shift
13381 if (ptrA != ZeroReg) {
13382 Ptr1Reg = RegInfo.createVirtualRegister(RC);
13383 BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)
13384 .addReg(ptrA)
13385 .addReg(ptrB);
13386 } else {
13387 Ptr1Reg = ptrB;
13388 }
13389
13390 // We need to use the 32-bit subregister here to avoid a register class
13391 // mismatch in 64-bit mode.
13392 BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg)
13393 .addReg(Ptr1Reg, 0, is64bit ? PPC::sub_32 : 0)
13394 .addImm(3)
13395 .addImm(27)
13396 .addImm(is8bit ? 28 : 27);
13397 if (!isLittleEndian)
13398 BuildMI(BB, dl, TII->get(PPC::XORI), ShiftReg)
13399 .addReg(Shift1Reg)
13400 .addImm(is8bit ? 24 : 16);
13401 if (is64bit)
13402 BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg)
13403 .addReg(Ptr1Reg)
13404 .addImm(0)
13405 .addImm(61);
13406 else
13407 BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg)
13408 .addReg(Ptr1Reg)
13409 .addImm(0)
13410 .addImm(0)
13411 .addImm(29);
13412 BuildMI(BB, dl, TII->get(PPC::SLW), NewVal2Reg)
13413 .addReg(newval)
13414 .addReg(ShiftReg);
13415 BuildMI(BB, dl, TII->get(PPC::SLW), OldVal2Reg)
13416 .addReg(oldval)
13417 .addReg(ShiftReg);
13418 if (is8bit)
13419 BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255);
13420 else {
13421 BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0);
13422 BuildMI(BB, dl, TII->get(PPC::ORI), Mask2Reg)
13423 .addReg(Mask3Reg)
13424 .addImm(65535);
13425 }
13426 BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg)
13427 .addReg(Mask2Reg)
13428 .addReg(ShiftReg);
13429 BuildMI(BB, dl, TII->get(PPC::AND), NewVal3Reg)
13430 .addReg(NewVal2Reg)
13431 .addReg(MaskReg);
13432 BuildMI(BB, dl, TII->get(PPC::AND), OldVal3Reg)
13433 .addReg(OldVal2Reg)
13434 .addReg(MaskReg);
13435
13436 BB = loop1MBB;
13437 BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg)
13438 .addReg(ZeroReg)
13439 .addReg(PtrReg);
13440 BuildMI(BB, dl, TII->get(PPC::AND), TmpReg)
13441 .addReg(TmpDestReg)
13442 .addReg(MaskReg);
13443 BuildMI(BB, dl, TII->get(PPC::CMPW), CrReg)
13444 .addReg(TmpReg)
13445 .addReg(OldVal3Reg);
13446 BuildMI(BB, dl, TII->get(PPC::BCC))
13447 .addImm(PPC::PRED_NE)
13448 .addReg(CrReg)
13449 .addMBB(exitMBB);
13450 BB->addSuccessor(loop2MBB);
13451 BB->addSuccessor(exitMBB);
13452
13453 BB = loop2MBB;
13454 BuildMI(BB, dl, TII->get(PPC::ANDC), Tmp2Reg)
13455 .addReg(TmpDestReg)
13456 .addReg(MaskReg);
13457 BuildMI(BB, dl, TII->get(PPC::OR), Tmp4Reg)
13458 .addReg(Tmp2Reg)
13459 .addReg(NewVal3Reg);
13460 BuildMI(BB, dl, TII->get(PPC::STWCX))
13461 .addReg(Tmp4Reg)
13462 .addReg(ZeroReg)
13463 .addReg(PtrReg);
13464 BuildMI(BB, dl, TII->get(PPC::BCC))
13465 .addImm(PPC::PRED_NE)
13466 .addReg(PPC::CR0)
13467 .addMBB(loop1MBB);
13468 BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
13469 BB->addSuccessor(loop1MBB);
13470 BB->addSuccessor(exitMBB);
13471
13472 // exitMBB:
13473 // ...
13474 BB = exitMBB;
13475 BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), dest)
13476 .addReg(TmpReg)
13477 .addReg(ShiftReg);
13478 } else if (MI.getOpcode() == PPC::FADDrtz) {
13479 // This pseudo performs an FADD with rounding mode temporarily forced
13480 // to round-to-zero. We emit this via custom inserter since the FPSCR
13481 // is not modeled at the SelectionDAG level.
13482 Register Dest = MI.getOperand(0).getReg();
13483 Register Src1 = MI.getOperand(1).getReg();
13484 Register Src2 = MI.getOperand(2).getReg();
13485 DebugLoc dl = MI.getDebugLoc();
13486
13487 MachineRegisterInfo &RegInfo = F->getRegInfo();
13488 Register MFFSReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass);
13489
13490 // Save FPSCR value.
13491 BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), MFFSReg);
13492
13493 // Set rounding mode to round-to-zero.
13494 BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB1))
13495 .addImm(31)
13496 .addReg(PPC::RM, RegState::ImplicitDefine);
13497
13498 BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB0))
13499 .addImm(30)
13500 .addReg(PPC::RM, RegState::ImplicitDefine);
13501
13502 // Perform addition.
13503 auto MIB = BuildMI(*BB, MI, dl, TII->get(PPC::FADD), Dest)
13504 .addReg(Src1)
13505 .addReg(Src2);
13506 if (MI.getFlag(MachineInstr::NoFPExcept))
13507 MIB.setFlag(MachineInstr::NoFPExcept);
13508
13509 // Restore FPSCR value.
13510 BuildMI(*BB, MI, dl, TII->get(PPC::MTFSFb)).addImm(1).addReg(MFFSReg);
13511 } else if (MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT ||
13512 MI.getOpcode() == PPC::ANDI_rec_1_GT_BIT ||
13513 MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT8 ||
13514 MI.getOpcode() == PPC::ANDI_rec_1_GT_BIT8) {
13515 unsigned Opcode = (MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT8 ||
13516 MI.getOpcode() == PPC::ANDI_rec_1_GT_BIT8)
13517 ? PPC::ANDI8_rec
13518 : PPC::ANDI_rec;
13519 bool IsEQ = (MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT ||
13520 MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT8);
13521
13522 MachineRegisterInfo &RegInfo = F->getRegInfo();
13523 Register Dest = RegInfo.createVirtualRegister(
13524 Opcode == PPC::ANDI_rec ? &PPC::GPRCRegClass : &PPC::G8RCRegClass);
13525
13526 DebugLoc Dl = MI.getDebugLoc();
13527 BuildMI(*BB, MI, Dl, TII->get(Opcode), Dest)
13528 .addReg(MI.getOperand(1).getReg())
13529 .addImm(1);
13530 BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::COPY),
13531 MI.getOperand(0).getReg())
13532 .addReg(IsEQ ? PPC::CR0EQ : PPC::CR0GT);
13533 } else if (MI.getOpcode() == PPC::TCHECK_RET) {
13534 DebugLoc Dl = MI.getDebugLoc();
13535 MachineRegisterInfo &RegInfo = F->getRegInfo();
13536 Register CRReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
13537 BuildMI(*BB, MI, Dl, TII->get(PPC::TCHECK), CRReg);
13538 BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::COPY),
13539 MI.getOperand(0).getReg())
13540 .addReg(CRReg);
13541 } else if (MI.getOpcode() == PPC::TBEGIN_RET) {
13542 DebugLoc Dl = MI.getDebugLoc();
13543 unsigned Imm = MI.getOperand(1).getImm();
13544 BuildMI(*BB, MI, Dl, TII->get(PPC::TBEGIN)).addImm(Imm);
13545 BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::COPY),
13546 MI.getOperand(0).getReg())
13547 .addReg(PPC::CR0EQ);
13548 } else if (MI.getOpcode() == PPC::SETRNDi) {
13549 DebugLoc dl = MI.getDebugLoc();
13550 Register OldFPSCRReg = MI.getOperand(0).getReg();
13551
13552 // Save FPSCR value.
13553 if (MRI.use_empty(OldFPSCRReg))
13554 BuildMI(*BB, MI, dl, TII->get(TargetOpcode::IMPLICIT_DEF), OldFPSCRReg);
13555 else
13556 BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), OldFPSCRReg);
13557
13558 // The floating-point rounding mode is in bits 62:63 of the FPSCR and has
13559 // the following settings:
13560 // 00 Round to nearest
13561 // 01 Round to 0
13562 // 10 Round to +inf
13563 // 11 Round to -inf
13564
13565 // When the operand is an immediate, use its two least significant bits to
13566 // set bits 62:63 of the FPSCR.
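// For instance (editorial sketch): an immediate of 3 (round to -inf) emits
// MTFSB1 31 followed by MTFSB1 30, while an immediate of 0 (round to
// nearest) emits MTFSB0 31 and MTFSB0 30, matching the table above.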
13567 unsigned Mode = MI.getOperand(1).getImm();
13568 BuildMI(*BB, MI, dl, TII->get((Mode & 1) ? PPC::MTFSB1 : PPC::MTFSB0))
13569 .addImm(31)
13570 .addReg(PPC::RM, RegState::ImplicitDefine);
13571
13572 BuildMI(*BB, MI, dl, TII->get((Mode & 2) ? PPC::MTFSB1 : PPC::MTFSB0))
13573 .addImm(30)
13574 .addReg(PPC::RM, RegState::ImplicitDefine);
13575 } else if (MI.getOpcode() == PPC::SETRND) {
13576 DebugLoc dl = MI.getDebugLoc();
13577
13578 // Copy a register from F8RCRegClass::SrcReg to G8RCRegClass::DestReg,
13579 // or from G8RCRegClass::SrcReg to F8RCRegClass::DestReg.
13580 // If the target doesn't have DirectMove, we use the stack to do the
13581 // conversion, because the target lacks instructions like mtvsrd or
13582 // mfvsrd that would do this conversion directly.
13583 auto copyRegFromG8RCOrF8RC = [&] (unsigned DestReg, unsigned SrcReg) {
13584 if (Subtarget.hasDirectMove()) {
13585 BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), DestReg)
13586 .addReg(SrcReg);
13587 } else {
13588 // Use stack to do the register copy.
13589 unsigned StoreOp = PPC::STD, LoadOp = PPC::LFD;
13590 MachineRegisterInfo &RegInfo = F->getRegInfo();
13591 const TargetRegisterClass *RC = RegInfo.getRegClass(SrcReg);
13592 if (RC == &PPC::F8RCRegClass) {
13593 // Copy register from F8RCRegClass to G8RCRegclass.
13594 assert((RegInfo.getRegClass(DestReg) == &PPC::G8RCRegClass) &&
13595 "Unsupported RegClass.");
13596
13597 StoreOp = PPC::STFD;
13598 LoadOp = PPC::LD;
13599 } else {
13600 // Copy register from G8RCRegClass to F8RCRegclass.
13601 assert((RegInfo.getRegClass(SrcReg) == &PPC::G8RCRegClass) &&
13602 (RegInfo.getRegClass(DestReg) == &PPC::F8RCRegClass) &&
13603 "Unsupported RegClass.");
13604 }
13605
13606 MachineFrameInfo &MFI = F->getFrameInfo();
13607 int FrameIdx = MFI.CreateStackObject(8, Align(8), false);
13608
13609 MachineMemOperand *MMOStore = F->getMachineMemOperand(
13610 MachinePointerInfo::getFixedStack(*F, FrameIdx, 0),
13611 MachineMemOperand::MOStore, MFI.getObjectSize(FrameIdx),
13612 MFI.getObjectAlign(FrameIdx));
13613
13614 // Store the SrcReg into the stack.
13615 BuildMI(*BB, MI, dl, TII->get(StoreOp))
13616 .addReg(SrcReg)
13617 .addImm(0)
13618 .addFrameIndex(FrameIdx)
13619 .addMemOperand(MMOStore);
13620
13621 MachineMemOperand *MMOLoad = F->getMachineMemOperand(
13622 MachinePointerInfo::getFixedStack(*F, FrameIdx, 0),
13623 MachineMemOperand::MOLoad, MFI.getObjectSize(FrameIdx),
13624 MFI.getObjectAlign(FrameIdx));
13625
13626 // Load from the stack where SrcReg is stored, and save to DestReg,
13627 // so we have done the RegClass conversion from RegClass::SrcReg to
13628 // RegClass::DestReg.
13629 BuildMI(*BB, MI, dl, TII->get(LoadOp), DestReg)
13630 .addImm(0)
13631 .addFrameIndex(FrameIdx)
13632 .addMemOperand(MMOLoad);
13633 }
13634 };
13635
13636 Register OldFPSCRReg = MI.getOperand(0).getReg();
13637
13638 // Save FPSCR value.
13639 BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), OldFPSCRReg);
13640
13641 // When the operand is a gprc register, use its two least significant bits
13642 // and the mtfsf instruction to set bits 62:63 of the FPSCR.
13643 //
13644 // copy OldFPSCRTmpReg, OldFPSCRReg
13645 // (INSERT_SUBREG ExtSrcReg, (IMPLICIT_DEF ImDefReg), SrcOp, 1)
13646 // rldimi NewFPSCRTmpReg, ExtSrcReg, OldFPSCRReg, 0, 62
13647 // copy NewFPSCRReg, NewFPSCRTmpReg
13648 // mtfsf 255, NewFPSCRReg
13649 MachineOperand SrcOp = MI.getOperand(1);
13650 MachineRegisterInfo &RegInfo = F->getRegInfo();
13651 Register OldFPSCRTmpReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
13652
13653 copyRegFromG8RCOrF8RC(OldFPSCRTmpReg, OldFPSCRReg);
13654
13655 Register ImDefReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
13656 Register ExtSrcReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
13657
13658 // The first operand of INSERT_SUBREG should be a register that has
13659 // subregisters. We only care about its register class, so we use an
13660 // IMPLICIT_DEF register.
13661 BuildMI(*BB, MI, dl, TII->get(TargetOpcode::IMPLICIT_DEF), ImDefReg);
13662 BuildMI(*BB, MI, dl, TII->get(PPC::INSERT_SUBREG), ExtSrcReg)
13663 .addReg(ImDefReg)
13664 .add(SrcOp)
13665 .addImm(1);
13666
13667 Register NewFPSCRTmpReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
13668 BuildMI(*BB, MI, dl, TII->get(PPC::RLDIMI), NewFPSCRTmpReg)
13669 .addReg(OldFPSCRTmpReg)
13670 .addReg(ExtSrcReg)
13671 .addImm(0)
13672 .addImm(62);
13673
13674 Register NewFPSCRReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass);
13675 copyRegFromG8RCOrF8RC(NewFPSCRReg, NewFPSCRTmpReg);
13676
13677 // The mask 255 means that bits 32:63 of NewFPSCRReg are placed into bits
13678 // 32:63 of the FPSCR.
13679 BuildMI(*BB, MI, dl, TII->get(PPC::MTFSF))
13680 .addImm(255)
13681 .addReg(NewFPSCRReg)
13682 .addImm(0)
13683 .addImm(0);
13684 } else if (MI.getOpcode() == PPC::SETFLM) {
13685 DebugLoc Dl = MI.getDebugLoc();
13686
13687 // Result of setflm is previous FPSCR content, so we need to save it first.
13688 Register OldFPSCRReg = MI.getOperand(0).getReg();
13689 if (MRI.use_empty(OldFPSCRReg))
13690 BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::IMPLICIT_DEF), OldFPSCRReg);
13691 else
13692 BuildMI(*BB, MI, Dl, TII->get(PPC::MFFS), OldFPSCRReg);
13693
13694 // Put bits in 32:63 to FPSCR.
13695 Register NewFPSCRReg = MI.getOperand(1).getReg();
13696 BuildMI(*BB, MI, Dl, TII->get(PPC::MTFSF))
13697 .addImm(255)
13698 .addReg(NewFPSCRReg)
13699 .addImm(0)
13700 .addImm(0);
13701 } else if (MI.getOpcode() == PPC::PROBED_ALLOCA_32 ||
13702 MI.getOpcode() == PPC::PROBED_ALLOCA_64) {
13703 return emitProbedAlloca(MI, BB);
13704 } else if (MI.getOpcode() == PPC::SPLIT_QUADWORD) {
13705 DebugLoc DL = MI.getDebugLoc();
13706 Register Src = MI.getOperand(2).getReg();
13707 Register Lo = MI.getOperand(0).getReg();
13708 Register Hi = MI.getOperand(1).getReg();
13709 BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY))
13710 .addDef(Lo)
13711 .addUse(Src, 0, PPC::sub_gp8_x1);
13712 BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY))
13713 .addDef(Hi)
13714 .addUse(Src, 0, PPC::sub_gp8_x0);
13715 } else if (MI.getOpcode() == PPC::LQX_PSEUDO ||
13716 MI.getOpcode() == PPC::STQX_PSEUDO) {
13717 DebugLoc DL = MI.getDebugLoc();
13718 // Ptr is used as the ptr_rc_no_r0 part
13719 // of LQ/STQ's memory operand and holds the sum of RA and RB,
13720 // so it has to be g8rc_and_g8rc_nox0.
13721 Register Ptr =
13722 F->getRegInfo().createVirtualRegister(&PPC::G8RC_and_G8RC_NOX0RegClass);
13723 Register Val = MI.getOperand(0).getReg();
13724 Register RA = MI.getOperand(1).getReg();
13725 Register RB = MI.getOperand(2).getReg();
13726 BuildMI(*BB, MI, DL, TII->get(PPC::ADD8), Ptr).addReg(RA).addReg(RB);
13727 BuildMI(*BB, MI, DL,
13728 MI.getOpcode() == PPC::LQX_PSEUDO ? TII->get(PPC::LQ)
13729 : TII->get(PPC::STQ))
13730 .addReg(Val, MI.getOpcode() == PPC::LQX_PSEUDO ? RegState::Define : 0)
13731 .addImm(0)
13732 .addReg(Ptr);
13733 } else {
13734 llvm_unreachable("Unexpected instr type to insert");
13735 }
13736
13737 MI.eraseFromParent(); // The pseudo instruction is gone now.
13738 return BB;
13739}
13740
13741//===----------------------------------------------------------------------===//
13742// Target Optimization Hooks
13743//===----------------------------------------------------------------------===//
13744
13745static int getEstimateRefinementSteps(EVT VT, const PPCSubtarget &Subtarget) {
13746 // For the estimates, convergence is quadratic, so we essentially double the
13747 // number of digits correct after every iteration. For both FRE and FRSQRTE,
13748 // the minimum architected relative accuracy is 2^-5. When hasRecipPrec(),
13749 // this is 2^-14. IEEE float has 23 mantissa bits and double has 52.
13750 int RefinementSteps = Subtarget.hasRecipPrec() ? 1 : 3;
13751 if (VT.getScalarType() == MVT::f64)
13752 RefinementSteps++;
13753 return RefinementSteps;
13754}
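// Worked example (editorial sketch): with hasRecipPrec() the estimate is
// accurate to 2^-14, so one Newton-Raphson step reaches roughly 2^-28
// (enough for f32's 24-bit significand) and the extra step added for f64
// reaches roughly 2^-56 (enough for its 53 bits). Without hasRecipPrec(),
// the 2^-5 starting point needs three steps for f32 and four for f64.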
13755
13756SDValue PPCTargetLowering::getSqrtInputTest(SDValue Op, SelectionDAG &DAG,
13757 const DenormalMode &Mode) const {
13758 // We only have VSX Vector Test for software Square Root.
13759 EVT VT = Op.getValueType();
13760 if (!isTypeLegal(MVT::i1) ||
13761 (VT != MVT::f64 &&
13762 ((VT != MVT::v2f64 && VT != MVT::v4f32) || !Subtarget.hasVSX())))
13763 return TargetLowering::getSqrtInputTest(Op, DAG, Mode);
13764
13765 SDLoc DL(Op);
13766 // The output register of FTSQRT is CR field.
13767 SDValue FTSQRT = DAG.getNode(PPCISD::FTSQRT, DL, MVT::i32, Op);
13768 // ftsqrt BF,FRB
13769 // Let e_b be the unbiased exponent of the double-precision
13770 // floating-point operand in register FRB.
13771 // fe_flag is set to 1 if either of the following conditions occurs.
13772 // - The double-precision floating-point operand in register FRB is a zero,
13773 // a NaN, or an infinity, or a negative value.
13774 // - e_b is less than or equal to -970.
13775 // Otherwise fe_flag is set to 0.
13776 // Both VSX and non-VSX versions would set EQ bit in the CR if the number is
13777 // not eligible for iteration. (zero/negative/infinity/nan or unbiased
13778 // exponent is less than -970)
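// Put differently (editorial note): the EXTRACT_SUBREG below pulls the EQ
// bit (PPC::sub_eq) out of the CR field written by FTSQRT, yielding an i1
// that is true exactly when the input is not eligible for the software
// refinement described above.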
13779 SDValue SRIdxVal = DAG.getTargetConstant(PPC::sub_eq, DL, MVT::i32);
13780 return SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, MVT::i1,
13781 FTSQRT, SRIdxVal),
13782 0);
13783}
13784
13785SDValue
13786PPCTargetLowering::getSqrtResultForDenormInput(SDValue Op,
13787 SelectionDAG &DAG) const {
13788 // We only have VSX Vector Square Root.
13789 EVT VT = Op.getValueType();
13790 if (VT != MVT::f64 &&
13791 ((VT != MVT::v2f64 && VT != MVT::v4f32) || !Subtarget.hasVSX()))
13792 return TargetLowering::getSqrtResultForDenormInput(Op, DAG);
13793
13794 return DAG.getNode(PPCISD::FSQRT, SDLoc(Op), VT, Op);
13795}
13796
13797SDValue PPCTargetLowering::getSqrtEstimate(SDValue Operand, SelectionDAG &DAG,
13798 int Enabled, int &RefinementSteps,
13799 bool &UseOneConstNR,
13800 bool Reciprocal) const {
13801 EVT VT = Operand.getValueType();
13802 if ((VT == MVT::f32 && Subtarget.hasFRSQRTES()) ||
13803 (VT == MVT::f64 && Subtarget.hasFRSQRTE()) ||
13804 (VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
13805 (VT == MVT::v2f64 && Subtarget.hasVSX())) {
13806 if (RefinementSteps == ReciprocalEstimate::Unspecified)
13807 RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
13808
13809 // The Newton-Raphson computation with a single constant does not provide
13810 // enough accuracy on some CPUs.
13811 UseOneConstNR = !Subtarget.needsTwoConstNR();
13812 return DAG.getNode(PPCISD::FRSQRTE, SDLoc(Operand), VT, Operand);
13813 }
13814 return SDValue();
13815}
13816
13817SDValue PPCTargetLowering::getRecipEstimate(SDValue Operand, SelectionDAG &DAG,
13818 int Enabled,
13819 int &RefinementSteps) const {
13820 EVT VT = Operand.getValueType();
13821 if ((VT == MVT::f32 && Subtarget.hasFRES()) ||
13822 (VT == MVT::f64 && Subtarget.hasFRE()) ||
13823 (VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
13824 (VT == MVT::v2f64 && Subtarget.hasVSX())) {
13825 if (RefinementSteps == ReciprocalEstimate::Unspecified)
13826 RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
13827 return DAG.getNode(PPCISD::FRE, SDLoc(Operand), VT, Operand);
13828 }
13829 return SDValue();
13830}
13831
13832unsigned PPCTargetLowering::combineRepeatedFPDivisors() const {
13833 // Note: This functionality is used only when unsafe-fp-math is enabled, and
13834 // on cores with reciprocal estimates (which are used when unsafe-fp-math is
13835 // enabled for division), this functionality is redundant with the default
13836 // combiner logic (once the division -> reciprocal/multiply transformation
13837 // has taken place). As a result, this matters more for older cores than for
13838 // newer ones.
13839
13840 // Combine multiple FDIVs with the same divisor into multiple FMULs by the
13841 // reciprocal if there are two or more FDIVs (for embedded cores with only
13842 // one FP pipeline) or three or more FDIVs (for generic OOO cores).
13843 switch (Subtarget.getCPUDirective()) {
13844 default:
13845 return 3;
13846 case PPC::DIR_440:
13847 case PPC::DIR_A2:
13848 case PPC::DIR_E500:
13849 case PPC::DIR_E500mc:
13850 case PPC::DIR_E5500:
13851 return 2;
13852 }
13853}
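// As an illustration (editorial sketch): with the default threshold of 3,
// IR along the lines of
//   %a = fdiv fast double %x, %d
//   %b = fdiv fast double %y, %d
//   %c = fdiv fast double %z, %d
// is turned by the generic combiner into one reciprocal plus multiplies
// (%r = fdiv fast double 1.0, %d; %a = fmul fast double %x, %r; ...),
// while only two divisions by %d would be left alone on generic OOO cores.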
13854
13855// isConsecutiveLSLoc needs to work even if all adds have not yet been
13856// collapsed, and so we need to look through chains of them.
13857static void getBaseWithConstantOffset(SDValue Loc, SDValue &Base,
13858 int64_t& Offset, SelectionDAG &DAG) {
13859 if (DAG.isBaseWithConstantOffset(Loc)) {
13860 Base = Loc.getOperand(0);
13861 Offset += cast<ConstantSDNode>(Loc.getOperand(1))->getSExtValue();
13862
13863 // The base might itself be a base plus an offset, and if so, accumulate
13864 // that as well.
13865 getBaseWithConstantOffset(Loc.getOperand(0), Base, Offset, DAG);
13866 }
13867}
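// For example (editorial sketch): given an address such as
//   (add (add %FI, 16), 8)
// the recursion above accumulates Offset = 24 and leaves Base = %FI, which
// is what lets isConsecutiveLSLoc below compare addresses whose adds have
// not yet been collapsed into a single constant offset.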
13868
13869static bool isConsecutiveLSLoc(SDValue Loc, EVT VT, LSBaseSDNode *Base,
13870 unsigned Bytes, int Dist,
13871 SelectionDAG &DAG) {
13872 if (VT.getSizeInBits() / 8 != Bytes)
13873 return false;
13874
13875 SDValue BaseLoc = Base->getBasePtr();
13876 if (Loc.getOpcode() == ISD::FrameIndex) {
13877 if (BaseLoc.getOpcode() != ISD::FrameIndex)
13878 return false;
13879 const MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
13880 int FI = cast<FrameIndexSDNode>(Loc)->getIndex();
13881 int BFI = cast<FrameIndexSDNode>(BaseLoc)->getIndex();
13882 int FS = MFI.getObjectSize(FI);
13883 int BFS = MFI.getObjectSize(BFI);
13884 if (FS != BFS || FS != (int)Bytes) return false;
13885 return MFI.getObjectOffset(FI) == (MFI.getObjectOffset(BFI) + Dist*Bytes);
13886 }
13887
13888 SDValue Base1 = Loc, Base2 = BaseLoc;
13889 int64_t Offset1 = 0, Offset2 = 0;
13890 getBaseWithConstantOffset(Loc, Base1, Offset1, DAG);
13891 getBaseWithConstantOffset(BaseLoc, Base2, Offset2, DAG);
13892 if (Base1 == Base2 && Offset1 == (Offset2 + Dist * Bytes))
13893 return true;
13894
13895 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
13896 const GlobalValue *GV1 = nullptr;
13897 const GlobalValue *GV2 = nullptr;
13898 Offset1 = 0;
13899 Offset2 = 0;
13900 bool isGA1 = TLI.isGAPlusOffset(Loc.getNode(), GV1, Offset1);
13901 bool isGA2 = TLI.isGAPlusOffset(BaseLoc.getNode(), GV2, Offset2);
13902 if (isGA1 && isGA2 && GV1 == GV2)
13903 return Offset1 == (Offset2 + Dist*Bytes);
13904 return false;
13905}
13906
13907// Like SelectionDAG::isConsecutiveLoad, but also works for stores, and does
13908// not enforce equality of the chain operands.
13909static bool isConsecutiveLS(SDNode *N, LSBaseSDNode *Base,
13910 unsigned Bytes, int Dist,
13911 SelectionDAG &DAG) {
13912 if (LSBaseSDNode *LS = dyn_cast<LSBaseSDNode>(N)) {
13913 EVT VT = LS->getMemoryVT();
13914 SDValue Loc = LS->getBasePtr();
13915 return isConsecutiveLSLoc(Loc, VT, Base, Bytes, Dist, DAG);
13916 }
13917
13918 if (N->getOpcode() == ISD::INTRINSIC_W_CHAIN) {
13919 EVT VT;
13920 switch (N->getConstantOperandVal(1)) {
13921 default: return false;
13922 case Intrinsic::ppc_altivec_lvx:
13923 case Intrinsic::ppc_altivec_lvxl:
13924 case Intrinsic::ppc_vsx_lxvw4x:
13925 case Intrinsic::ppc_vsx_lxvw4x_be:
13926 VT = MVT::v4i32;
13927 break;
13928 case Intrinsic::ppc_vsx_lxvd2x:
13929 case Intrinsic::ppc_vsx_lxvd2x_be:
13930 VT = MVT::v2f64;
13931 break;
13932 case Intrinsic::ppc_altivec_lvebx:
13933 VT = MVT::i8;
13934 break;
13935 case Intrinsic::ppc_altivec_lvehx:
13936 VT = MVT::i16;
13937 break;
13938 case Intrinsic::ppc_altivec_lvewx:
13939 VT = MVT::i32;
13940 break;
13941 }
13942
13943 return isConsecutiveLSLoc(N->getOperand(2), VT, Base, Bytes, Dist, DAG);
13944 }
13945
13946 if (N->getOpcode() == ISD::INTRINSIC_VOID) {
13947 EVT VT;
13948 switch (N->getConstantOperandVal(1)) {
13949 default: return false;
13950 case Intrinsic::ppc_altivec_stvx:
13951 case Intrinsic::ppc_altivec_stvxl:
13952 case Intrinsic::ppc_vsx_stxvw4x:
13953 VT = MVT::v4i32;
13954 break;
13955 case Intrinsic::ppc_vsx_stxvd2x:
13956 VT = MVT::v2f64;
13957 break;
13958 case Intrinsic::ppc_vsx_stxvw4x_be:
13959 VT = MVT::v4i32;
13960 break;
13961 case Intrinsic::ppc_vsx_stxvd2x_be:
13962 VT = MVT::v2f64;
13963 break;
13964 case Intrinsic::ppc_altivec_stvebx:
13965 VT = MVT::i8;
13966 break;
13967 case Intrinsic::ppc_altivec_stvehx:
13968 VT = MVT::i16;
13969 break;
13970 case Intrinsic::ppc_altivec_stvewx:
13971 VT = MVT::i32;
13972 break;
13973 }
13974
13975 return isConsecutiveLSLoc(N->getOperand(3), VT, Base, Bytes, Dist, DAG);
13976 }
13977
13978 return false;
13979}
13980
13981// Return true if there is a nearby consecutive load to the one provided
13982// (regardless of alignment). We search up and down the chain, looking through
13983// token factors and other loads (but nothing else). As a result, a true return
13984// value indicates that it is safe to create a new consecutive load adjacent to
13985// the load provided.
13986static bool findConsecutiveLoad(LoadSDNode *LD, SelectionDAG &DAG) {
13987 SDValue Chain = LD->getChain();
13988 EVT VT = LD->getMemoryVT();
13989
13990 SmallSet<SDNode *, 16> LoadRoots;
13991 SmallVector<SDNode *, 8> Queue(1, Chain.getNode());
13992 SmallSet<SDNode *, 16> Visited;
13993
13994 // First, search up the chain, branching to follow all token-factor operands.
13995 // If we find a consecutive load, then we're done, otherwise, record all
13996 // nodes just above the top-level loads and token factors.
13997 while (!Queue.empty()) {
13998 SDNode *ChainNext = Queue.pop_back_val();
13999 if (!Visited.insert(ChainNext).second)
14000 continue;
14001
14002 if (MemSDNode *ChainLD = dyn_cast<MemSDNode>(ChainNext)) {
14003 if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG))
14004 return true;
14005
14006 if (!Visited.count(ChainLD->getChain().getNode()))
14007 Queue.push_back(ChainLD->getChain().getNode());
14008 } else if (ChainNext->getOpcode() == ISD::TokenFactor) {
14009 for (const SDUse &O : ChainNext->ops())
14010 if (!Visited.count(O.getNode()))
14011 Queue.push_back(O.getNode());
14012 } else
14013 LoadRoots.insert(ChainNext);
14014 }
14015
14016 // Second, search down the chain, starting from the top-level nodes recorded
14017 // in the first phase. These top-level nodes are the nodes just above all
14018 // loads and token factors. Starting with their uses, recursively look through
14019 // all loads (just the chain uses) and token factors to find a consecutive
14020 // load.
14021 Visited.clear();
14022 Queue.clear();
14023
14024 for (SDNode *I : LoadRoots) {
14025 Queue.push_back(I);
14026
14027 while (!Queue.empty()) {
14028 SDNode *LoadRoot = Queue.pop_back_val();
14029 if (!Visited.insert(LoadRoot).second)
14030 continue;
14031
14032 if (MemSDNode *ChainLD = dyn_cast<MemSDNode>(LoadRoot))
14033 if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG))
14034 return true;
14035
14036 for (SDNode *U : LoadRoot->uses())
14037 if (((isa<MemSDNode>(U) &&
14038 cast<MemSDNode>(U)->getChain().getNode() == LoadRoot) ||
14039 U->getOpcode() == ISD::TokenFactor) &&
14040 !Visited.count(U))
14041 Queue.push_back(U);
14042 }
14043 }
14044
14045 return false;
14046}
14047
14048/// This function is called when we have proved that a SETCC node can be replaced
14049/// by subtraction (and other supporting instructions) so that the result of
14050/// comparison is kept in a GPR instead of CR. This function is purely for
14051/// codegen purposes and has some flags to guide the codegen process.
14052static SDValue generateEquivalentSub(SDNode *N, int Size, bool Complement,
14053 bool Swap, SDLoc &DL, SelectionDAG &DAG) {
14054 assert(N->getOpcode() == ISD::SETCC && "ISD::SETCC Expected.");
14055
14056 // Zero extend the operands to the largest legal integer. Originally, they
14057 // must be of a strictly smaller size.
14058 auto Op0 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(0),
14059 DAG.getConstant(Size, DL, MVT::i32));
14060 auto Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(1),
14061 DAG.getConstant(Size, DL, MVT::i32));
14062
14063 // Swap if needed. Depends on the condition code.
14064 if (Swap)
14065 std::swap(Op0, Op1);
14066
14067 // Subtract extended integers.
14068 auto SubNode = DAG.getNode(ISD::SUB, DL, MVT::i64, Op0, Op1);
14069
14070 // Move the sign bit to the least significant position and zero out the rest.
14071 // Now the least significant bit carries the result of the original comparison.
14072 auto Shifted = DAG.getNode(ISD::SRL, DL, MVT::i64, SubNode,
14073 DAG.getConstant(Size - 1, DL, MVT::i32));
14074 auto Final = Shifted;
14075
14076 // Complement the result if needed. Based on the condition code.
14077 if (Complement)
14078 Final = DAG.getNode(ISD::XOR, DL, MVT::i64, Shifted,
14079 DAG.getConstant(1, DL, MVT::i64));
14080
14081 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, Final);
14082}
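// Illustrative example (annotation, not part of the original source): for an
// unsigned 16-bit compare lowered with Size = 64, "setult %x, %y" becomes
// roughly
//   %x64 = zero_extend %x ; %y64 = zero_extend %y
//   %sub = sub %x64, %y64        ; negative exactly when %x <u %y
//   %bit = srl %sub, 63          ; move the sign bit down to bit 0
//   %res = truncate %bit to i1
// The Complement flag additionally XORs the bit with 1 (for SETUGE/SETULE),
// and the Swap flag exchanges the operands first (for SETUGT/SETULE).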
14083
14084SDValue PPCTargetLowering::ConvertSETCCToSubtract(SDNode *N,
14085 DAGCombinerInfo &DCI) const {
14086 assert(N->getOpcode() == ISD::SETCC && "ISD::SETCC Expected.");
14087
14088 SelectionDAG &DAG = DCI.DAG;
14089 SDLoc DL(N);
14090
14091 // The size of the integers being compared has a critical role in the
14092 // following analysis, so we prefer to do this when all types are legal.
14093 if (!DCI.isAfterLegalizeDAG())
14094 return SDValue();
14095
14096 // If all users of SETCC extend its value to a legal integer type
14097 // then we replace SETCC with a subtraction
14098 for (const SDNode *U : N->uses())
14099 if (U->getOpcode() != ISD::ZERO_EXTEND)
14100 return SDValue();
14101
14102 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
14103 auto OpSize = N->getOperand(0).getValueSizeInBits();
14104
14105 unsigned Size = DAG.getDataLayout().getLargestLegalIntTypeSizeInBits();
14106
14107 if (OpSize < Size) {
14108 switch (CC) {
14109 default: break;
14110 case ISD::SETULT:
14111 return generateEquivalentSub(N, Size, false, false, DL, DAG);
14112 case ISD::SETULE:
14113 return generateEquivalentSub(N, Size, true, true, DL, DAG);
14114 case ISD::SETUGT:
14115 return generateEquivalentSub(N, Size, false, true, DL, DAG);
14116 case ISD::SETUGE:
14117 return generateEquivalentSub(N, Size, true, false, DL, DAG);
14118 }
14119 }
14120
14121 return SDValue();
14122}
14123
14124SDValue PPCTargetLowering::DAGCombineTruncBoolExt(SDNode *N,
14125 DAGCombinerInfo &DCI) const {
14126 SelectionDAG &DAG = DCI.DAG;
14127 SDLoc dl(N);
14128
14129 assert(Subtarget.useCRBits() && "Expecting to be tracking CR bits");
14130 // If we're tracking CR bits, we need to be careful that we don't have:
14131 // trunc(binary-ops(zext(x), zext(y)))
14132 // or
14133 // trunc(binary-ops(binary-ops(zext(x), zext(y)), ...)
14134 // such that we're unnecessarily moving things into GPRs when it would be
14135 // better to keep them in CR bits.
14136
14137 // Note that trunc here can be an actual i1 trunc, or can be the effective
14138 // truncation that comes from a setcc or select_cc.
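// Illustrative example (annotation): with CR-bit tracking enabled,
//   (trunc i1 (and i32 (zext i32 %a:i1), (zext i32 %b:i1)))
// can be rebuilt as (and i1 %a, %b), so the whole computation stays in
// condition-register bits instead of round-tripping through a GPR.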
14139 if (N->getOpcode() == ISD::TRUNCATE &&
14140 N->getValueType(0) != MVT::i1)
14141 return SDValue();
14142
14143 if (N->getOperand(0).getValueType() != MVT::i32 &&
14144 N->getOperand(0).getValueType() != MVT::i64)
14145 return SDValue();
14146
14147 if (N->getOpcode() == ISD::SETCC ||
14148 N->getOpcode() == ISD::SELECT_CC) {
14149 // If we're looking at a comparison, then we need to make sure that the
14150 // high bits (all except for the first) don't matter to the result.
14151 ISD::CondCode CC =
14152 cast<CondCodeSDNode>(N->getOperand(
14153 N->getOpcode() == ISD::SETCC ? 2 : 4))->get();
14154 unsigned OpBits = N->getOperand(0).getValueSizeInBits();
14155
14156 if (ISD::isSignedIntSetCC(CC)) {
14157 if (DAG.ComputeNumSignBits(N->getOperand(0)) != OpBits ||
14158 DAG.ComputeNumSignBits(N->getOperand(1)) != OpBits)
14159 return SDValue();
14160 } else if (ISD::isUnsignedIntSetCC(CC)) {
14161 if (!DAG.MaskedValueIsZero(N->getOperand(0),
14162 APInt::getHighBitsSet(OpBits, OpBits-1)) ||
14163 !DAG.MaskedValueIsZero(N->getOperand(1),
14164 APInt::getHighBitsSet(OpBits, OpBits-1)))
14165 return (N->getOpcode() == ISD::SETCC ? ConvertSETCCToSubtract(N, DCI)
14166 : SDValue());
14167 } else {
14168 // This is neither a signed nor an unsigned comparison, just make sure
14169 // that the high bits are equal.
14170 KnownBits Op1Known = DAG.computeKnownBits(N->getOperand(0));
14171 KnownBits Op2Known = DAG.computeKnownBits(N->getOperand(1));
14172
14173 // We don't really care about what is known about the first bit (if
14174 // anything), so pretend that it is known zero for both to ensure they can
14175 // be compared as constants.
14176 Op1Known.Zero.setBit(0); Op1Known.One.clearBit(0);
14177 Op2Known.Zero.setBit(0); Op2Known.One.clearBit(0);
14178
14179 if (!Op1Known.isConstant() || !Op2Known.isConstant() ||
14180 Op1Known.getConstant() != Op2Known.getConstant())
14181 return SDValue();
14182 }
14183 }
14184
14185 // We now know that the higher-order bits are irrelevant; we just need to
14186 // make sure that all of the intermediate operations are bit operations, and
14187 // all inputs are extensions.
14188 if (N->getOperand(0).getOpcode() != ISD::AND &&
14189 N->getOperand(0).getOpcode() != ISD::OR &&
14190 N->getOperand(0).getOpcode() != ISD::XOR &&
14191 N->getOperand(0).getOpcode() != ISD::SELECT &&
14192 N->getOperand(0).getOpcode() != ISD::SELECT_CC &&
14193 N->getOperand(0).getOpcode() != ISD::TRUNCATE &&
14194 N->getOperand(0).getOpcode() != ISD::SIGN_EXTEND &&
14195 N->getOperand(0).getOpcode() != ISD::ZERO_EXTEND &&
14196 N->getOperand(0).getOpcode() != ISD::ANY_EXTEND)
14197 return SDValue();
14198
14199 if ((N->getOpcode() == ISD::SETCC || N->getOpcode() == ISD::SELECT_CC) &&
14200 N->getOperand(1).getOpcode() != ISD::AND &&
14201 N->getOperand(1).getOpcode() != ISD::OR &&
14202 N->getOperand(1).getOpcode() != ISD::XOR &&
14203 N->getOperand(1).getOpcode() != ISD::SELECT &&
14204 N->getOperand(1).getOpcode() != ISD::SELECT_CC &&
14205 N->getOperand(1).getOpcode() != ISD::TRUNCATE &&
14206 N->getOperand(1).getOpcode() != ISD::SIGN_EXTEND &&
14207 N->getOperand(1).getOpcode() != ISD::ZERO_EXTEND &&
14208 N->getOperand(1).getOpcode() != ISD::ANY_EXTEND)
14209 return SDValue();
14210
14211 SmallVector<SDValue, 4> Inputs;
14212 SmallVector<SDValue, 8> BinOps, PromOps;
14213 SmallPtrSet<SDNode *, 16> Visited;
14214
14215 for (unsigned i = 0; i < 2; ++i) {
14216 if (((N->getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
14217 N->getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
14218 N->getOperand(i).getOpcode() == ISD::ANY_EXTEND) &&
14219 N->getOperand(i).getOperand(0).getValueType() == MVT::i1) ||
14220 isa<ConstantSDNode>(N->getOperand(i)))
14221 Inputs.push_back(N->getOperand(i));
14222 else
14223 BinOps.push_back(N->getOperand(i));
14224
14225 if (N->getOpcode() == ISD::TRUNCATE)
14226 break;
14227 }
14228
14229 // Visit all inputs, collect all binary operations (and, or, xor and
14230 // select) that are all fed by extensions.
14231 while (!BinOps.empty()) {
14232 SDValue BinOp = BinOps.pop_back_val();
14233
14234 if (!Visited.insert(BinOp.getNode()).second)
14235 continue;
14236
14237 PromOps.push_back(BinOp);
14238
14239 for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) {
14240 // The condition of the select is not promoted.
14241 if (BinOp.getOpcode() == ISD::SELECT && i == 0)
14242 continue;
14243 if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3)
14244 continue;
14245
14246 if (((BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
14247 BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
14248 BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) &&
14249 BinOp.getOperand(i).getOperand(0).getValueType() == MVT::i1) ||
14250 isa<ConstantSDNode>(BinOp.getOperand(i))) {
14251 Inputs.push_back(BinOp.getOperand(i));
14252 } else if (BinOp.getOperand(i).getOpcode() == ISD::AND ||
14253 BinOp.getOperand(i).getOpcode() == ISD::OR ||
14254 BinOp.getOperand(i).getOpcode() == ISD::XOR ||
14255 BinOp.getOperand(i).getOpcode() == ISD::SELECT ||
14256 BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC ||
14257 BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE ||
14258 BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
14259 BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
14260 BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) {
14261 BinOps.push_back(BinOp.getOperand(i));
14262 } else {
14263 // We have an input that is not an extension or another binary
14264 // operation; we'll abort this transformation.
14265 return SDValue();
14266 }
14267 }
14268 }
14269
14270 // Make sure that this is a self-contained cluster of operations (which
14271 // is not quite the same thing as saying that everything has only one
14272 // use).
14273 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
14274 if (isa<ConstantSDNode>(Inputs[i]))
14275 continue;
14276
14277 for (const SDNode *User : Inputs[i].getNode()->uses()) {
14278 if (User != N && !Visited.count(User))
14279 return SDValue();
14280
14281 // Make sure that we're not going to promote the non-output-value
14282 // operand(s) of SELECT or SELECT_CC.
14283 // FIXME: Although we could sometimes handle this, and it does occur in
14284 // practice that one of the condition inputs to the select is also one of
14285 // the outputs, we currently can't deal with this.
14286 if (User->getOpcode() == ISD::SELECT) {
14287 if (User->getOperand(0) == Inputs[i])
14288 return SDValue();
14289 } else if (User->getOpcode() == ISD::SELECT_CC) {
14290 if (User->getOperand(0) == Inputs[i] ||
14291 User->getOperand(1) == Inputs[i])
14292 return SDValue();
14293 }
14294 }
14295 }
14296
14297 for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) {
14298 for (const SDNode *User : PromOps[i].getNode()->uses()) {
14299 if (User != N && !Visited.count(User))
14300 return SDValue();
14301
14302 // Make sure that we're not going to promote the non-output-value
14303 // operand(s) of SELECT or SELECT_CC.
14304 // FIXME: Although we could sometimes handle this, and it does occur in
14305 // practice that one of the condition inputs to the select is also one of
14306 // the outputs, we currently can't deal with this.
14307 if (User->getOpcode() == ISD::SELECT) {
14308 if (User->getOperand(0) == PromOps[i])
14309 return SDValue();
14310 } else if (User->getOpcode() == ISD::SELECT_CC) {
14311 if (User->getOperand(0) == PromOps[i] ||
14312 User->getOperand(1) == PromOps[i])
14313 return SDValue();
14314 }
14315 }
14316 }
14317
14318 // Replace all inputs with the extension operand.
14319 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
14320 // Constants may have users outside the cluster of to-be-promoted nodes,
14321 // and so we need to replace those as we do the promotions.
14322 if (isa<ConstantSDNode>(Inputs[i]))
14323 continue;
14324 else
14325 DAG.ReplaceAllUsesOfValueWith(Inputs[i], Inputs[i].getOperand(0));
14326 }
14327
14328 std::list<HandleSDNode> PromOpHandles;
14329 for (auto &PromOp : PromOps)
14330 PromOpHandles.emplace_back(PromOp);
14331
14332 // Replace all operations (these are all the same, but have a different
14333 // (i1) return type). DAG.getNode will validate that the types of
14334 // a binary operator match, so go through the list in reverse so that
14335 // we've likely promoted both operands first. Any intermediate truncations or
14336 // extensions disappear.
14337 while (!PromOpHandles.empty()) {
14338 SDValue PromOp = PromOpHandles.back().getValue();
14339 PromOpHandles.pop_back();
14340
14341 if (PromOp.getOpcode() == ISD::TRUNCATE ||
14342 PromOp.getOpcode() == ISD::SIGN_EXTEND ||
14343 PromOp.getOpcode() == ISD::ZERO_EXTEND ||
14344 PromOp.getOpcode() == ISD::ANY_EXTEND) {
14345 if (!isa<ConstantSDNode>(PromOp.getOperand(0)) &&
14346 PromOp.getOperand(0).getValueType() != MVT::i1) {
14347 // The operand is not yet ready (see comment below).
14348 PromOpHandles.emplace_front(PromOp);
14349 continue;
14350 }
14351
14352 SDValue RepValue = PromOp.getOperand(0);
14353 if (isa<ConstantSDNode>(RepValue))
14354 RepValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, RepValue);
14355
14356 DAG.ReplaceAllUsesOfValueWith(PromOp, RepValue);
14357 continue;
14358 }
14359
14360 unsigned C;
14361 switch (PromOp.getOpcode()) {
14362 default: C = 0; break;
14363 case ISD::SELECT: C = 1; break;
14364 case ISD::SELECT_CC: C = 2; break;
14365 }
14366
14367 if ((!isa<ConstantSDNode>(PromOp.getOperand(C)) &&
14368 PromOp.getOperand(C).getValueType() != MVT::i1) ||
14369 (!isa<ConstantSDNode>(PromOp.getOperand(C+1)) &&
14370 PromOp.getOperand(C+1).getValueType() != MVT::i1)) {
14371 // The to-be-promoted operands of this node have not yet been
14372 // promoted (this should be rare because we're going through the
14373 // list backward, but if one of the operands has several users in
14374 // this cluster of to-be-promoted nodes, it is possible).
14375 PromOpHandles.emplace_front(PromOp);
14376 continue;
14377 }
14378
14379 SmallVector<SDValue, 3> Ops(PromOp.getNode()->ops());
14380
14381 // If there are any constant inputs, make sure they're replaced now.
14382 for (unsigned i = 0; i < 2; ++i)
14383 if (isa<ConstantSDNode>(Ops[C+i]))
14384 Ops[C+i] = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, Ops[C+i]);
14385
14386 DAG.ReplaceAllUsesOfValueWith(PromOp,
14387 DAG.getNode(PromOp.getOpcode(), dl, MVT::i1, Ops));
14388 }
14389
14390 // Now we're left with the initial truncation itself.
14391 if (N->getOpcode() == ISD::TRUNCATE)
14392 return N->getOperand(0);
14393
14394 // Otherwise, this is a comparison. The operands to be compared have just
14395 // changed type (to i1), but everything else is the same.
14396 return SDValue(N, 0);
14397}
14398
14399SDValue PPCTargetLowering::DAGCombineExtBoolTrunc(SDNode *N,
14400 DAGCombinerInfo &DCI) const {
14401 SelectionDAG &DAG = DCI.DAG;
14402 SDLoc dl(N);
14403
14404 // If we're tracking CR bits, we need to be careful that we don't have:
14405 // zext(binary-ops(trunc(x), trunc(y)))
14406 // or
14407 // zext(binary-ops(binary-ops(trunc(x), trunc(y)), ...)
14408 // such that we're unnecessarily moving things into CR bits that can more
14409 // efficiently stay in GPRs. Note that if we're not certain that the high
14410 // bits are set as required by the final extension, we still may need to do
14411 // some masking to get the proper behavior.
14412
14413 // This same functionality is important on PPC64 when dealing with
14414 // 32-to-64-bit extensions; these occur often when 32-bit values are used as
14415 // the return values of functions. Because it is so similar, it is handled
14416 // here as well.
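// Illustrative example (annotation):
//   (zext i64 (xor i32 (trunc i32 %a:i64), (trunc i32 %b:i64)))
// can be rebuilt as (xor i64 %a, %b), followed by an explicit mask (for
// zext) or a shl/sra pair (for sext) only when the high bits of the inputs
// are not already known to have the required form.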
14417
14418 if (N->getValueType(0) != MVT::i32 &&
14419 N->getValueType(0) != MVT::i64)
14420 return SDValue();
14421
14422 if (!((N->getOperand(0).getValueType() == MVT::i1 && Subtarget.useCRBits()) ||
14423 (N->getOperand(0).getValueType() == MVT::i32 && Subtarget.isPPC64())))
14424 return SDValue();
14425
14426 if (N->getOperand(0).getOpcode() != ISD::AND &&
14427 N->getOperand(0).getOpcode() != ISD::OR &&
14428 N->getOperand(0).getOpcode() != ISD::XOR &&
14429 N->getOperand(0).getOpcode() != ISD::SELECT &&
14430 N->getOperand(0).getOpcode() != ISD::SELECT_CC)
14431 return SDValue();
14432
14433 SmallVector<SDValue, 4> Inputs;
14434 SmallVector<SDValue, 8> BinOps(1, N->getOperand(0)), PromOps;
14435 SmallPtrSet<SDNode *, 16> Visited;
14436
14437 // Visit all inputs, collect all binary operations (and, or, xor and
14438 // select) that are all fed by truncations.
14439 while (!BinOps.empty()) {
14440 SDValue BinOp = BinOps.pop_back_val();
14441
14442 if (!Visited.insert(BinOp.getNode()).second)
14443 continue;
14444
14445 PromOps.push_back(BinOp);
14446
14447 for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) {
14448 // The condition of the select is not promoted.
14449 if (BinOp.getOpcode() == ISD::SELECT && i == 0)
14450 continue;
14451 if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3)
14452 continue;
14453
14454 if (BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE ||
14455 isa<ConstantSDNode>(BinOp.getOperand(i))) {
14456 Inputs.push_back(BinOp.getOperand(i));
14457 } else if (BinOp.getOperand(i).getOpcode() == ISD::AND ||
14458 BinOp.getOperand(i).getOpcode() == ISD::OR ||
14459 BinOp.getOperand(i).getOpcode() == ISD::XOR ||
14460 BinOp.getOperand(i).getOpcode() == ISD::SELECT ||
14461 BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC) {
14462 BinOps.push_back(BinOp.getOperand(i));
14463 } else {
14464 // We have an input that is not a truncation or another binary
14465 // operation; we'll abort this transformation.
14466 return SDValue();
14467 }
14468 }
14469 }
14470
14471 // The operands of a select that must be truncated when the select is
14472 // promoted because the operand is actually part of the to-be-promoted set.
14473 DenseMap<SDNode *, EVT> SelectTruncOp[2];
14474
14475 // Make sure that this is a self-contained cluster of operations (which
14476 // is not quite the same thing as saying that everything has only one
14477 // use).
14478 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
14479 if (isa<ConstantSDNode>(Inputs[i]))
14480 continue;
14481
14482 for (SDNode *User : Inputs[i].getNode()->uses()) {
14483 if (User != N && !Visited.count(User))
14484 return SDValue();
14485
14486 // If we're going to promote the non-output-value operand(s) of SELECT or
14487 // SELECT_CC, record them for truncation.
14488 if (User->getOpcode() == ISD::SELECT) {
14489 if (User->getOperand(0) == Inputs[i])
14490 SelectTruncOp[0].insert(std::make_pair(User,
14491 User->getOperand(0).getValueType()));
14492 } else if (User->getOpcode() == ISD::SELECT_CC) {
14493 if (User->getOperand(0) == Inputs[i])
14494 SelectTruncOp[0].insert(std::make_pair(User,
14495 User->getOperand(0).getValueType()));
14496 if (User->getOperand(1) == Inputs[i])
14497 SelectTruncOp[1].insert(std::make_pair(User,
14498 User->getOperand(1).getValueType()));
14499 }
14500 }
14501 }
14502
14503 for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) {
14504 for (SDNode *User : PromOps[i].getNode()->uses()) {
14505 if (User != N && !Visited.count(User))
14506 return SDValue();
14507
14508 // If we're going to promote the non-output-value operand(s) of SELECT or
14509 // SELECT_CC, record them for truncation.
14510 if (User->getOpcode() == ISD::SELECT) {
14511 if (User->getOperand(0) == PromOps[i])
14512 SelectTruncOp[0].insert(std::make_pair(User,
14513 User->getOperand(0).getValueType()));
14514 } else if (User->getOpcode() == ISD::SELECT_CC) {
14515 if (User->getOperand(0) == PromOps[i])
14516 SelectTruncOp[0].insert(std::make_pair(User,
14517 User->getOperand(0).getValueType()));
14518 if (User->getOperand(1) == PromOps[i])
14519 SelectTruncOp[1].insert(std::make_pair(User,
14520 User->getOperand(1).getValueType()));
14521 }
14522 }
14523 }
14524
14525 unsigned PromBits = N->getOperand(0).getValueSizeInBits();
14526 bool ReallyNeedsExt = false;
14527 if (N->getOpcode() != ISD::ANY_EXTEND) {
14528 // If all of the inputs are not already sign/zero extended, then
14529 // we'll still need to do that at the end.
14530 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
14531 if (isa<ConstantSDNode>(Inputs[i]))
14532 continue;
14533
14534 unsigned OpBits =
14535 Inputs[i].getOperand(0).getValueSizeInBits();
14536 assert(PromBits < OpBits && "Truncation not to a smaller bit count?");
14537
14538 if ((N->getOpcode() == ISD::ZERO_EXTEND &&
14539 !DAG.MaskedValueIsZero(Inputs[i].getOperand(0),
14540 APInt::getHighBitsSet(OpBits,
14541 OpBits-PromBits))) ||
14542 (N->getOpcode() == ISD::SIGN_EXTEND &&
14543 DAG.ComputeNumSignBits(Inputs[i].getOperand(0)) <
14544 (OpBits-(PromBits-1)))) {
14545 ReallyNeedsExt = true;
14546 break;
14547 }
14548 }
14549 }
14550
14551 // Replace all inputs, either with the truncation operand, or a
14552 // truncation or extension to the final output type.
14553 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
14554 // Constant inputs need to be replaced with the to-be-promoted nodes that
14555 // use them because they might have users outside of the cluster of
14556 // promoted nodes.
14557 if (isa<ConstantSDNode>(Inputs[i]))
14558 continue;
14559
14560 SDValue InSrc = Inputs[i].getOperand(0);
14561 if (Inputs[i].getValueType() == N->getValueType(0))
14562 DAG.ReplaceAllUsesOfValueWith(Inputs[i], InSrc);
14563 else if (N->getOpcode() == ISD::SIGN_EXTEND)
14564 DAG.ReplaceAllUsesOfValueWith(Inputs[i],
14565 DAG.getSExtOrTrunc(InSrc, dl, N->getValueType(0)));
14566 else if (N->getOpcode() == ISD::ZERO_EXTEND)
14567 DAG.ReplaceAllUsesOfValueWith(Inputs[i],
14568 DAG.getZExtOrTrunc(InSrc, dl, N->getValueType(0)));
14569 else
14570 DAG.ReplaceAllUsesOfValueWith(Inputs[i],
14571 DAG.getAnyExtOrTrunc(InSrc, dl, N->getValueType(0)));
14572 }
14573
14574 std::list<HandleSDNode> PromOpHandles;
14575 for (auto &PromOp : PromOps)
14576 PromOpHandles.emplace_back(PromOp);
14577
14578 // Replace all operations (these are all the same, but have a different
14579 // (promoted) return type). DAG.getNode will validate that the types of
14580 // a binary operator match, so go through the list in reverse so that
14581 // we've likely promoted both operands first.
14582 while (!PromOpHandles.empty()) {
14583 SDValue PromOp = PromOpHandles.back().getValue();
14584 PromOpHandles.pop_back();
14585
14586 unsigned C;
14587 switch (PromOp.getOpcode()) {
14588 default: C = 0; break;
14589 case ISD::SELECT: C = 1; break;
14590 case ISD::SELECT_CC: C = 2; break;
14591 }
14592
14593 if ((!isa<ConstantSDNode>(PromOp.getOperand(C)) &&
14594 PromOp.getOperand(C).getValueType() != N->getValueType(0)) ||
14595 (!isa<ConstantSDNode>(PromOp.getOperand(C+1)) &&
14596 PromOp.getOperand(C+1).getValueType() != N->getValueType(0))) {
14597 // The to-be-promoted operands of this node have not yet been
14598 // promoted (this should be rare because we're going through the
14599 // list backward, but if one of the operands has several users in
14600 // this cluster of to-be-promoted nodes, it is possible).
14601 PromOpHandles.emplace_front(PromOp);
14602 continue;
14603 }
14604
14605 // For SELECT and SELECT_CC nodes, we do a similar check for any
14606 // to-be-promoted comparison inputs.
14607 if (PromOp.getOpcode() == ISD::SELECT ||
14608 PromOp.getOpcode() == ISD::SELECT_CC) {
14609 if ((SelectTruncOp[0].count(PromOp.getNode()) &&
14610 PromOp.getOperand(0).getValueType() != N->getValueType(0)) ||
14611 (SelectTruncOp[1].count(PromOp.getNode()) &&
14612 PromOp.getOperand(1).getValueType() != N->getValueType(0))) {
14613 PromOpHandles.emplace_front(PromOp);
14614 continue;
14615 }
14616 }
14617
14618 SmallVector<SDValue, 3> Ops(PromOp.getNode()->op_begin(),
14619 PromOp.getNode()->op_end());
14620
14621 // If this node has constant inputs, then they'll need to be promoted here.
14622 for (unsigned i = 0; i < 2; ++i) {
14623 if (!isa<ConstantSDNode>(Ops[C+i]))
14624 continue;
14625 if (Ops[C+i].getValueType() == N->getValueType(0))
14626 continue;
14627
14628 if (N->getOpcode() == ISD::SIGN_EXTEND)
14629 Ops[C+i] = DAG.getSExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
14630 else if (N->getOpcode() == ISD::ZERO_EXTEND)
14631 Ops[C+i] = DAG.getZExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
14632 else
14633 Ops[C+i] = DAG.getAnyExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
14634 }
14635
14636 // If we've promoted the comparison inputs of a SELECT or SELECT_CC,
14637 // truncate them again to the original value type.
14638 if (PromOp.getOpcode() == ISD::SELECT ||
14639 PromOp.getOpcode() == ISD::SELECT_CC) {
14640 auto SI0 = SelectTruncOp[0].find(PromOp.getNode());
14641 if (SI0 != SelectTruncOp[0].end())
14642 Ops[0] = DAG.getNode(ISD::TRUNCATE, dl, SI0->second, Ops[0]);
14643 auto SI1 = SelectTruncOp[1].find(PromOp.getNode());
14644 if (SI1 != SelectTruncOp[1].end())
14645 Ops[1] = DAG.getNode(ISD::TRUNCATE, dl, SI1->second, Ops[1]);
14646 }
14647
14648 DAG.ReplaceAllUsesOfValueWith(PromOp,
14649 DAG.getNode(PromOp.getOpcode(), dl, N->getValueType(0), Ops));
14650 }
14651
14652 // Now we're left with the initial extension itself.
14653 if (!ReallyNeedsExt)
14654 return N->getOperand(0);
14655
14656 // To zero extend, just mask off everything except for the first bit (in the
14657 // i1 case).
14658 if (N->getOpcode() == ISD::ZERO_EXTEND)
14659 return DAG.getNode(ISD::AND, dl, N->getValueType(0), N->getOperand(0),
14660 DAG.getConstant(APInt::getLowBitsSet(
14661 N->getValueSizeInBits(0), PromBits),
14662 dl, N->getValueType(0)));
14663
14664 assert(N->getOpcode() == ISD::SIGN_EXTEND &&
14665 "Invalid extension type");
14666 EVT ShiftAmountTy = getShiftAmountTy(N->getValueType(0), DAG.getDataLayout());
14667 SDValue ShiftCst =
14668 DAG.getConstant(N->getValueSizeInBits(0) - PromBits, dl, ShiftAmountTy);
14669 return DAG.getNode(
14670 ISD::SRA, dl, N->getValueType(0),
14671 DAG.getNode(ISD::SHL, dl, N->getValueType(0), N->getOperand(0), ShiftCst),
14672 ShiftCst);
14673}
14674
14675SDValue PPCTargetLowering::combineSetCC(SDNode *N,
14676 DAGCombinerInfo &DCI) const {
14677 assert(N->getOpcode() == ISD::SETCC &&
14678 "Should be called with a SETCC node");
14679
14680 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
14681 if (CC == ISD::SETNE || CC == ISD::SETEQ) {
14682 SDValue LHS = N->getOperand(0);
14683 SDValue RHS = N->getOperand(1);
14684
14685 // If there is a '0 - y' pattern, canonicalize the pattern to the RHS.
14686 if (LHS.getOpcode() == ISD::SUB && isNullConstant(LHS.getOperand(0)) &&
14687 LHS.hasOneUse())
14688 std::swap(LHS, RHS);
14689
14690 // x == 0-y --> x+y == 0
14691 // x != 0-y --> x+y != 0
14692 if (RHS.getOpcode() == ISD::SUB && isNullConstant(RHS.getOperand(0)) &&
14693 RHS.hasOneUse()) {
14694 SDLoc DL(N);
14695 SelectionDAG &DAG = DCI.DAG;
14696 EVT VT = N->getValueType(0);
14697 EVT OpVT = LHS.getValueType();
14698 SDValue Add = DAG.getNode(ISD::ADD, DL, OpVT, LHS, RHS.getOperand(1));
14699 return DAG.getSetCC(DL, VT, Add, DAG.getConstant(0, DL, OpVT), CC);
14700 }
14701 }
14702
14703 return DAGCombineTruncBoolExt(N, DCI);
14704}
14705
14706// Is this an extending load from an f32 to an f64?
14707static bool isFPExtLoad(SDValue Op) {
14708 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Op.getNode()))
14709 return LD->getExtensionType() == ISD::EXTLOAD &&
14710 Op.getValueType() == MVT::f64;
14711 return false;
14712}
14713
14714/// Reduces the number of fp-to-int conversion when building a vector.
14715///
14716/// If this vector is built out of floating to integer conversions,
14717/// transform it to a vector built out of floating point values followed by a
14718/// single floating to integer conversion of the vector.
14719/// Namely (build_vector (fptosi $A), (fptosi $B), ...)
14720/// becomes (fptosi (build_vector ($A, $B, ...)))
14721SDValue PPCTargetLowering::
14722combineElementTruncationToVectorTruncation(SDNode *N,
14723 DAGCombinerInfo &DCI) const {
14724 assert(N->getOpcode() == ISD::BUILD_VECTOR &&
14725 "Should be called with a BUILD_VECTOR node");
14726
14727 SelectionDAG &DAG = DCI.DAG;
14728 SDLoc dl(N);
14729
14730 SDValue FirstInput = N->getOperand(0);
14731 assert(FirstInput.getOpcode() == PPCISD::MFVSR &&
14732 "The input operand must be an fp-to-int conversion.");
14733
14734 // This combine happens after legalization so the fp_to_[su]i nodes are
14735 // already converted to PPCISD nodes.
14736 unsigned FirstConversion = FirstInput.getOperand(0).getOpcode();
14737 if (FirstConversion == PPCISD::FCTIDZ ||
14738 FirstConversion == PPCISD::FCTIDUZ ||
14739 FirstConversion == PPCISD::FCTIWZ ||
14740 FirstConversion == PPCISD::FCTIWUZ) {
14741 bool IsSplat = true;
14742 bool Is32Bit = FirstConversion == PPCISD::FCTIWZ ||
14743 FirstConversion == PPCISD::FCTIWUZ;
14744 EVT SrcVT = FirstInput.getOperand(0).getValueType();
14745 SmallVector<SDValue, 4> Ops;
14746 EVT TargetVT = N->getValueType(0);
14747 for (int i = 0, e = N->getNumOperands(); i < e; ++i) {
14748 SDValue NextOp = N->getOperand(i);
14749 if (NextOp.getOpcode() != PPCISD::MFVSR)
14750 return SDValue();
14751 unsigned NextConversion = NextOp.getOperand(0).getOpcode();
14752 if (NextConversion != FirstConversion)
14753 return SDValue();
14754 // If we are converting to 32-bit integers, we need to add an FP_ROUND.
14755 // This is not valid if the input was originally double precision. It is
14756 // also not profitable to do unless this is an extending load in which
14757 // case doing this combine will allow us to combine consecutive loads.
14758 if (Is32Bit && !isFPExtLoad(NextOp.getOperand(0).getOperand(0)))
14759 return SDValue();
14760 if (N->getOperand(i) != FirstInput)
14761 IsSplat = false;
14762 }
14763
14764 // If this is a splat, we leave it as-is since there will be only a single
14765 // fp-to-int conversion followed by a splat of the integer. This is better
14766 // for 32-bit and smaller ints and neutral for 64-bit ints.
14767 if (IsSplat)
14768 return SDValue();
14769
14770 // Now that we know we have the right type of node, get its operands
14771 for (int i = 0, e = N->getNumOperands(); i < e; ++i) {
14772 SDValue In = N->getOperand(i).getOperand(0);
14773 if (Is32Bit) {
14774 // For 32-bit values, we need to add an FP_ROUND node (if we made it
14775 // here, we know that all inputs are extending loads so this is safe).
14776 if (In.isUndef())
14777 Ops.push_back(DAG.getUNDEF(SrcVT));
14778 else {
14779 SDValue Trunc =
14780 DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, In.getOperand(0),
14781 DAG.getIntPtrConstant(1, dl, /*isTarget=*/true));
14782 Ops.push_back(Trunc);
14783 }
14784 } else
14785 Ops.push_back(In.isUndef() ? DAG.getUNDEF(SrcVT) : In.getOperand(0));
14786 }
14787
14788 unsigned Opcode;
14789 if (FirstConversion == PPCISD::FCTIDZ ||
14790 FirstConversion == PPCISD::FCTIWZ)
14791 Opcode = ISD::FP_TO_SINT;
14792 else
14793 Opcode = ISD::FP_TO_UINT;
14794
14795 EVT NewVT = TargetVT == MVT::v2i64 ? MVT::v2f64 : MVT::v4f32;
14796 SDValue BV = DAG.getBuildVector(NewVT, dl, Ops);
14797 return DAG.getNode(Opcode, dl, TargetVT, BV);
14798 }
14799 return SDValue();
14800}
14801
14802/// Reduce the number of loads when building a vector.
14803///
14804/// Building a vector out of multiple loads can be converted to a load
14805/// of the vector type if the loads are consecutive. If the loads are
14806/// consecutive but in descending order, a shuffle is added at the end
14807/// to reorder the vector.
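// Illustrative example (annotation):
//   (v4i32 build_vector (load a[0]), (load a[1]), (load a[2]), (load a[3]))
// becomes a single v4i32 load of a[0..3]; if the operands instead appear as
// a[3], a[2], a[1], a[0], the wide load is created from the last
// (lowest-addressed) input load and followed by a reversing vector_shuffle.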
14808static SDValue combineBVOfConsecutiveLoads(SDNode *N, SelectionDAG &DAG) {
14809 assert(N->getOpcode() == ISD::BUILD_VECTOR &&
14810 "Should be called with a BUILD_VECTOR node");
14811
14812 SDLoc dl(N);
14813
14814 // Return early for non-byte-sized types, as they can't be consecutive.
14815 if (!N->getValueType(0).getVectorElementType().isByteSized())
14816 return SDValue();
14817
14818 bool InputsAreConsecutiveLoads = true;
14819 bool InputsAreReverseConsecutive = true;
14820 unsigned ElemSize = N->getValueType(0).getScalarType().getStoreSize();
14821 SDValue FirstInput = N->getOperand(0);
14822 bool IsRoundOfExtLoad = false;
14823 LoadSDNode *FirstLoad = nullptr;
14824
14825 if (FirstInput.getOpcode() == ISD::FP_ROUND &&
14826 FirstInput.getOperand(0).getOpcode() == ISD::LOAD) {
14827 FirstLoad = cast<LoadSDNode>(FirstInput.getOperand(0));
14828 IsRoundOfExtLoad = FirstLoad->getExtensionType() == ISD::EXTLOAD;
14829 }
14830 // Not a build vector of (possibly fp_rounded) loads.
14831 if ((!IsRoundOfExtLoad && FirstInput.getOpcode() != ISD::LOAD) ||
14832 N->getNumOperands() == 1)
14833 return SDValue();
14834
14835 if (!IsRoundOfExtLoad)
14836 FirstLoad = cast<LoadSDNode>(FirstInput);
14837
14838 SmallVector<LoadSDNode *, 4> InputLoads;
14839 InputLoads.push_back(FirstLoad);
14840 for (int i = 1, e = N->getNumOperands(); i < e; ++i) {
14841 // If any inputs are fp_round(extload), they all must be.
14842 if (IsRoundOfExtLoad && N->getOperand(i).getOpcode() != ISD::FP_ROUND)
14843 return SDValue();
14844
14845 SDValue NextInput = IsRoundOfExtLoad ? N->getOperand(i).getOperand(0) :
14846 N->getOperand(i);
14847 if (NextInput.getOpcode() != ISD::LOAD)
14848 return SDValue();
14849
14850 SDValue PreviousInput =
14851 IsRoundOfExtLoad ? N->getOperand(i-1).getOperand(0) : N->getOperand(i-1);
14852 LoadSDNode *LD1 = cast<LoadSDNode>(PreviousInput);
14853 LoadSDNode *LD2 = cast<LoadSDNode>(NextInput);
14854
14855 // If any inputs are fp_round(extload), they all must be.
14856 if (IsRoundOfExtLoad && LD2->getExtensionType() != ISD::EXTLOAD)
14857 return SDValue();
14858
14859 // We only care about regular loads. The PPC-specific load intrinsics
14860 // will not lead to a merge opportunity.
14861 if (!DAG.areNonVolatileConsecutiveLoads(LD2, LD1, ElemSize, 1))
14862 InputsAreConsecutiveLoads = false;
14863 if (!DAG.areNonVolatileConsecutiveLoads(LD1, LD2, ElemSize, 1))
14864 InputsAreReverseConsecutive = false;
14865
14866 // Exit early if the loads are neither consecutive nor reverse consecutive.
14867 if (!InputsAreConsecutiveLoads && !InputsAreReverseConsecutive)
14868 return SDValue();
14869 InputLoads.push_back(LD2);
14870 }
14871
14872 assert(!(InputsAreConsecutiveLoads && InputsAreReverseConsecutive) &&
14873 "The loads cannot be both consecutive and reverse consecutive.");
14874
14875 SDValue WideLoad;
14876 SDValue ReturnSDVal;
14877 if (InputsAreConsecutiveLoads) {
14878 assert(FirstLoad && "Input needs to be a LoadSDNode.");
14879 WideLoad = DAG.getLoad(N->getValueType(0), dl, FirstLoad->getChain(),
14880 FirstLoad->getBasePtr(), FirstLoad->getPointerInfo(),
14881 FirstLoad->getAlign());
14882 ReturnSDVal = WideLoad;
14883 } else if (InputsAreReverseConsecutive) {
14884 LoadSDNode *LastLoad = InputLoads.back();
14885 assert(LastLoad && "Input needs to be a LoadSDNode.");
14886 WideLoad = DAG.getLoad(N->getValueType(0), dl, LastLoad->getChain(),
14887 LastLoad->getBasePtr(), LastLoad->getPointerInfo(),
14888 LastLoad->getAlign());
14889 SmallVector<int, 16> Ops;
14890 for (int i = N->getNumOperands() - 1; i >= 0; i--)
14891 Ops.push_back(i);
14892
14893 ReturnSDVal = DAG.getVectorShuffle(N->getValueType(0), dl, WideLoad,
14894 DAG.getUNDEF(N->getValueType(0)), Ops);
14895 } else
14896 return SDValue();
14897
14898 for (auto *LD : InputLoads)
14899 DAG.makeEquivalentMemoryOrdering(LD, WideLoad);
14900 return ReturnSDVal;
14901}
14902
14903// This function adds the required vector_shuffle needed to get
14904// the elements of the vector extract in the correct position
14905// as specified by the CorrectElems encoding.
14906static SDValue addShuffleForVecExtend(SDNode *N, SelectionDAG &DAG,
14907 SDValue Input, uint64_t Elems,
14908 uint64_t CorrectElems) {
14909 SDLoc dl(N);
14910
14911 unsigned NumElems = Input.getValueType().getVectorNumElements();
14912 SmallVector<int, 16> ShuffleMask(NumElems, -1);
14913
14914 // Knowing the element indices being extracted from the original
14915 // vector and the order in which they're being inserted, just put
14916 // them at element indices required for the instruction.
14917 for (unsigned i = 0; i < N->getNumOperands(); i++) {
14918 if (DAG.getDataLayout().isLittleEndian())
14919 ShuffleMask[CorrectElems & 0xF] = Elems & 0xF;
14920 else
14921 ShuffleMask[(CorrectElems & 0xF0) >> 4] = (Elems & 0xF0) >> 4;
14922 CorrectElems = CorrectElems >> 8;
14923 Elems = Elems >> 8;
14924 }
14925
14926 SDValue Shuffle =
14927 DAG.getVectorShuffle(Input.getValueType(), dl, Input,
14928 DAG.getUNDEF(Input.getValueType()), ShuffleMask);
14929
14930 EVT VT = N->getValueType(0);
14931 SDValue Conv = DAG.getBitcast(VT, Shuffle);
14932
14933 EVT ExtVT = EVT::getVectorVT(*DAG.getContext(),
14934 Input.getValueType().getVectorElementType(),
14935 VT.getVectorNumElements());
14936 return DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, VT, Conv,
14937 DAG.getValueType(ExtVT));
14938}
14939
14940// Look for build vector patterns where input operands come from sign
14941// extended vector_extract elements of specific indices. If the correct indices
14942// aren't used, add a vector shuffle to fix up the indices and create
14943// SIGN_EXTEND_INREG node which selects the vector sign extend instructions
14944// during instruction selection.
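// Illustrative example (annotation): on little endian, a v4i32 build_vector
// whose operands are sign-extended extracts of bytes 1, 5, 9, 13 of one
// v16i8 source is first shuffled so those bytes sit at the allowed indices
// 0, 4, 8, 12, and then becomes (sign_extend_inreg (bitcast v4i32), v4i8),
// which the P9 vector sign-extension instructions can match.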
14945static SDValue combineBVOfVecSExt(SDNode *N, SelectionDAG &DAG) {
14946 // This array encodes the indices that the vector sign extend instructions
14947 // extract from when extending from one type to another for both BE and LE.
14948 // The right nibble of each byte corresponds to the LE indices,
14949 // and the left nibble of each byte corresponds to the BE indices.
14950 // For example: 0x3074B8FC byte->word
14951 // For LE: the allowed indices are: 0x0,0x4,0x8,0xC
14952 // For BE: the allowed indices are: 0x3,0x7,0xB,0xF
14953 // For example: 0x000070F8 byte->double word
14954 // For LE: the allowed indices are: 0x0,0x8
14955 // For BE: the allowed indices are: 0x7,0xF
14956 uint64_t TargetElems[] = {
14957 0x3074B8FC, // b->w
14958 0x000070F8, // b->d
14959 0x10325476, // h->w
14960 0x00003074, // h->d
14961 0x00001032, // w->d
14962 };
14963
14964 uint64_t Elems = 0;
14965 int Index;
14966 SDValue Input;
14967
14968 auto isSExtOfVecExtract = [&](SDValue Op) -> bool {
14969 if (!Op)
14970 return false;
14971 if (Op.getOpcode() != ISD::SIGN_EXTEND &&
14972 Op.getOpcode() != ISD::SIGN_EXTEND_INREG)
14973 return false;
14974
14975 // A SIGN_EXTEND_INREG might be fed by an ANY_EXTEND to produce a value
14976 // of the right width.
14977 SDValue Extract = Op.getOperand(0);
14978 if (Extract.getOpcode() == ISD::ANY_EXTEND)
14979 Extract = Extract.getOperand(0);
14980 if (Extract.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
14981 return false;
14982
14983 ConstantSDNode *ExtOp = dyn_cast<ConstantSDNode>(Extract.getOperand(1));
14984 if (!ExtOp)
14985 return false;
14986
14987 Index = ExtOp->getZExtValue();
14988 if (Input && Input != Extract.getOperand(0))
14989 return false;
14990
14991 if (!Input)
14992 Input = Extract.getOperand(0);
14993
14994 Elems = Elems << 8;
14995 Index = DAG.getDataLayout().isLittleEndian() ? Index : Index << 4;
14996 Elems |= Index;
14997
14998 return true;
14999 };
15000
15001 // If the build vector operands aren't sign-extended vector extracts
15002 // of the same input vector, then return.
15003 for (unsigned i = 0; i < N->getNumOperands(); i++) {
15004 if (!isSExtOfVecExtract(N->getOperand(i))) {
15005 return SDValue();
15006 }
15007 }
15008
15009 // If the vector extract indices are not correct, add the appropriate
15010 // vector_shuffle.
15011 int TgtElemArrayIdx;
15012 int InputSize = Input.getValueType().getScalarSizeInBits();
15013 int OutputSize = N->getValueType(0).getScalarSizeInBits();
15014 if (InputSize + OutputSize == 40)
15015 TgtElemArrayIdx = 0;
15016 else if (InputSize + OutputSize == 72)
15017 TgtElemArrayIdx = 1;
15018 else if (InputSize + OutputSize == 48)
15019 TgtElemArrayIdx = 2;
15020 else if (InputSize + OutputSize == 80)
15021 TgtElemArrayIdx = 3;
15022 else if (InputSize + OutputSize == 96)
15023 TgtElemArrayIdx = 4;
15024 else
15025 return SDValue();
15026
15027 uint64_t CorrectElems = TargetElems[TgtElemArrayIdx];
15028 CorrectElems = DAG.getDataLayout().isLittleEndian()
15029 ? CorrectElems & 0x0F0F0F0F0F0F0F0F
15030 : CorrectElems & 0xF0F0F0F0F0F0F0F0;
15031 if (Elems != CorrectElems) {
15032 return addShuffleForVecExtend(N, DAG, Input, Elems, CorrectElems);
15033 }
15034
15035 // Regular lowering will catch cases where a shuffle is not needed.
15036 return SDValue();
15037}
15038
15039// Look for the pattern of a load from a narrow width to i128, feeding
15040// into a BUILD_VECTOR of v1i128. Replace this sequence with a PPCISD node
15041// (LXVRZX). This node represents a zero extending load that will be matched
15042// to the Load VSX Vector Rightmost instructions.
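// Illustrative example (annotation):
//   (v1i128 build_vector (i128 zextload i32, %p))
// becomes (PPCISD::LXVRZX %p, 32), i.e. a zero-extending 4-byte load into
// the rightmost bytes of the vector register.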
15043static SDValue combineBVZEXTLOAD(SDNode *N, SelectionDAG &DAG) {
15044 SDLoc DL(N);
15045
15046 // This combine is only eligible for a BUILD_VECTOR of v1i128.
15047 if (N->getValueType(0) != MVT::v1i128)
15048 return SDValue();
15049
15050 SDValue Operand = N->getOperand(0);
15051 // Proceed with the transformation if the operand to the BUILD_VECTOR
15052 // is a load instruction.
15053 if (Operand.getOpcode() != ISD::LOAD)
15054 return SDValue();
15055
15056 auto *LD = cast<LoadSDNode>(Operand);
15057 EVT MemoryType = LD->getMemoryVT();
15058
15059 // This transformation is only valid if we are loading either a byte,
15060 // halfword, word, or doubleword.
15061 bool ValidLDType = MemoryType == MVT::i8 || MemoryType == MVT::i16 ||
15062 MemoryType == MVT::i32 || MemoryType == MVT::i64;
15063
15064 // Ensure that the load from the narrow width is being zero extended to i128.
15065 if (!ValidLDType ||
15066 (LD->getExtensionType() != ISD::ZEXTLOAD &&
15067 LD->getExtensionType() != ISD::EXTLOAD))
15068 return SDValue();
15069
15070 SDValue LoadOps[] = {
15071 LD->getChain(), LD->getBasePtr(),
15072 DAG.getIntPtrConstant(MemoryType.getScalarSizeInBits(), DL)};
15073
15074 return DAG.getMemIntrinsicNode(PPCISD::LXVRZX, DL,
15075 DAG.getVTList(MVT::v1i128, MVT::Other),
15076 LoadOps, MemoryType, LD->getMemOperand());
15077}
15078
15079SDValue PPCTargetLowering::DAGCombineBuildVector(SDNode *N,
15080 DAGCombinerInfo &DCI) const {
15081 assert(N->getOpcode() == ISD::BUILD_VECTOR &&
15082 "Should be called with a BUILD_VECTOR node");
15083
15084 SelectionDAG &DAG = DCI.DAG;
15085 SDLoc dl(N);
15086
15087 if (!Subtarget.hasVSX())
15088 return SDValue();
15089
15090 // The target independent DAG combiner will leave a build_vector of
15091 // float-to-int conversions intact. We can generate MUCH better code for
15092 // a float-to-int conversion of a vector of floats.
15093 SDValue FirstInput = N->getOperand(0);
15094 if (FirstInput.getOpcode() == PPCISD::MFVSR) {
15095 SDValue Reduced = combineElementTruncationToVectorTruncation(N, DCI);
15096 if (Reduced)
15097 return Reduced;
15098 }
15099
15100 // If we're building a vector out of consecutive loads, just load that
15101 // vector type.
15102 SDValue Reduced = combineBVOfConsecutiveLoads(N, DAG);
15103 if (Reduced)
15104 return Reduced;
15105
15106 // If we're building a vector out of extended elements from another vector
15107 // we have P9 vector integer extend instructions. The code assumes legal
15108 // input types (i.e. it can't handle things like v4i16) so do not run before
15109 // legalization.
15110 if (Subtarget.hasP9Altivec() && !DCI.isBeforeLegalize()) {
15111 Reduced = combineBVOfVecSExt(N, DAG);
15112 if (Reduced)
15113 return Reduced;
15114 }
15115
15116 // On Power10, the Load VSX Vector Rightmost instructions can be utilized
15117 // if this is a BUILD_VECTOR of v1i128, and if the operand to the BUILD_VECTOR
15118 // is a load from <valid narrow width> to i128.
15119 if (Subtarget.isISA3_1()) {
15120 SDValue BVOfZLoad = combineBVZEXTLOAD(N, DAG);
15121 if (BVOfZLoad)
15122 return BVOfZLoad;
15123 }
15124
15125 if (N->getValueType(0) != MVT::v2f64)
15126 return SDValue();
15127
15128 // Looking for:
15129 // (build_vector ([su]int_to_fp (extractelt 0)), [su]int_to_fp (extractelt 1))
15130 if (FirstInput.getOpcode() != ISD::SINT_TO_FP &&
15131 FirstInput.getOpcode() != ISD::UINT_TO_FP)
15132 return SDValue();
15133 if (N->getOperand(1).getOpcode() != ISD::SINT_TO_FP &&
15134 N->getOperand(1).getOpcode() != ISD::UINT_TO_FP)
15135 return SDValue();
15136 if (FirstInput.getOpcode() != N->getOperand(1).getOpcode())
15137 return SDValue();
15138
15139 SDValue Ext1 = FirstInput.getOperand(0);
15140 SDValue Ext2 = N->getOperand(1).getOperand(0);
15141 if(Ext1.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
15142 Ext2.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
15143 return SDValue();
15144
15145 ConstantSDNode *Ext1Op = dyn_cast<ConstantSDNode>(Ext1.getOperand(1));
15146 ConstantSDNode *Ext2Op = dyn_cast<ConstantSDNode>(Ext2.getOperand(1));
15147 if (!Ext1Op || !Ext2Op)
15148 return SDValue();
15149 if (Ext1.getOperand(0).getValueType() != MVT::v4i32 ||
15150 Ext1.getOperand(0) != Ext2.getOperand(0))
15151 return SDValue();
15152
15153 int FirstElem = Ext1Op->getZExtValue();
15154 int SecondElem = Ext2Op->getZExtValue();
15155 int SubvecIdx;
15156 if (FirstElem == 0 && SecondElem == 1)
15157 SubvecIdx = Subtarget.isLittleEndian() ? 1 : 0;
15158 else if (FirstElem == 2 && SecondElem == 3)
15159 SubvecIdx = Subtarget.isLittleEndian() ? 0 : 1;
15160 else
15161 return SDValue();
15162
15163 SDValue SrcVec = Ext1.getOperand(0);
15164 auto NodeType = (N->getOperand(1).getOpcode() == ISD::SINT_TO_FP) ?
15165 PPCISD::SINT_VEC_TO_FP : PPCISD::UINT_VEC_TO_FP;
15166 return DAG.getNode(NodeType, dl, MVT::v2f64,
15167 SrcVec, DAG.getIntPtrConstant(SubvecIdx, dl));
15168}
15169
15170SDValue PPCTargetLowering::combineFPToIntToFP(SDNode *N,
15171 DAGCombinerInfo &DCI) const {
15172 assert((N->getOpcode() == ISD::SINT_TO_FP ||
15173 N->getOpcode() == ISD::UINT_TO_FP) &&
15174 "Need an int -> FP conversion node here");
15175
15176 if (useSoftFloat() || !Subtarget.has64BitSupport())
15177 return SDValue();
15178
15179 SelectionDAG &DAG = DCI.DAG;
15180 SDLoc dl(N);
15181 SDValue Op(N, 0);
15182
15183 // Don't handle ppc_fp128 here or conversions that are out-of-range capable
15184 // from the hardware.
15185 if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
15186 return SDValue();
15187 if (!Op.getOperand(0).getValueType().isSimple())
15188 return SDValue();
15189 if (Op.getOperand(0).getValueType().getSimpleVT() <= MVT(MVT::i1) ||
15190 Op.getOperand(0).getValueType().getSimpleVT() > MVT(MVT::i64))
15191 return SDValue();
15192
15193 SDValue FirstOperand(Op.getOperand(0));
15194 bool SubWordLoad = FirstOperand.getOpcode() == ISD::LOAD &&
15195 (FirstOperand.getValueType() == MVT::i8 ||
15196 FirstOperand.getValueType() == MVT::i16);
15197 if (Subtarget.hasP9Vector() && Subtarget.hasP9Altivec() && SubWordLoad) {
15198 bool Signed = N->getOpcode() == ISD::SINT_TO_FP;
15199 bool DstDouble = Op.getValueType() == MVT::f64;
15200 unsigned ConvOp = Signed ?
15201 (DstDouble ? PPCISD::FCFID : PPCISD::FCFIDS) :
15202 (DstDouble ? PPCISD::FCFIDU : PPCISD::FCFIDUS);
15203 SDValue WidthConst =
15204 DAG.getIntPtrConstant(FirstOperand.getValueType() == MVT::i8 ? 1 : 2,
15205 dl, false);
15206 LoadSDNode *LDN = cast<LoadSDNode>(FirstOperand.getNode());
15207 SDValue Ops[] = { LDN->getChain(), LDN->getBasePtr(), WidthConst };
15208 SDValue Ld = DAG.getMemIntrinsicNode(PPCISD::LXSIZX, dl,
15209 DAG.getVTList(MVT::f64, MVT::Other),
15210 Ops, MVT::i8, LDN->getMemOperand());
15211 DAG.makeEquivalentMemoryOrdering(LDN, Ld);
15212
15213 // For signed conversion, we need to sign-extend the value in the VSR
15214 if (Signed) {
15215 SDValue ExtOps[] = { Ld, WidthConst };
15216 SDValue Ext = DAG.getNode(PPCISD::VEXTS, dl, MVT::f64, ExtOps);
15217 return DAG.getNode(ConvOp, dl, DstDouble ? MVT::f64 : MVT::f32, Ext);
15218 } else
15219 return DAG.getNode(ConvOp, dl, DstDouble ? MVT::f64 : MVT::f32, Ld);
15220 }
15221
15222
15223 // For i32 intermediate values, unfortunately, the conversion functions
15224 // leave the upper 32 bits of the value undefined. Within the set of
15225 // scalar instructions, we have no method for zero- or sign-extending the
15226 // value. Thus, we cannot handle i32 intermediate values here.
15227 if (Op.getOperand(0).getValueType() == MVT::i32)
15228 return SDValue();
15229
15230 assert((Op.getOpcode() == ISD::SINT_TO_FP || Subtarget.hasFPCVT()) &&
15231 "UINT_TO_FP is supported only with FPCVT");
15232
15233 // If we have FCFIDS, then use it when converting to single-precision.
15234 // Otherwise, convert to double-precision and then round.
15235 unsigned FCFOp = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
15236 ? (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDUS
15237 : PPCISD::FCFIDS)
15238 : (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDU
15239 : PPCISD::FCFID);
15240 MVT FCFTy = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
15241 ? MVT::f32
15242 : MVT::f64;
15243
15244 // If we're converting from a float, to an int, and back to a float again,
15245 // then we don't need the store/load pair at all.
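// E.g. (annotation): (sint_to_fp f64 (fp_to_sint i64 %x:f64)) folds to
// (FCFID (FCTIDZ %x)); an extra FP_ROUND to f32 is appended when the result
// is f32 and the subtarget lacks FPCVT.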
15246 if ((Op.getOperand(0).getOpcode() == ISD::FP_TO_UINT &&
15247 Subtarget.hasFPCVT()) ||
15248 (Op.getOperand(0).getOpcode() == ISD::FP_TO_SINT)) {
15249 SDValue Src = Op.getOperand(0).getOperand(0);
15250 if (Src.getValueType() == MVT::f32) {
15251 Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);
15252 DCI.AddToWorklist(Src.getNode());
15253 } else if (Src.getValueType() != MVT::f64) {
15254 // Make sure that we don't pick up a ppc_fp128 source value.
15255 return SDValue();
15256 }
15257
15258 unsigned FCTOp =
15259 Op.getOperand(0).getOpcode() == ISD::FP_TO_SINT ? PPCISD::FCTIDZ :
15260 PPCISD::FCTIDUZ;
15261
15262 SDValue Tmp = DAG.getNode(FCTOp, dl, MVT::f64, Src);
15263 SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Tmp);
15264
15265 if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) {
15266 FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP,
15267 DAG.getIntPtrConstant(0, dl, /*isTarget=*/true));
15268 DCI.AddToWorklist(FP.getNode());
15269 }
15270
15271 return FP;
15272 }
15273
15274 return SDValue();
15275}
15276
15277// expandVSXLoadForLE - Convert VSX loads (which may be intrinsics for
15278// builtins) into loads with swaps.
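// Illustrative example (annotation): on little-endian VSX,
//   (v4i32 load %p)
// is rewritten as (bitcast v4i32 (XXSWAPD (LXVD2X %p))) so that the
// in-register element order matches what the rest of the DAG expects.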
15279SDValue PPCTargetLowering::expandVSXLoadForLE(SDNode *N,
15280 DAGCombinerInfo &DCI) const {
15281 // Delay VSX load for LE combine until after LegalizeOps to prioritize other
15282 // load combines.
15283 if (DCI.isBeforeLegalizeOps())
15284 return SDValue();
15285
15286 SelectionDAG &DAG = DCI.DAG;
15287 SDLoc dl(N);
15288 SDValue Chain;
15289 SDValue Base;
15290 MachineMemOperand *MMO;
15291
15292 switch (N->getOpcode()) {
15293 default:
15294 llvm_unreachable("Unexpected opcode for little endian VSX load");
15295 case ISD::LOAD: {
15296 LoadSDNode *LD = cast<LoadSDNode>(N);
15297 Chain = LD->getChain();
15298 Base = LD->getBasePtr();
15299 MMO = LD->getMemOperand();
15300 // If the MMO suggests this isn't a load of a full vector, leave
15301 // things alone. For a built-in, we have to make the change for
15302 // correctness, so if there is a size problem that will be a bug.
15303 if (!MMO->getSize().hasValue() || MMO->getSize().getValue() < 16)
15304 return SDValue();
15305 break;
15306 }
15307 case ISD::INTRINSIC_W_CHAIN: {
15308 MemIntrinsicSDNode *Intrin = cast<MemIntrinsicSDNode>(N);
15309 Chain = Intrin->getChain();
15310 // Similarly to the store case below, Intrin->getBasePtr() doesn't get
15311 // us what we want. Get operand 2 instead.
15312 Base = Intrin->getOperand(2);
15313 MMO = Intrin->getMemOperand();
15314 break;
15315 }
15316 }
15317
15318 MVT VecTy = N->getValueType(0).getSimpleVT();
15319
15320 SDValue LoadOps[] = { Chain, Base };
15321 SDValue Load = DAG.getMemIntrinsicNode(PPCISD::LXVD2X, dl,
15322 DAG.getVTList(MVT::v2f64, MVT::Other),
15323 LoadOps, MVT::v2f64, MMO);
15324
15325 DCI.AddToWorklist(Load.getNode());
15326 Chain = Load.getValue(1);
15327 SDValue Swap = DAG.getNode(
15328 PPCISD::XXSWAPD, dl, DAG.getVTList(MVT::v2f64, MVT::Other), Chain, Load);
15329 DCI.AddToWorklist(Swap.getNode());
15330
15331 // Add a bitcast if the resulting load type doesn't match v2f64.
15332 if (VecTy != MVT::v2f64) {
15333 SDValue N = DAG.getNode(ISD::BITCAST, dl, VecTy, Swap);
15334 DCI.AddToWorklist(N.getNode());
15335 // Package {bitcast value, swap's chain} to match Load's shape.
15336 return DAG.getNode(ISD::MERGE_VALUES, dl, DAG.getVTList(VecTy, MVT::Other),
15337 N, Swap.getValue(1));
15338 }
15339
15340 return Swap;
15341}
15342
15343// expandVSXStoreForLE - Convert VSX stores (which may be intrinsics for
15344// builtins) into stores with swaps.
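// Illustrative example (annotation): the mirror image of the load case,
//   (store v4i32 %v, %p)
// becomes (STXVD2X (XXSWAPD (bitcast v2f64 %v)), %p).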
15345SDValue PPCTargetLowering::expandVSXStoreForLE(SDNode *N,
15346 DAGCombinerInfo &DCI) const {
15347 // Delay VSX store for LE combine until after LegalizeOps to prioritize other
15348 // store combines.
15349 if (DCI.isBeforeLegalizeOps())
15350 return SDValue();
15351
15352 SelectionDAG &DAG = DCI.DAG;
15353 SDLoc dl(N);
15354 SDValue Chain;
15355 SDValue Base;
15356 unsigned SrcOpnd;
15357 MachineMemOperand *MMO;
15358
15359 switch (N->getOpcode()) {
15360 default:
15361 llvm_unreachable("Unexpected opcode for little endian VSX store");
15362 case ISD::STORE: {
15363 StoreSDNode *ST = cast<StoreSDNode>(N);
15364 Chain = ST->getChain();
15365 Base = ST->getBasePtr();
15366 MMO = ST->getMemOperand();
15367 SrcOpnd = 1;
15368 // If the MMO suggests this isn't a store of a full vector, leave
15369 // things alone. For a built-in, we have to make the change for
15370 // correctness, so if there is a size problem that will be a bug.
15371 if (!MMO->getSize().hasValue() || MMO->getSize().getValue() < 16)
15372 return SDValue();
15373 break;
15374 }
15375 case ISD::INTRINSIC_VOID: {
15376 MemIntrinsicSDNode *Intrin = cast<MemIntrinsicSDNode>(N);
15377 Chain = Intrin->getChain();
15378 // Intrin->getBasePtr() oddly does not get what we want.
15379 Base = Intrin->getOperand(3);
15380 MMO = Intrin->getMemOperand();
15381 SrcOpnd = 2;
15382 break;
15383 }
15384 }
15385
15386 SDValue Src = N->getOperand(SrcOpnd);
15387 MVT VecTy = Src.getValueType().getSimpleVT();
15388
15389 // All stores are done as v2f64 and possible bit cast.
15390 if (VecTy != MVT::v2f64) {
15391 Src = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, Src);
15392 DCI.AddToWorklist(Src.getNode());
15393 }
15394
15395 SDValue Swap = DAG.getNode(PPCISD::XXSWAPD, dl,
15396 DAG.getVTList(MVT::v2f64, MVT::Other), Chain, Src);
15397 DCI.AddToWorklist(Swap.getNode());
15398 Chain = Swap.getValue(1);
15399 SDValue StoreOps[] = { Chain, Swap, Base };
15400 SDValue Store = DAG.getMemIntrinsicNode(PPCISD::STXVD2X, dl,
15401 DAG.getVTList(MVT::Other),
15402 StoreOps, VecTy, MMO);
15403 DCI.AddToWorklist(Store.getNode());
15404 return Store;
15405}
15406
15407// Handle DAG combine for STORE (FP_TO_INT F).
15408SDValue PPCTargetLowering::combineStoreFPToInt(SDNode *N,
15409 DAGCombinerInfo &DCI) const {
15410 SelectionDAG &DAG = DCI.DAG;
15411 SDLoc dl(N);
15412 unsigned Opcode = N->getOperand(1).getOpcode();
15413 (void)Opcode;
15414 bool Strict = N->getOperand(1)->isStrictFPOpcode();
15415
15416 assert((Opcode == ISD::FP_TO_SINT || Opcode == ISD::FP_TO_UINT ||
15417 Opcode == ISD::STRICT_FP_TO_SINT || Opcode == ISD::STRICT_FP_TO_UINT)
15418 && "Not a FP_TO_INT Instruction!");
15419
15420 SDValue Val = N->getOperand(1).getOperand(Strict ? 1 : 0);
15421 EVT Op1VT = N->getOperand(1).getValueType();
15422 EVT ResVT = Val.getValueType();
15423
15424 if (!Subtarget.hasVSX() || !Subtarget.hasFPCVT() || !isTypeLegal(ResVT))
15425 return SDValue();
15426
15427 // Only perform combine for conversion to i64/i32 or power9 i16/i8.
15428 bool ValidTypeForStoreFltAsInt =
15429 (Op1VT == MVT::i32 || (Op1VT == MVT::i64 && Subtarget.isPPC64()) ||
15430 (Subtarget.hasP9Vector() && (Op1VT == MVT::i16 || Op1VT == MVT::i8)));
15431
15432 // TODO: Lower conversion from f128 on all VSX targets
15433 if (ResVT == MVT::ppcf128 || (ResVT == MVT::f128 && !Subtarget.hasP9Vector()))
15434 return SDValue();
15435
15436 if ((Op1VT != MVT::i64 && !Subtarget.hasP8Vector()) ||
15437 cast<StoreSDNode>(N)->isTruncatingStore() || !ValidTypeForStoreFltAsInt)
15438 return SDValue();
15439
15440 Val = convertFPToInt(N->getOperand(1), DAG, Subtarget);
15441
15442 // Set number of bytes being converted.
15443 unsigned ByteSize = Op1VT.getScalarSizeInBits() / 8;
15444 SDValue Ops[] = {N->getOperand(0), Val, N->getOperand(2),
15445 DAG.getIntPtrConstant(ByteSize, dl, false),
15446 DAG.getValueType(Op1VT)};
15447
15448 Val = DAG.getMemIntrinsicNode(PPCISD::ST_VSR_SCAL_INT, dl,
15449 DAG.getVTList(MVT::Other), Ops,
15450 cast<StoreSDNode>(N)->getMemoryVT(),
15451 cast<StoreSDNode>(N)->getMemOperand());
15452
15453 return Val;
15454}
15455
15456static bool isAlternatingShuffMask(const ArrayRef<int> &Mask, int NumElts) {
15457 // Check that the source of the element keeps flipping
15458 // (i.e. Mask[i] < NumElts -> Mask[i+i] >= NumElts).
15459 bool PrevElemFromFirstVec = Mask[0] < NumElts;
15460 for (int i = 1, e = Mask.size(); i < e; i++) {
15461 if (PrevElemFromFirstVec && Mask[i] < NumElts)
15462 return false;
15463 if (!PrevElemFromFirstVec && Mask[i] >= NumElts)
15464 return false;
15465 PrevElemFromFirstVec = !PrevElemFromFirstVec;
15466 }
15467 return true;
15468}
15469
15470static bool isSplatBV(SDValue Op) {
15471 if (Op.getOpcode() != ISD::BUILD_VECTOR)
15472 return false;
15473 SDValue FirstOp;
15474
15475 // Find first non-undef input.
15476 for (int i = 0, e = Op.getNumOperands(); i < e; i++) {
15477 FirstOp = Op.getOperand(i);
15478 if (!FirstOp.isUndef())
15479 break;
15480 }
15481
15482 // All inputs are undef or the same as the first non-undef input.
15483 for (int i = 1, e = Op.getNumOperands(); i < e; i++)
15484 if (Op.getOperand(i) != FirstOp && !Op.getOperand(i).isUndef())
15485 return false;
15486 return true;
15487}
15488
15489static SDValue isScalarToVec(SDValue Op) {
15490 if (Op.getOpcode() == ISD::SCALAR_TO_VECTOR)
15491 return Op;
15492 if (Op.getOpcode() != ISD::BITCAST)
15493 return SDValue();
15494 Op = Op.getOperand(0);
15495 if (Op.getOpcode() == ISD::SCALAR_TO_VECTOR)
15496 return Op;
15497 return SDValue();
15498}
15499
15500// Fix up the shuffle mask to account for the fact that the result of
15501// scalar_to_vector is not in lane zero. This just takes all values in
15502// the ranges specified by the min/max indices and adds the number of
15503// elements required to ensure each element comes from the respective
15504// position in the valid lane.
15505// On little endian, that's just the corresponding element in the other
15506// half of the vector. On big endian, it is in the same half but right
15507// justified rather than left justified in that half.
15508static void fixupShuffleMaskForPermutedSToV(SmallVectorImpl<int> &ShuffV,
15509 int LHSMaxIdx, int RHSMinIdx,
15510 int RHSMaxIdx, int HalfVec,
15511 unsigned ValidLaneWidth,
15512 const PPCSubtarget &Subtarget) {
15513 for (int i = 0, e = ShuffV.size(); i < e; i++) {
15514 int Idx = ShuffV[i];
15515 if ((Idx >= 0 && Idx < LHSMaxIdx) || (Idx >= RHSMinIdx && Idx < RHSMaxIdx))
15516 ShuffV[i] +=
15517 Subtarget.isLittleEndian() ? HalfVec : HalfVec - ValidLaneWidth;
15518 }
15519}
15520
15521// Replace a SCALAR_TO_VECTOR with a SCALAR_TO_VECTOR_PERMUTED except if
15522// the original is:
15523// (<n x Ty> (scalar_to_vector (Ty (extract_elt <n x Ty> %a, C))))
15524// In such a case, just change the shuffle mask to extract the element
15525// from the permuted index.
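// Illustrative example (annotation): for
//   (v4i32 scalar_to_vector (i32 extract_elt v4i32 %a, 2))
// no SCALAR_TO_VECTOR_PERMUTED node is built; instead a vector_shuffle of
// %a is returned that moves element 2 into the lane a permuted
// scalar_to_vector would occupy.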
15526static SDValue getSToVPermuted(SDValue OrigSToV, SelectionDAG &DAG,
15527 const PPCSubtarget &Subtarget) {
15528 SDLoc dl(OrigSToV);
15529 EVT VT = OrigSToV.getValueType();
15530 assert(OrigSToV.getOpcode() == ISD::SCALAR_TO_VECTOR &&
15531 "Expecting a SCALAR_TO_VECTOR here");
15532 SDValue Input = OrigSToV.getOperand(0);
15533
15534 if (Input.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
15535 ConstantSDNode *Idx = dyn_cast<ConstantSDNode>(Input.getOperand(1));
15536 SDValue OrigVector = Input.getOperand(0);
15537
15538 // Can't handle non-const element indices or different vector types
15539 // for the input to the extract and the output of the scalar_to_vector.
15540 if (Idx && VT == OrigVector.getValueType()) {
15541 unsigned NumElts = VT.getVectorNumElements();
15542 assert(
15543 NumElts > 1 &&
15544 "Cannot produce a permuted scalar_to_vector for one element vector");
15545 SmallVector<int, 16> NewMask(NumElts, -1);
15546 unsigned ResultInElt = NumElts / 2;
15547 ResultInElt -= Subtarget.isLittleEndian() ? 0 : 1;
15548 NewMask[ResultInElt] = Idx->getZExtValue();
15549 return DAG.getVectorShuffle(VT, dl, OrigVector, OrigVector, NewMask);
15550 }
15551 }
15552 return DAG.getNode(PPCISD::SCALAR_TO_VECTOR_PERMUTED, dl, VT,
15553 OrigSToV.getOperand(0));
15554}
15555
15556// On little endian subtargets, combine shuffles such as:
15557// vector_shuffle<16,1,17,3,18,5,19,7,20,9,21,11,22,13,23,15>, <zero>, %b
15558// into:
15559// vector_shuffle<16,0,17,1,18,2,19,3,20,4,21,5,22,6,23,7>, <zero>, %b
15560// because the latter can be matched to a single instruction merge.
15561// Furthermore, SCALAR_TO_VECTOR on little endian always involves a permute
15562// to put the value into element zero. Adjust the shuffle mask so that the
15563// vector can remain in permuted form (to prevent a swap prior to a shuffle).
15564// On big endian targets, this is still useful for SCALAR_TO_VECTOR
15565// nodes with elements smaller than doubleword because all the ways
15566// of getting scalar data into a vector register put the value in the
15567// rightmost element of the left half of the vector.
15568SDValue PPCTargetLowering::combineVectorShuffle(ShuffleVectorSDNode *SVN,
15569 SelectionDAG &DAG) const {
15570 SDValue LHS = SVN->getOperand(0);
15571 SDValue RHS = SVN->getOperand(1);
15572 auto Mask = SVN->getMask();
15573 int NumElts = LHS.getValueType().getVectorNumElements();
15574 SDValue Res(SVN, 0);
15575 SDLoc dl(SVN);
15576 bool IsLittleEndian = Subtarget.isLittleEndian();
15577
15578 // On big endian targets this is only useful for subtargets with direct moves.
15579 // On little endian targets it would be useful for all subtargets with VSX.
15580 // However adding special handling for LE subtargets without direct moves
15581 // would be wasted effort since the minimum arch for LE is ISA 2.07 (Power8)
15582 // which includes direct moves.
15583 if (!Subtarget.hasDirectMove())
15584 return Res;
15585
15586 // If this is not a shuffle of a shuffle and the first element comes from
15587 // the second vector, canonicalize to the commuted form. This will make it
15588 // more likely to match one of the single instruction patterns.
15589 if (Mask[0] >= NumElts && LHS.getOpcode() != ISD::VECTOR_SHUFFLE &&
15590 RHS.getOpcode() != ISD::VECTOR_SHUFFLE) {
15591 std::swap(LHS, RHS);
15592 Res = DAG.getCommutedVectorShuffle(*SVN);
15593 Mask = cast<ShuffleVectorSDNode>(Res)->getMask();
15594 }
15595
15596 // Adjust the shuffle mask if either input vector comes from a
15597 // SCALAR_TO_VECTOR and keep the respective input vector in permuted
15598 // form (to prevent the need for a swap).
15599 SmallVector<int, 16> ShuffV(Mask);
15600 SDValue SToVLHS = isScalarToVec(LHS);
15601 SDValue SToVRHS = isScalarToVec(RHS);
15602 if (SToVLHS || SToVRHS) {
15603 // FIXME: If both LHS and RHS are SCALAR_TO_VECTOR, but are not the
15604 // same type and have differing element sizes, then do not perform
15605 // the following transformation. The current transformation for
15606 // SCALAR_TO_VECTOR assumes that both input vectors have the same
15607 // element size. This will be updated in the future to account for
15608 // differing sizes of the LHS and RHS.
15609 if (SToVLHS && SToVRHS &&
15610 (SToVLHS.getValueType().getScalarSizeInBits() !=
15611 SToVRHS.getValueType().getScalarSizeInBits()))
15612 return Res;
15613
15614 int NumEltsIn = SToVLHS ? SToVLHS.getValueType().getVectorNumElements()
15615 : SToVRHS.getValueType().getVectorNumElements();
15616 int NumEltsOut = ShuffV.size();
15617 // The width of the "valid lane" (i.e. the lane that contains the value that
15618 // is vectorized) needs to be expressed in terms of the number of elements
15619 // of the shuffle. It is thereby the ratio of the values before and after
15620 // any bitcast.
15621 unsigned ValidLaneWidth =
15622 SToVLHS ? SToVLHS.getValueType().getScalarSizeInBits() /
15623 LHS.getValueType().getScalarSizeInBits()
15624 : SToVRHS.getValueType().getScalarSizeInBits() /
15625 RHS.getValueType().getScalarSizeInBits();
15626
15627 // Initially assume that neither input is permuted. These will be adjusted
15628 // accordingly if either input is.
15629 int LHSMaxIdx = -1;
15630 int RHSMinIdx = -1;
15631 int RHSMaxIdx = -1;
15632 int HalfVec = LHS.getValueType().getVectorNumElements() / 2;
15633
15634 // Get the permuted scalar to vector nodes for the source(s) that come from
15635 // ISD::SCALAR_TO_VECTOR.
15636 // On big endian systems, this only makes sense for element sizes smaller
15637 // than 64 bits since for 64-bit elements, all instructions already put
15638 // the value into element zero. Since scalar size of LHS and RHS may differ
15639 // after isScalarToVec, this should be checked using their own sizes.
15640 if (SToVLHS) {
15641 if (!IsLittleEndian && SToVLHS.getValueType().getScalarSizeInBits() >= 64)
15642 return Res;
15643 // Set up the values for the shuffle vector fixup.
15644 LHSMaxIdx = NumEltsOut / NumEltsIn;
15645 SToVLHS = getSToVPermuted(SToVLHS, DAG, Subtarget);
15646 if (SToVLHS.getValueType() != LHS.getValueType())
15647 SToVLHS = DAG.getBitcast(LHS.getValueType(), SToVLHS);
15648 LHS = SToVLHS;
15649 }
15650 if (SToVRHS) {
15651 if (!IsLittleEndian && SToVRHS.getValueType().getScalarSizeInBits() >= 64)
15652 return Res;
15653 RHSMinIdx = NumEltsOut;
15654 RHSMaxIdx = NumEltsOut / NumEltsIn + RHSMinIdx;
15655 SToVRHS = getSToVPermuted(SToVRHS, DAG, Subtarget);
15656 if (SToVRHS.getValueType() != RHS.getValueType())
15657 SToVRHS = DAG.getBitcast(RHS.getValueType(), SToVRHS);
15658 RHS = SToVRHS;
15659 }
15660
15661 // Fix up the shuffle mask to reflect where the desired element actually is.
15662 // The minimum and maximum indices that correspond to element zero for both
15663 // the LHS and RHS are computed and will control which shuffle mask entries
15664 // are to be changed. For example, if the RHS is permuted, any shuffle mask
15665 // entries in the range [RHSMinIdx,RHSMaxIdx) will be adjusted.
15666 fixupShuffleMaskForPermutedSToV(ShuffV, LHSMaxIdx, RHSMinIdx, RHSMaxIdx,
15667 HalfVec, ValidLaneWidth, Subtarget);
15668 Res = DAG.getVectorShuffle(SVN->getValueType(0), dl, LHS, RHS, ShuffV);
15669
15670 // We may have simplified away the shuffle. We won't be able to do anything
15671 // further with it here.
15672 if (!isa<ShuffleVectorSDNode>(Res))
15673 return Res;
15674 Mask = cast<ShuffleVectorSDNode>(Res)->getMask();
15675 }
15676
15677 SDValue TheSplat = IsLittleEndian ? RHS : LHS;
15678 // The common case after we commuted the shuffle is that the RHS is a splat
15679 // and we have elements coming in from the splat at indices that are not
15680 // conducive to using a merge.
15681 // Example:
15682 // vector_shuffle<0,17,1,19,2,21,3,23,4,25,5,27,6,29,7,31> t1, <zero>
15683 if (!isSplatBV(TheSplat))
15684 return Res;
15685
15686 // We are looking for a mask such that all even elements are from
15687 // one vector and all odd elements from the other.
15688 if (!isAlternatingShuffMask(Mask, NumElts))
15689 return Res;
15690
15691 // Adjust the mask so we are pulling in the same index from the splat
15692 // as the index from the interesting vector in consecutive elements.
15693 if (IsLittleEndian) {
15694 // Example (even elements from first vector):
15695 // vector_shuffle<0,16,1,17,2,18,3,19,4,20,5,21,6,22,7,23> t1, <zero>
15696 if (Mask[0] < NumElts)
15697 for (int i = 1, e = Mask.size(); i < e; i += 2) {
15698 if (ShuffV[i] < 0)
15699 continue;
15700 // If element from non-splat is undef, pick first element from splat.
15701 ShuffV[i] = (ShuffV[i - 1] >= 0 ? ShuffV[i - 1] : 0) + NumElts;
15702 }
15703 // Example (odd elements from first vector):
15704 // vector_shuffle<16,0,17,1,18,2,19,3,20,4,21,5,22,6,23,7> t1, <zero>
15705 else
15706 for (int i = 0, e = Mask.size(); i < e; i += 2) {
15707 if (ShuffV[i] < 0)
15708 continue;
15709 // If element from non-splat is undef, pick first element from splat.
15710 ShuffV[i] = (ShuffV[i + 1] >= 0 ? ShuffV[i + 1] : 0) + NumElts;
15711 }
15712 } else {
15713 // Example (even elements from first vector):
15714 // vector_shuffle<0,16,1,17,2,18,3,19,4,20,5,21,6,22,7,23> <zero>, t1
15715 if (Mask[0] < NumElts)
15716 for (int i = 0, e = Mask.size(); i < e; i += 2) {
15717 if (ShuffV[i] < 0)
15718 continue;
15719 // If element from non-splat is undef, pick first element from splat.
15720 ShuffV[i] = ShuffV[i + 1] >= 0 ? ShuffV[i + 1] - NumElts : 0;
15721 }
15722 // Example (odd elements from first vector):
15723 // vector_shuffle<16,0,17,1,18,2,19,3,20,4,21,5,22,6,23,7> <zero>, t1
15724 else
15725 for (int i = 1, e = Mask.size(); i < e; i += 2) {
15726 if (ShuffV[i] < 0)
15727 continue;
15728 // If element from non-splat is undef, pick first element from splat.
15729 ShuffV[i] = ShuffV[i - 1] >= 0 ? ShuffV[i - 1] - NumElts : 0;
15730 }
15731 }
15732
15733 // If the RHS has undefs, we need to remove them since we may have created
15734 // a shuffle that adds those instead of the splat value.
15735 SDValue SplatVal =
15736 cast<BuildVectorSDNode>(TheSplat.getNode())->getSplatValue();
15737 TheSplat = DAG.getSplatBuildVector(TheSplat.getValueType(), dl, SplatVal);
15738
15739 if (IsLittleEndian)
15740 RHS = TheSplat;
15741 else
15742 LHS = TheSplat;
15743 return DAG.getVectorShuffle(SVN->getValueType(0), dl, LHS, RHS, ShuffV);
15744}
15745
15746SDValue PPCTargetLowering::combineVReverseMemOP(ShuffleVectorSDNode *SVN,
15747 LSBaseSDNode *LSBase,
15748 DAGCombinerInfo &DCI) const {
15749 assert((ISD::isNormalLoad(LSBase) || ISD::isNormalStore(LSBase)) &&
15750 "Not a reverse memop pattern!");
15751
15752 auto IsElementReverse = [](const ShuffleVectorSDNode *SVN) -> bool {
15753 auto Mask = SVN->getMask();
15754 int i = 0;
15755 auto I = Mask.rbegin();
15756 auto E = Mask.rend();
15757
15758 for (; I != E; ++I) {
15759 if (*I != i)
15760 return false;
15761 i++;
15762 }
15763 return true;
15764 };
15765
15766 SelectionDAG &DAG = DCI.DAG;
15767 EVT VT = SVN->getValueType(0);
15768
15769 if (!isTypeLegal(VT) || !Subtarget.isLittleEndian() || !Subtarget.hasVSX())
15770 return SDValue();
15771
15772   // Before P9, we have the PPCVSXSwapRemoval pass to hack the element order.
15773   // See the comment in PPCVSXSwapRemoval.cpp.
15774   // This combine conflicts with that optimization, so we don't do it here.
15775 if (!Subtarget.hasP9Vector())
15776 return SDValue();
15777
15778   if (!IsElementReverse(SVN))
15779 return SDValue();
15780
15781 if (LSBase->getOpcode() == ISD::LOAD) {
15782     // If result 0 of the load has any user other than the shufflevector
15783     // instruction, it is not profitable to replace the shufflevector with
15784     // a reverse load.
15785 for (SDNode::use_iterator UI = LSBase->use_begin(), UE = LSBase->use_end();
15786 UI != UE; ++UI)
15787 if (UI.getUse().getResNo() == 0 && UI->getOpcode() != ISD::VECTOR_SHUFFLE)
15788 return SDValue();
15789
15790 SDLoc dl(LSBase);
15791 SDValue LoadOps[] = {LSBase->getChain(), LSBase->getBasePtr()};
15792 return DAG.getMemIntrinsicNode(
15793 PPCISD::LOAD_VEC_BE, dl, DAG.getVTList(VT, MVT::Other), LoadOps,
15794 LSBase->getMemoryVT(), LSBase->getMemOperand());
15795 }
15796
15797 if (LSBase->getOpcode() == ISD::STORE) {
15798 // If there are other uses of the shuffle, the swap cannot be avoided.
15799 // Forcing the use of an X-Form (since swapped stores only have
15800 // X-Forms) without removing the swap is unprofitable.
15801 if (!SVN->hasOneUse())
15802 return SDValue();
15803
15804 SDLoc dl(LSBase);
15805 SDValue StoreOps[] = {LSBase->getChain(), SVN->getOperand(0),
15806 LSBase->getBasePtr()};
15807 return DAG.getMemIntrinsicNode(
15808 PPCISD::STORE_VEC_BE, dl, DAG.getVTList(MVT::Other), StoreOps,
15809 LSBase->getMemoryVT(), LSBase->getMemOperand());
15810 }
15811
15812 llvm_unreachable("Expected a load or store node here");
15813}
15814
15815static bool isStoreConditional(SDValue Intrin, unsigned &StoreWidth) {
15816 unsigned IntrinsicID = Intrin.getConstantOperandVal(1);
15817 if (IntrinsicID == Intrinsic::ppc_stdcx)
15818 StoreWidth = 8;
15819 else if (IntrinsicID == Intrinsic::ppc_stwcx)
15820 StoreWidth = 4;
15821 else if (IntrinsicID == Intrinsic::ppc_sthcx)
15822 StoreWidth = 2;
15823 else if (IntrinsicID == Intrinsic::ppc_stbcx)
15824 StoreWidth = 1;
15825 else
15826 return false;
15827 return true;
15828}
15829
15830 SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
15831                                              DAGCombinerInfo &DCI) const {
15832 SelectionDAG &DAG = DCI.DAG;
15833 SDLoc dl(N);
15834 switch (N->getOpcode()) {
15835 default: break;
15836 case ISD::ADD:
15837 return combineADD(N, DCI);
15838 case ISD::AND: {
15839 // We don't want (and (zext (shift...)), C) if C fits in the width of the
15840 // original input as that will prevent us from selecting optimal rotates.
15841 // This only matters if the input to the extend is i32 widened to i64.
15842 SDValue Op1 = N->getOperand(0);
15843 SDValue Op2 = N->getOperand(1);
15844 if ((Op1.getOpcode() != ISD::ZERO_EXTEND &&
15845 Op1.getOpcode() != ISD::ANY_EXTEND) ||
15846 !isa<ConstantSDNode>(Op2) || N->getValueType(0) != MVT::i64 ||
15847 Op1.getOperand(0).getValueType() != MVT::i32)
15848 break;
15849 SDValue NarrowOp = Op1.getOperand(0);
15850 if (NarrowOp.getOpcode() != ISD::SHL && NarrowOp.getOpcode() != ISD::SRL &&
15851 NarrowOp.getOpcode() != ISD::ROTL && NarrowOp.getOpcode() != ISD::ROTR)
15852 break;
15853
15854 uint64_t Imm = Op2->getAsZExtVal();
15855 // Make sure that the constant is narrow enough to fit in the narrow type.
15856 if (!isUInt<32>(Imm))
15857 break;
15858 SDValue ConstOp = DAG.getConstant(Imm, dl, MVT::i32);
15859 SDValue NarrowAnd = DAG.getNode(ISD::AND, dl, MVT::i32, NarrowOp, ConstOp);
15860 return DAG.getZExtOrTrunc(NarrowAnd, dl, N->getValueType(0));
15861 }
15862 case ISD::SHL:
15863 return combineSHL(N, DCI);
15864 case ISD::SRA:
15865 return combineSRA(N, DCI);
15866 case ISD::SRL:
15867 return combineSRL(N, DCI);
15868 case ISD::MUL:
15869 return combineMUL(N, DCI);
15870 case ISD::FMA:
15871 case PPCISD::FNMSUB:
15872 return combineFMALike(N, DCI);
15873 case PPCISD::SHL:
15874 if (isNullConstant(N->getOperand(0))) // 0 << V -> 0.
15875 return N->getOperand(0);
15876 break;
15877 case PPCISD::SRL:
15878 if (isNullConstant(N->getOperand(0))) // 0 >>u V -> 0.
15879 return N->getOperand(0);
15880 break;
15881 case PPCISD::SRA:
15882 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) {
15883 if (C->isZero() || // 0 >>s V -> 0.
15884 C->isAllOnes()) // -1 >>s V -> -1.
15885 return N->getOperand(0);
15886 }
15887 break;
15888 case ISD::SIGN_EXTEND:
15889 case ISD::ZERO_EXTEND:
15890 case ISD::ANY_EXTEND:
15891 return DAGCombineExtBoolTrunc(N, DCI);
15892 case ISD::TRUNCATE:
15893 return combineTRUNCATE(N, DCI);
15894 case ISD::SETCC:
15895 if (SDValue CSCC = combineSetCC(N, DCI))
15896 return CSCC;
15897 [[fallthrough]];
15898 case ISD::SELECT_CC:
15899 return DAGCombineTruncBoolExt(N, DCI);
15900 case ISD::SINT_TO_FP:
15901 case ISD::UINT_TO_FP:
15902 return combineFPToIntToFP(N, DCI);
15903   case ISD::VECTOR_SHUFFLE:
15904     if (ISD::isNormalLoad(N->getOperand(0).getNode())) {
15905 LSBaseSDNode* LSBase = cast<LSBaseSDNode>(N->getOperand(0));
15906 return combineVReverseMemOP(cast<ShuffleVectorSDNode>(N), LSBase, DCI);
15907 }
15908 return combineVectorShuffle(cast<ShuffleVectorSDNode>(N), DCI.DAG);
15909 case ISD::STORE: {
15910
15911 EVT Op1VT = N->getOperand(1).getValueType();
15912 unsigned Opcode = N->getOperand(1).getOpcode();
15913
15914 if (Opcode == ISD::FP_TO_SINT || Opcode == ISD::FP_TO_UINT ||
15915 Opcode == ISD::STRICT_FP_TO_SINT || Opcode == ISD::STRICT_FP_TO_UINT) {
15916 SDValue Val = combineStoreFPToInt(N, DCI);
15917 if (Val)
15918 return Val;
15919 }
15920
15921 if (Opcode == ISD::VECTOR_SHUFFLE && ISD::isNormalStore(N)) {
15922 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N->getOperand(1));
15923       SDValue Val = combineVReverseMemOP(SVN, cast<LSBaseSDNode>(N), DCI);
15924 if (Val)
15925 return Val;
15926 }
15927
15928 // Turn STORE (BSWAP) -> sthbrx/stwbrx.
15929 if (cast<StoreSDNode>(N)->isUnindexed() && Opcode == ISD::BSWAP &&
15930 N->getOperand(1).getNode()->hasOneUse() &&
15931 (Op1VT == MVT::i32 || Op1VT == MVT::i16 ||
15932 (Subtarget.hasLDBRX() && Subtarget.isPPC64() && Op1VT == MVT::i64))) {
15933
15934       // STBRX can only handle simple types and it makes no sense to store fewer
15935       // than two bytes in byte-reversed order.
15936 EVT mVT = cast<StoreSDNode>(N)->getMemoryVT();
15937 if (mVT.isExtended() || mVT.getSizeInBits() < 16)
15938 break;
15939
15940 SDValue BSwapOp = N->getOperand(1).getOperand(0);
15941 // Do an any-extend to 32-bits if this is a half-word input.
15942 if (BSwapOp.getValueType() == MVT::i16)
15943 BSwapOp = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, BSwapOp);
15944
15945       // If the type of the BSWAP operand is wider than the stored memory width,
15946       // it needs to be shifted right before the STBRX.
15947 if (Op1VT.bitsGT(mVT)) {
15948 int Shift = Op1VT.getSizeInBits() - mVT.getSizeInBits();
15949 BSwapOp = DAG.getNode(ISD::SRL, dl, Op1VT, BSwapOp,
15950 DAG.getConstant(Shift, dl, MVT::i32));
15951 // Need to truncate if this is a bswap of i64 stored as i32/i16.
15952 if (Op1VT == MVT::i64)
15953 BSwapOp = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, BSwapOp);
15954 }
15955
15956 SDValue Ops[] = {
15957 N->getOperand(0), BSwapOp, N->getOperand(2), DAG.getValueType(mVT)
15958 };
15959 return
15960 DAG.getMemIntrinsicNode(PPCISD::STBRX, dl, DAG.getVTList(MVT::Other),
15961 Ops, cast<StoreSDNode>(N)->getMemoryVT(),
15962 cast<StoreSDNode>(N)->getMemOperand());
15963 }
15964
15965 // STORE Constant:i32<0> -> STORE<trunc to i32> Constant:i64<0>
15966 // So it can increase the chance of CSE constant construction.
15967 if (Subtarget.isPPC64() && !DCI.isBeforeLegalize() &&
15968 isa<ConstantSDNode>(N->getOperand(1)) && Op1VT == MVT::i32) {
15969       // Need to sign-extend to 64 bits to handle negative values.
15970 EVT MemVT = cast<StoreSDNode>(N)->getMemoryVT();
15971 uint64_t Val64 = SignExtend64(N->getConstantOperandVal(1),
15972 MemVT.getSizeInBits());
15973 SDValue Const64 = DAG.getConstant(Val64, dl, MVT::i64);
15974
15975 // DAG.getTruncStore() can't be used here because it doesn't accept
15976 // the general (base + offset) addressing mode.
15977 // So we use UpdateNodeOperands and setTruncatingStore instead.
15978 DAG.UpdateNodeOperands(N, N->getOperand(0), Const64, N->getOperand(2),
15979 N->getOperand(3));
15980 cast<StoreSDNode>(N)->setTruncatingStore(true);
15981 return SDValue(N, 0);
15982 }
15983
15984     // For little endian, VSX stores require generating xxswapd/stxvd2x.
15985 // Not needed on ISA 3.0 based CPUs since we have a non-permuting store.
15986 if (Op1VT.isSimple()) {
15987 MVT StoreVT = Op1VT.getSimpleVT();
15988 if (Subtarget.needsSwapsForVSXMemOps() &&
15989 (StoreVT == MVT::v2f64 || StoreVT == MVT::v2i64 ||
15990 StoreVT == MVT::v4f32 || StoreVT == MVT::v4i32))
15991 return expandVSXStoreForLE(N, DCI);
15992 }
15993 break;
15994 }
15995 case ISD::LOAD: {
15996 LoadSDNode *LD = cast<LoadSDNode>(N);
15997 EVT VT = LD->getValueType(0);
15998
15999 // For little endian, VSX loads require generating lxvd2x/xxswapd.
16000 // Not needed on ISA 3.0 based CPUs since we have a non-permuting load.
16001 if (VT.isSimple()) {
16002 MVT LoadVT = VT.getSimpleVT();
16003 if (Subtarget.needsSwapsForVSXMemOps() &&
16004 (LoadVT == MVT::v2f64 || LoadVT == MVT::v2i64 ||
16005 LoadVT == MVT::v4f32 || LoadVT == MVT::v4i32))
16006 return expandVSXLoadForLE(N, DCI);
16007 }
16008
16009 // We sometimes end up with a 64-bit integer load, from which we extract
16010 // two single-precision floating-point numbers. This happens with
16011 // std::complex<float>, and other similar structures, because of the way we
16012 // canonicalize structure copies. However, if we lack direct moves,
16013 // then the final bitcasts from the extracted integer values to the
16014 // floating-point numbers turn into store/load pairs. Even with direct moves,
16015 // just loading the two floating-point numbers is likely better.
16016 auto ReplaceTwoFloatLoad = [&]() {
16017 if (VT != MVT::i64)
16018 return false;
16019
16020 if (LD->getExtensionType() != ISD::NON_EXTLOAD ||
16021 LD->isVolatile())
16022 return false;
16023
16024 // We're looking for a sequence like this:
16025 // t13: i64,ch = load<LD8[%ref.tmp]> t0, t6, undef:i64
16026 // t16: i64 = srl t13, Constant:i32<32>
16027 // t17: i32 = truncate t16
16028 // t18: f32 = bitcast t17
16029 // t19: i32 = truncate t13
16030 // t20: f32 = bitcast t19
16031
16032 if (!LD->hasNUsesOfValue(2, 0))
16033 return false;
16034
16035 auto UI = LD->use_begin();
16036 while (UI.getUse().getResNo() != 0) ++UI;
16037 SDNode *Trunc = *UI++;
16038 while (UI.getUse().getResNo() != 0) ++UI;
16039 SDNode *RightShift = *UI;
16040 if (Trunc->getOpcode() != ISD::TRUNCATE)
16041 std::swap(Trunc, RightShift);
16042
16043 if (Trunc->getOpcode() != ISD::TRUNCATE ||
16044 Trunc->getValueType(0) != MVT::i32 ||
16045 !Trunc->hasOneUse())
16046 return false;
16047 if (RightShift->getOpcode() != ISD::SRL ||
16048 !isa<ConstantSDNode>(RightShift->getOperand(1)) ||
16049 RightShift->getConstantOperandVal(1) != 32 ||
16050 !RightShift->hasOneUse())
16051 return false;
16052
16053 SDNode *Trunc2 = *RightShift->use_begin();
16054 if (Trunc2->getOpcode() != ISD::TRUNCATE ||
16055 Trunc2->getValueType(0) != MVT::i32 ||
16056 !Trunc2->hasOneUse())
16057 return false;
16058
16059 SDNode *Bitcast = *Trunc->use_begin();
16060 SDNode *Bitcast2 = *Trunc2->use_begin();
16061
16062 if (Bitcast->getOpcode() != ISD::BITCAST ||
16063 Bitcast->getValueType(0) != MVT::f32)
16064 return false;
16065 if (Bitcast2->getOpcode() != ISD::BITCAST ||
16066 Bitcast2->getValueType(0) != MVT::f32)
16067 return false;
16068
16069 if (Subtarget.isLittleEndian())
16070 std::swap(Bitcast, Bitcast2);
16071
16072 // Bitcast has the second float (in memory-layout order) and Bitcast2
16073 // has the first one.
16074
16075 SDValue BasePtr = LD->getBasePtr();
16076 if (LD->isIndexed()) {
16077 assert(LD->getAddressingMode() == ISD::PRE_INC &&
16078 "Non-pre-inc AM on PPC?");
16079 BasePtr =
16080 DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
16081 LD->getOffset());
16082 }
16083
16084 auto MMOFlags =
16085 LD->getMemOperand()->getFlags() & ~MachineMemOperand::MOVolatile;
16086 SDValue FloatLoad = DAG.getLoad(MVT::f32, dl, LD->getChain(), BasePtr,
16087 LD->getPointerInfo(), LD->getAlign(),
16088 MMOFlags, LD->getAAInfo());
16089 SDValue AddPtr =
16090 DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(),
16091 BasePtr, DAG.getIntPtrConstant(4, dl));
16092 SDValue FloatLoad2 = DAG.getLoad(
16093 MVT::f32, dl, SDValue(FloatLoad.getNode(), 1), AddPtr,
16094 LD->getPointerInfo().getWithOffset(4),
16095 commonAlignment(LD->getAlign(), 4), MMOFlags, LD->getAAInfo());
16096
16097 if (LD->isIndexed()) {
16098 // Note that DAGCombine should re-form any pre-increment load(s) from
16099 // what is produced here if that makes sense.
16100 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), BasePtr);
16101 }
16102
16103 DCI.CombineTo(Bitcast2, FloatLoad);
16104 DCI.CombineTo(Bitcast, FloatLoad2);
16105
16106 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, LD->isIndexed() ? 2 : 1),
16107 SDValue(FloatLoad2.getNode(), 1));
16108 return true;
16109 };
16110
16111 if (ReplaceTwoFloatLoad())
16112 return SDValue(N, 0);
16113
16114 EVT MemVT = LD->getMemoryVT();
16115 Type *Ty = MemVT.getTypeForEVT(*DAG.getContext());
16116 Align ABIAlignment = DAG.getDataLayout().getABITypeAlign(Ty);
16117 if (LD->isUnindexed() && VT.isVector() &&
16118 ((Subtarget.hasAltivec() && ISD::isNON_EXTLoad(N) &&
16119 // P8 and later hardware should just use LOAD.
16120 !Subtarget.hasP8Vector() &&
16121 (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32 ||
16122 VT == MVT::v4f32))) &&
16123 LD->getAlign() < ABIAlignment) {
16124 // This is a type-legal unaligned Altivec load.
16125 SDValue Chain = LD->getChain();
16126 SDValue Ptr = LD->getBasePtr();
16127 bool isLittleEndian = Subtarget.isLittleEndian();
16128
16129 // This implements the loading of unaligned vectors as described in
16130 // the venerable Apple Velocity Engine overview. Specifically:
16131 // https://developer.apple.com/hardwaredrivers/ve/alignment.html
16132 // https://developer.apple.com/hardwaredrivers/ve/code_optimization.html
16133 //
16134 // The general idea is to expand a sequence of one or more unaligned
16135 // loads into an alignment-based permutation-control instruction (lvsl
16136 // or lvsr), a series of regular vector loads (which always truncate
16137 // their input address to an aligned address), and a series of
16138 // permutations. The results of these permutations are the requested
16139 // loaded values. The trick is that the last "extra" load is not taken
16140 // from the address you might suspect (sizeof(vector) bytes after the
16141 // last requested load), but rather sizeof(vector) - 1 bytes after the
16142 // last requested vector. The point of this is to avoid a page fault if
16143 // the base address happened to be aligned. This works because if the
16144 // base address is aligned, then adding less than a full vector length
16145 // will cause the last vector in the sequence to be (re)loaded.
16146 // Otherwise, the next vector will be fetched as you might suspect was
16147 // necessary.
16148
16149 // We might be able to reuse the permutation generation from
16150 // a different base address offset from this one by an aligned amount.
16151 // The INTRINSIC_WO_CHAIN DAG combine will attempt to perform this
16152 // optimization later.
16153 Intrinsic::ID Intr, IntrLD, IntrPerm;
16154 MVT PermCntlTy, PermTy, LDTy;
16155 Intr = isLittleEndian ? Intrinsic::ppc_altivec_lvsr
16156 : Intrinsic::ppc_altivec_lvsl;
16157 IntrLD = Intrinsic::ppc_altivec_lvx;
16158 IntrPerm = Intrinsic::ppc_altivec_vperm;
16159 PermCntlTy = MVT::v16i8;
16160 PermTy = MVT::v4i32;
16161 LDTy = MVT::v4i32;
16162
16163 SDValue PermCntl = BuildIntrinsicOp(Intr, Ptr, DAG, dl, PermCntlTy);
16164
16165 // Create the new MMO for the new base load. It is like the original MMO,
16166 // but represents an area in memory almost twice the vector size centered
16167 // on the original address. If the address is unaligned, we might start
16168 // reading up to (sizeof(vector)-1) bytes below the address of the
16169 // original unaligned load.
16170       MachineFunction &MF = DAG.getMachineFunction();
16171       MachineMemOperand *BaseMMO =
16172 MF.getMachineMemOperand(LD->getMemOperand(),
16173 -(int64_t)MemVT.getStoreSize()+1,
16174 2*MemVT.getStoreSize()-1);
16175
16176 // Create the new base load.
16177 SDValue LDXIntID =
16178 DAG.getTargetConstant(IntrLD, dl, getPointerTy(MF.getDataLayout()));
16179 SDValue BaseLoadOps[] = { Chain, LDXIntID, Ptr };
16180 SDValue BaseLoad =
16181         DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl,
16182                                 DAG.getVTList(PermTy, MVT::Other),
16183 BaseLoadOps, LDTy, BaseMMO);
16184
16185 // Note that the value of IncOffset (which is provided to the next
16186 // load's pointer info offset value, and thus used to calculate the
16187 // alignment), and the value of IncValue (which is actually used to
16188 // increment the pointer value) are different! This is because we
16189 // require the next load to appear to be aligned, even though it
16190 // is actually offset from the base pointer by a lesser amount.
16191 int IncOffset = VT.getSizeInBits() / 8;
16192 int IncValue = IncOffset;
16193
16194 // Walk (both up and down) the chain looking for another load at the real
16195 // (aligned) offset (the alignment of the other load does not matter in
16196 // this case). If found, then do not use the offset reduction trick, as
16197 // that will prevent the loads from being later combined (as they would
16198 // otherwise be duplicates).
16199 if (!findConsecutiveLoad(LD, DAG))
16200 --IncValue;
16201
16202 SDValue Increment =
16203 DAG.getConstant(IncValue, dl, getPointerTy(MF.getDataLayout()));
16204 Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment);
16205
16206 MachineMemOperand *ExtraMMO =
16207 MF.getMachineMemOperand(LD->getMemOperand(),
16208 1, 2*MemVT.getStoreSize()-1);
16209 SDValue ExtraLoadOps[] = { Chain, LDXIntID, Ptr };
16210 SDValue ExtraLoad =
16211         DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl,
16212                                 DAG.getVTList(PermTy, MVT::Other),
16213 ExtraLoadOps, LDTy, ExtraMMO);
16214
16215 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
16216 BaseLoad.getValue(1), ExtraLoad.getValue(1));
16217
16218 // Because vperm has a big-endian bias, we must reverse the order
16219 // of the input vectors and complement the permute control vector
16220 // when generating little endian code. We have already handled the
16221 // latter by using lvsr instead of lvsl, so just reverse BaseLoad
16222 // and ExtraLoad here.
16223 SDValue Perm;
16224 if (isLittleEndian)
16225 Perm = BuildIntrinsicOp(IntrPerm,
16226 ExtraLoad, BaseLoad, PermCntl, DAG, dl);
16227 else
16228 Perm = BuildIntrinsicOp(IntrPerm,
16229 BaseLoad, ExtraLoad, PermCntl, DAG, dl);
16230
16231 if (VT != PermTy)
16232 Perm = Subtarget.hasAltivec()
16233 ? DAG.getNode(ISD::BITCAST, dl, VT, Perm)
16234 : DAG.getNode(ISD::FP_ROUND, dl, VT, Perm,
16235 DAG.getTargetConstant(1, dl, MVT::i64));
16236 // second argument is 1 because this rounding
16237 // is always exact.
16238
16239 // The output of the permutation is our loaded result, the TokenFactor is
16240 // our new chain.
16241 DCI.CombineTo(N, Perm, TF);
16242 return SDValue(N, 0);
16243 }
16244 }
16245 break;
16246   case ISD::INTRINSIC_WO_CHAIN: {
16247     bool isLittleEndian = Subtarget.isLittleEndian();
16248 unsigned IID = N->getConstantOperandVal(0);
16249 Intrinsic::ID Intr = (isLittleEndian ? Intrinsic::ppc_altivec_lvsr
16250 : Intrinsic::ppc_altivec_lvsl);
16251 if (IID == Intr && N->getOperand(1)->getOpcode() == ISD::ADD) {
16252 SDValue Add = N->getOperand(1);
16253
16254 int Bits = 4 /* 16 byte alignment */;
16255
16256 if (DAG.MaskedValueIsZero(Add->getOperand(1),
16257 APInt::getAllOnes(Bits /* alignment */)
16258 .zext(Add.getScalarValueSizeInBits()))) {
16259 SDNode *BasePtr = Add->getOperand(0).getNode();
16260 for (SDNode *U : BasePtr->uses()) {
16261 if (U->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
16262 U->getConstantOperandVal(0) == IID) {
16263 // We've found another LVSL/LVSR, and this address is an aligned
16264 // multiple of that one. The results will be the same, so use the
16265 // one we've just found instead.
16266
16267 return SDValue(U, 0);
16268 }
16269 }
16270 }
16271
16272 if (isa<ConstantSDNode>(Add->getOperand(1))) {
16273 SDNode *BasePtr = Add->getOperand(0).getNode();
16274 for (SDNode *U : BasePtr->uses()) {
16275 if (U->getOpcode() == ISD::ADD &&
16276 isa<ConstantSDNode>(U->getOperand(1)) &&
16277 (Add->getConstantOperandVal(1) - U->getConstantOperandVal(1)) %
16278 (1ULL << Bits) ==
16279 0) {
16280 SDNode *OtherAdd = U;
16281 for (SDNode *V : OtherAdd->uses()) {
16282 if (V->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
16283 V->getConstantOperandVal(0) == IID) {
16284 return SDValue(V, 0);
16285 }
16286 }
16287 }
16288 }
16289 }
16290 }
16291
16292 // Combine vmaxsw/h/b(a, a's negation) to abs(a)
16293     // Expose the vabsduw/h/b opportunity for downstream combines.
16294 if (!DCI.isAfterLegalizeDAG() && Subtarget.hasP9Altivec() &&
16295 (IID == Intrinsic::ppc_altivec_vmaxsw ||
16296 IID == Intrinsic::ppc_altivec_vmaxsh ||
16297 IID == Intrinsic::ppc_altivec_vmaxsb)) {
16298 SDValue V1 = N->getOperand(1);
16299 SDValue V2 = N->getOperand(2);
16300 if ((V1.getSimpleValueType() == MVT::v4i32 ||
16301 V1.getSimpleValueType() == MVT::v8i16 ||
16302 V1.getSimpleValueType() == MVT::v16i8) &&
16303 V1.getSimpleValueType() == V2.getSimpleValueType()) {
16304 // (0-a, a)
16305 if (V1.getOpcode() == ISD::SUB &&
16306             ISD::isBuildVectorAllZeros(V1.getOperand(0).getNode()) &&
16307             V1.getOperand(1) == V2) {
16308 return DAG.getNode(ISD::ABS, dl, V2.getValueType(), V2);
16309 }
16310 // (a, 0-a)
16311 if (V2.getOpcode() == ISD::SUB &&
16312 ISD::isBuildVectorAllZeros(V2.getOperand(0).getNode()) &&
16313 V2.getOperand(1) == V1) {
16314 return DAG.getNode(ISD::ABS, dl, V1.getValueType(), V1);
16315 }
16316 // (x-y, y-x)
16317 if (V1.getOpcode() == ISD::SUB && V2.getOpcode() == ISD::SUB &&
16318 V1.getOperand(0) == V2.getOperand(1) &&
16319 V1.getOperand(1) == V2.getOperand(0)) {
16320 return DAG.getNode(ISD::ABS, dl, V1.getValueType(), V1);
16321 }
16322 }
16323 }
16324 }
16325
16326 break;
16327   case ISD::INTRINSIC_W_CHAIN:
16328     switch (N->getConstantOperandVal(1)) {
16329 default:
16330 break;
16331 case Intrinsic::ppc_altivec_vsum4sbs:
16332 case Intrinsic::ppc_altivec_vsum4shs:
16333 case Intrinsic::ppc_altivec_vsum4ubs: {
16334 // These sum-across intrinsics only have a chain due to the side effect
16335 // that they may set the SAT bit. If we know the SAT bit will not be set
16336 // for some inputs, we can replace any uses of their chain with the
16337 // input chain.
16338 if (BuildVectorSDNode *BVN =
16339 dyn_cast<BuildVectorSDNode>(N->getOperand(3))) {
16340 APInt APSplatBits, APSplatUndef;
16341 unsigned SplatBitSize;
16342 bool HasAnyUndefs;
16343 bool BVNIsConstantSplat = BVN->isConstantSplat(
16344 APSplatBits, APSplatUndef, SplatBitSize, HasAnyUndefs, 0,
16345 !Subtarget.isLittleEndian());
16346 // If the constant splat vector is 0, the SAT bit will not be set.
16347 if (BVNIsConstantSplat && APSplatBits == 0)
16348 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), N->getOperand(0));
16349 }
16350 return SDValue();
16351 }
16352 case Intrinsic::ppc_vsx_lxvw4x:
16353 case Intrinsic::ppc_vsx_lxvd2x:
16354 // For little endian, VSX loads require generating lxvd2x/xxswapd.
16355 // Not needed on ISA 3.0 based CPUs since we have a non-permuting load.
16356 if (Subtarget.needsSwapsForVSXMemOps())
16357 return expandVSXLoadForLE(N, DCI);
16358 break;
16359 }
16360 break;
16361   case ISD::INTRINSIC_VOID:
16362     // For little endian, VSX stores require generating xxswapd/stxvd2x.
16363 // Not needed on ISA 3.0 based CPUs since we have a non-permuting store.
16364 if (Subtarget.needsSwapsForVSXMemOps()) {
16365 switch (N->getConstantOperandVal(1)) {
16366 default:
16367 break;
16368 case Intrinsic::ppc_vsx_stxvw4x:
16369 case Intrinsic::ppc_vsx_stxvd2x:
16370 return expandVSXStoreForLE(N, DCI);
16371 }
16372 }
16373 break;
16374 case ISD::BSWAP: {
16375 // Turn BSWAP (LOAD) -> lhbrx/lwbrx.
16376 // For subtargets without LDBRX, we can still do better than the default
16377 // expansion even for 64-bit BSWAP (LOAD).
16378 bool Is64BitBswapOn64BitTgt =
16379 Subtarget.isPPC64() && N->getValueType(0) == MVT::i64;
16380 bool IsSingleUseNormalLd = ISD::isNormalLoad(N->getOperand(0).getNode()) &&
16381 N->getOperand(0).hasOneUse();
16382 if (IsSingleUseNormalLd &&
16383 (N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i16 ||
16384 (Subtarget.hasLDBRX() && Is64BitBswapOn64BitTgt))) {
16385 SDValue Load = N->getOperand(0);
16386 LoadSDNode *LD = cast<LoadSDNode>(Load);
16387 // Create the byte-swapping load.
16388 SDValue Ops[] = {
16389 LD->getChain(), // Chain
16390 LD->getBasePtr(), // Ptr
16391 DAG.getValueType(N->getValueType(0)) // VT
16392 };
16393 SDValue BSLoad =
16394         DAG.getMemIntrinsicNode(PPCISD::LBRX, dl,
16395                                 DAG.getVTList(N->getValueType(0) == MVT::i64 ?
16396 MVT::i64 : MVT::i32, MVT::Other),
16397 Ops, LD->getMemoryVT(), LD->getMemOperand());
16398
16399 // If this is an i16 load, insert the truncate.
16400 SDValue ResVal = BSLoad;
16401 if (N->getValueType(0) == MVT::i16)
16402 ResVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i16, BSLoad);
16403
16404 // First, combine the bswap away. This makes the value produced by the
16405 // load dead.
16406 DCI.CombineTo(N, ResVal);
16407
16408       // Next, combine the load away; we give it a bogus result value but a real
16409       // chain result. The result value is dead because the bswap is dead.
16410 DCI.CombineTo(Load.getNode(), ResVal, BSLoad.getValue(1));
16411
16412 // Return N so it doesn't get rechecked!
16413 return SDValue(N, 0);
16414 }
16415 // Convert this to two 32-bit bswap loads and a BUILD_PAIR. Do this only
16416 // before legalization so that the BUILD_PAIR is handled correctly.
16417 if (!DCI.isBeforeLegalize() || !Is64BitBswapOn64BitTgt ||
16418 !IsSingleUseNormalLd)
16419 return SDValue();
16420 LoadSDNode *LD = cast<LoadSDNode>(N->getOperand(0));
16421
16422 // Can't split volatile or atomic loads.
16423 if (!LD->isSimple())
16424 return SDValue();
16425 SDValue BasePtr = LD->getBasePtr();
16426 SDValue Lo = DAG.getLoad(MVT::i32, dl, LD->getChain(), BasePtr,
16427 LD->getPointerInfo(), LD->getAlign());
16428 Lo = DAG.getNode(ISD::BSWAP, dl, MVT::i32, Lo);
16429 BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
16430 DAG.getIntPtrConstant(4, dl));
16431     MachineMemOperand *NewMMO = DAG.getMachineFunction().getMachineMemOperand(
16432         LD->getMemOperand(), 4, 4);
16433 SDValue Hi = DAG.getLoad(MVT::i32, dl, LD->getChain(), BasePtr, NewMMO);
16434 Hi = DAG.getNode(ISD::BSWAP, dl, MVT::i32, Hi);
16435 SDValue Res;
16436 if (Subtarget.isLittleEndian())
16437 Res = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Hi, Lo);
16438 else
16439 Res = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
16440 SDValue TF =
16441 DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
16442 Hi.getOperand(0).getValue(1), Lo.getOperand(0).getValue(1));
16443 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), TF);
16444 return Res;
16445 }
16446 case PPCISD::VCMP:
16447 // If a VCMP_rec node already exists with exactly the same operands as this
16448 // node, use its result instead of this node (VCMP_rec computes both a CR6
16449 // and a normal output).
16450 //
16451 if (!N->getOperand(0).hasOneUse() &&
16452 !N->getOperand(1).hasOneUse() &&
16453 !N->getOperand(2).hasOneUse()) {
16454
16455 // Scan all of the users of the LHS, looking for VCMP_rec's that match.
16456 SDNode *VCMPrecNode = nullptr;
16457
16458 SDNode *LHSN = N->getOperand(0).getNode();
16459 for (SDNode::use_iterator UI = LHSN->use_begin(), E = LHSN->use_end();
16460 UI != E; ++UI)
16461 if (UI->getOpcode() == PPCISD::VCMP_rec &&
16462 UI->getOperand(1) == N->getOperand(1) &&
16463 UI->getOperand(2) == N->getOperand(2) &&
16464 UI->getOperand(0) == N->getOperand(0)) {
16465 VCMPrecNode = *UI;
16466 break;
16467 }
16468
16469 // If there is no VCMP_rec node, or if the flag value has a single use,
16470 // don't transform this.
16471 if (!VCMPrecNode || VCMPrecNode->hasNUsesOfValue(0, 1))
16472 break;
16473
16474 // Look at the (necessarily single) use of the flag value. If it has a
16475 // chain, this transformation is more complex. Note that multiple things
16476 // could use the value result, which we should ignore.
16477 SDNode *FlagUser = nullptr;
16478 for (SDNode::use_iterator UI = VCMPrecNode->use_begin();
16479 FlagUser == nullptr; ++UI) {
16480 assert(UI != VCMPrecNode->use_end() && "Didn't find user!");
16481 SDNode *User = *UI;
16482 for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) {
16483 if (User->getOperand(i) == SDValue(VCMPrecNode, 1)) {
16484 FlagUser = User;
16485 break;
16486 }
16487 }
16488 }
16489
16490 // If the user is a MFOCRF instruction, we know this is safe.
16491 // Otherwise we give up for right now.
16492 if (FlagUser->getOpcode() == PPCISD::MFOCRF)
16493 return SDValue(VCMPrecNode, 0);
16494 }
16495 break;
16496 case ISD::BR_CC: {
16497 // If this is a branch on an altivec predicate comparison, lower this so
16498 // that we don't have to do a MFOCRF: instead, branch directly on CR6. This
16499 // lowering is done pre-legalize, because the legalizer lowers the predicate
16500 // compare down to code that is difficult to reassemble.
16501 // This code also handles branches that depend on the result of a store
16502 // conditional.
16503 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
16504 SDValue LHS = N->getOperand(2), RHS = N->getOperand(3);
16505
16506 int CompareOpc;
16507 bool isDot;
16508
16509 if (!isa<ConstantSDNode>(RHS) || (CC != ISD::SETEQ && CC != ISD::SETNE))
16510 break;
16511
16512 // Since we are doing this pre-legalize, the RHS can be a constant of
16513 // arbitrary bitwidth which may cause issues when trying to get the value
16514 // from the underlying APInt.
16515 auto RHSAPInt = RHS->getAsAPIntVal();
16516 if (!RHSAPInt.isIntN(64))
16517 break;
16518
16519 unsigned Val = RHSAPInt.getZExtValue();
16520 auto isImpossibleCompare = [&]() {
16521 // If this is a comparison against something other than 0/1, then we know
16522 // that the condition is never/always true.
16523 if (Val != 0 && Val != 1) {
16524 if (CC == ISD::SETEQ) // Cond never true, remove branch.
16525 return N->getOperand(0);
16526 // Always !=, turn it into an unconditional branch.
16527 return DAG.getNode(ISD::BR, dl, MVT::Other,
16528 N->getOperand(0), N->getOperand(4));
16529 }
16530 return SDValue();
16531 };
16532 // Combine branches fed by store conditional instructions (st[bhwd]cx).
16533 unsigned StoreWidth = 0;
16534 if (LHS.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
16535 isStoreConditional(LHS, StoreWidth)) {
16536 if (SDValue Impossible = isImpossibleCompare())
16537 return Impossible;
16538 PPC::Predicate CompOpc;
16539 // eq 0 => ne
16540 // ne 0 => eq
16541 // eq 1 => eq
16542 // ne 1 => ne
16543 if (Val == 0)
16544 CompOpc = CC == ISD::SETEQ ? PPC::PRED_NE : PPC::PRED_EQ;
16545 else
16546 CompOpc = CC == ISD::SETEQ ? PPC::PRED_EQ : PPC::PRED_NE;
16547
16548 SDValue Ops[] = {LHS.getOperand(0), LHS.getOperand(2), LHS.getOperand(3),
16549 DAG.getConstant(StoreWidth, dl, MVT::i32)};
16550 auto *MemNode = cast<MemSDNode>(LHS);
16551 SDValue ConstSt = DAG.getMemIntrinsicNode(
16552           PPCISD::STORE_COND, dl,
16553           DAG.getVTList(MVT::i32, MVT::Other, MVT::Glue), Ops,
16554 MemNode->getMemoryVT(), MemNode->getMemOperand());
16555
16556 SDValue InChain;
16557 // Unchain the branch from the original store conditional.
16558 if (N->getOperand(0) == LHS.getValue(1))
16559 InChain = LHS.getOperand(0);
16560 else if (N->getOperand(0).getOpcode() == ISD::TokenFactor) {
16561 SmallVector<SDValue, 4> InChains;
16562 SDValue InTF = N->getOperand(0);
16563 for (int i = 0, e = InTF.getNumOperands(); i < e; i++)
16564 if (InTF.getOperand(i) != LHS.getValue(1))
16565 InChains.push_back(InTF.getOperand(i));
16566 InChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, InChains);
16567 }
16568
16569 return DAG.getNode(PPCISD::COND_BRANCH, dl, MVT::Other, InChain,
16570 DAG.getConstant(CompOpc, dl, MVT::i32),
16571 DAG.getRegister(PPC::CR0, MVT::i32), N->getOperand(4),
16572 ConstSt.getValue(2));
16573 }
16574
16575 if (LHS.getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
16576 getVectorCompareInfo(LHS, CompareOpc, isDot, Subtarget)) {
16577 assert(isDot && "Can't compare against a vector result!");
16578
16579 if (SDValue Impossible = isImpossibleCompare())
16580 return Impossible;
16581
16582 bool BranchOnWhenPredTrue = (CC == ISD::SETEQ) ^ (Val == 0);
16583 // Create the PPCISD altivec 'dot' comparison node.
16584 SDValue Ops[] = {
16585 LHS.getOperand(2), // LHS of compare
16586 LHS.getOperand(3), // RHS of compare
16587 DAG.getConstant(CompareOpc, dl, MVT::i32)
16588 };
16589 EVT VTs[] = { LHS.getOperand(2).getValueType(), MVT::Glue };
16590 SDValue CompNode = DAG.getNode(PPCISD::VCMP_rec, dl, VTs, Ops);
16591
16592 // Unpack the result based on how the target uses it.
16593 PPC::Predicate CompOpc;
16594 switch (LHS.getConstantOperandVal(1)) {
16595 default: // Can't happen, don't crash on invalid number though.
16596 case 0: // Branch on the value of the EQ bit of CR6.
16597 CompOpc = BranchOnWhenPredTrue ? PPC::PRED_EQ : PPC::PRED_NE;
16598 break;
16599 case 1: // Branch on the inverted value of the EQ bit of CR6.
16600 CompOpc = BranchOnWhenPredTrue ? PPC::PRED_NE : PPC::PRED_EQ;
16601 break;
16602 case 2: // Branch on the value of the LT bit of CR6.
16603 CompOpc = BranchOnWhenPredTrue ? PPC::PRED_LT : PPC::PRED_GE;
16604 break;
16605 case 3: // Branch on the inverted value of the LT bit of CR6.
16606 CompOpc = BranchOnWhenPredTrue ? PPC::PRED_GE : PPC::PRED_LT;
16607 break;
16608 }
16609
16610 return DAG.getNode(PPCISD::COND_BRANCH, dl, MVT::Other, N->getOperand(0),
16611 DAG.getConstant(CompOpc, dl, MVT::i32),
16612 DAG.getRegister(PPC::CR6, MVT::i32),
16613 N->getOperand(4), CompNode.getValue(1));
16614 }
16615 break;
16616 }
16617 case ISD::BUILD_VECTOR:
16618 return DAGCombineBuildVector(N, DCI);
16619 }
16620
16621 return SDValue();
16622}
16623
16624SDValue
16625 PPCTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
16626                                  SelectionDAG &DAG,
16627 SmallVectorImpl<SDNode *> &Created) const {
16628 // fold (sdiv X, pow2)
16629 EVT VT = N->getValueType(0);
16630 if (VT == MVT::i64 && !Subtarget.isPPC64())
16631 return SDValue();
16632 if ((VT != MVT::i32 && VT != MVT::i64) ||
16633 !(Divisor.isPowerOf2() || Divisor.isNegatedPowerOf2()))
16634 return SDValue();
16635
16636 SDLoc DL(N);
16637 SDValue N0 = N->getOperand(0);
16638
16639 bool IsNegPow2 = Divisor.isNegatedPowerOf2();
16640 unsigned Lg2 = (IsNegPow2 ? -Divisor : Divisor).countr_zero();
16641 SDValue ShiftAmt = DAG.getConstant(Lg2, DL, VT);
16642
16643 SDValue Op = DAG.getNode(PPCISD::SRA_ADDZE, DL, VT, N0, ShiftAmt);
16644 Created.push_back(Op.getNode());
16645
16646 if (IsNegPow2) {
16647 Op = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Op);
16648 Created.push_back(Op.getNode());
16649 }
16650
16651 return Op;
16652}
16653
16654//===----------------------------------------------------------------------===//
16655// Inline Assembly Support
16656//===----------------------------------------------------------------------===//
16657
16658 void PPCTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
16659                                                       KnownBits &Known,
16660 const APInt &DemandedElts,
16661 const SelectionDAG &DAG,
16662 unsigned Depth) const {
16663 Known.resetAll();
16664 switch (Op.getOpcode()) {
16665 default: break;
16666 case PPCISD::LBRX: {
16667 // lhbrx is known to have the top bits cleared out.
16668 if (cast<VTSDNode>(Op.getOperand(2))->getVT() == MVT::i16)
16669 Known.Zero = 0xFFFF0000;
16670 break;
16671 }
16672   case ISD::INTRINSIC_WO_CHAIN: {
16673     switch (Op.getConstantOperandVal(0)) {
16674 default: break;
16675 case Intrinsic::ppc_altivec_vcmpbfp_p:
16676 case Intrinsic::ppc_altivec_vcmpeqfp_p:
16677 case Intrinsic::ppc_altivec_vcmpequb_p:
16678 case Intrinsic::ppc_altivec_vcmpequh_p:
16679 case Intrinsic::ppc_altivec_vcmpequw_p:
16680 case Intrinsic::ppc_altivec_vcmpequd_p:
16681 case Intrinsic::ppc_altivec_vcmpequq_p:
16682 case Intrinsic::ppc_altivec_vcmpgefp_p:
16683 case Intrinsic::ppc_altivec_vcmpgtfp_p:
16684 case Intrinsic::ppc_altivec_vcmpgtsb_p:
16685 case Intrinsic::ppc_altivec_vcmpgtsh_p:
16686 case Intrinsic::ppc_altivec_vcmpgtsw_p:
16687 case Intrinsic::ppc_altivec_vcmpgtsd_p:
16688 case Intrinsic::ppc_altivec_vcmpgtsq_p:
16689 case Intrinsic::ppc_altivec_vcmpgtub_p:
16690 case Intrinsic::ppc_altivec_vcmpgtuh_p:
16691 case Intrinsic::ppc_altivec_vcmpgtuw_p:
16692 case Intrinsic::ppc_altivec_vcmpgtud_p:
16693 case Intrinsic::ppc_altivec_vcmpgtuq_p:
16694 Known.Zero = ~1U; // All bits but the low one are known to be zero.
16695 break;
16696 }
16697 break;
16698 }
16699   case ISD::INTRINSIC_W_CHAIN: {
16700     switch (Op.getConstantOperandVal(1)) {
16701 default:
16702 break;
16703 case Intrinsic::ppc_load2r:
16704 // Top bits are cleared for load2r (which is the same as lhbrx).
16705 Known.Zero = 0xFFFF0000;
16706 break;
16707 }
16708 break;
16709 }
16710 }
16711}
16712
16713 Align PPCTargetLowering::getPrefLoopAlignment(MachineLoop *ML) const {
16714   switch (Subtarget.getCPUDirective()) {
16715 default: break;
16716 case PPC::DIR_970:
16717 case PPC::DIR_PWR4:
16718 case PPC::DIR_PWR5:
16719 case PPC::DIR_PWR5X:
16720 case PPC::DIR_PWR6:
16721 case PPC::DIR_PWR6X:
16722 case PPC::DIR_PWR7:
16723 case PPC::DIR_PWR8:
16724 case PPC::DIR_PWR9:
16725 case PPC::DIR_PWR10:
16726 case PPC::DIR_PWR11:
16727 case PPC::DIR_PWR_FUTURE: {
16728 if (!ML)
16729 break;
16730
16731     if (!DisableInnermostLoopAlign32) {
16732       // If the nested loop is an innermost loop, prefer a 32-byte alignment,
16733 // so that we can decrease cache misses and branch-prediction misses.
16734 // Actual alignment of the loop will depend on the hotness check and other
16735 // logic in alignBlocks.
16736 if (ML->getLoopDepth() > 1 && ML->getSubLoops().empty())
16737 return Align(32);
16738 }
16739
16740 const PPCInstrInfo *TII = Subtarget.getInstrInfo();
16741
16742 // For small loops (between 5 and 8 instructions), align to a 32-byte
16743 // boundary so that the entire loop fits in one instruction-cache line.
16744 uint64_t LoopSize = 0;
16745 for (auto I = ML->block_begin(), IE = ML->block_end(); I != IE; ++I)
16746 for (const MachineInstr &J : **I) {
16747 LoopSize += TII->getInstSizeInBytes(J);
16748 if (LoopSize > 32)
16749 break;
16750 }
16751
16752 if (LoopSize > 16 && LoopSize <= 32)
16753 return Align(32);
16754
16755 break;
16756 }
16757 }
16758
16759   return TargetLowering::getPrefLoopAlignment(ML);
16760}
16761
16762/// getConstraintType - Given a constraint, return the type of
16763/// constraint it is for this target.
16764 PPCTargetLowering::ConstraintType
16765 PPCTargetLowering::getConstraintType(StringRef Constraint) const {
16766   if (Constraint.size() == 1) {
16767 switch (Constraint[0]) {
16768 default: break;
16769 case 'b':
16770 case 'r':
16771 case 'f':
16772 case 'd':
16773 case 'v':
16774 case 'y':
16775 return C_RegisterClass;
16776 case 'Z':
16777 // FIXME: While Z does indicate a memory constraint, it specifically
16778 // indicates an r+r address (used in conjunction with the 'y' modifier
16779 // in the replacement string). Currently, we're forcing the base
16780 // register to be r0 in the asm printer (which is interpreted as zero)
16781 // and forming the complete address in the second register. This is
16782 // suboptimal.
16783 return C_Memory;
16784 }
16785 } else if (Constraint == "wc") { // individual CR bits.
16786 return C_RegisterClass;
16787 } else if (Constraint == "wa" || Constraint == "wd" ||
16788 Constraint == "wf" || Constraint == "ws" ||
16789 Constraint == "wi" || Constraint == "ww") {
16790 return C_RegisterClass; // VSX registers.
16791 }
16792 return TargetLowering::getConstraintType(Constraint);
16793}
16794
16795/// Examine constraint type and operand type and determine a weight value.
16796/// This object must already have been set up with the operand type
16797/// and the current alternative constraint selected.
16798 TargetLowering::ConstraintWeight
16799 PPCTargetLowering::getSingleConstraintMatchWeight(
16800     AsmOperandInfo &info, const char *constraint) const {
16801   ConstraintWeight weight = CW_Invalid;
16802   Value *CallOperandVal = info.CallOperandVal;
16803 // If we don't have a value, we can't do a match,
16804 // but allow it at the lowest weight.
16805 if (!CallOperandVal)
16806 return CW_Default;
16807 Type *type = CallOperandVal->getType();
16808
16809 // Look at the constraint type.
16810 if (StringRef(constraint) == "wc" && type->isIntegerTy(1))
16811 return CW_Register; // an individual CR bit.
16812 else if ((StringRef(constraint) == "wa" ||
16813 StringRef(constraint) == "wd" ||
16814 StringRef(constraint) == "wf") &&
16815 type->isVectorTy())
16816 return CW_Register;
16817 else if (StringRef(constraint) == "wi" && type->isIntegerTy(64))
16818     return CW_Register; // 'wi' holds 64-bit integer data.
16819 else if (StringRef(constraint) == "ws" && type->isDoubleTy())
16820 return CW_Register;
16821 else if (StringRef(constraint) == "ww" && type->isFloatTy())
16822 return CW_Register;
16823
16824 switch (*constraint) {
16825 default:
16826     weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
16827     break;
16828 case 'b':
16829 if (type->isIntegerTy())
16830 weight = CW_Register;
16831 break;
16832 case 'f':
16833 if (type->isFloatTy())
16834 weight = CW_Register;
16835 break;
16836 case 'd':
16837 if (type->isDoubleTy())
16838 weight = CW_Register;
16839 break;
16840 case 'v':
16841 if (type->isVectorTy())
16842 weight = CW_Register;
16843 break;
16844 case 'y':
16845 weight = CW_Register;
16846 break;
16847 case 'Z':
16848 weight = CW_Memory;
16849 break;
16850 }
16851 return weight;
16852}
16853
16854std::pair<unsigned, const TargetRegisterClass *>
16855 PPCTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
16856                                                 StringRef Constraint,
16857 MVT VT) const {
16858 if (Constraint.size() == 1) {
16859 // GCC RS6000 Constraint Letters
16860 switch (Constraint[0]) {
16861 case 'b': // R1-R31
16862 if (VT == MVT::i64 && Subtarget.isPPC64())
16863 return std::make_pair(0U, &PPC::G8RC_NOX0RegClass);
16864 return std::make_pair(0U, &PPC::GPRC_NOR0RegClass);
16865 case 'r': // R0-R31
16866 if (VT == MVT::i64 && Subtarget.isPPC64())
16867 return std::make_pair(0U, &PPC::G8RCRegClass);
16868 return std::make_pair(0U, &PPC::GPRCRegClass);
16869 // 'd' and 'f' constraints are both defined to be "the floating point
16870 // registers", where one is for 32-bit and the other for 64-bit. We don't
16871 // really care overly much here so just give them all the same reg classes.
16872 case 'd':
16873 case 'f':
16874 if (Subtarget.hasSPE()) {
16875 if (VT == MVT::f32 || VT == MVT::i32)
16876 return std::make_pair(0U, &PPC::GPRCRegClass);
16877 if (VT == MVT::f64 || VT == MVT::i64)
16878 return std::make_pair(0U, &PPC::SPERCRegClass);
16879 } else {
16880 if (VT == MVT::f32 || VT == MVT::i32)
16881 return std::make_pair(0U, &PPC::F4RCRegClass);
16882 if (VT == MVT::f64 || VT == MVT::i64)
16883 return std::make_pair(0U, &PPC::F8RCRegClass);
16884 }
16885 break;
16886 case 'v':
16887 if (Subtarget.hasAltivec() && VT.isVector())
16888 return std::make_pair(0U, &PPC::VRRCRegClass);
16889 else if (Subtarget.hasVSX())
16890 // Scalars in Altivec registers only make sense with VSX.
16891 return std::make_pair(0U, &PPC::VFRCRegClass);
16892 break;
16893 case 'y': // crrc
16894 return std::make_pair(0U, &PPC::CRRCRegClass);
16895 }
16896 } else if (Constraint == "wc" && Subtarget.useCRBits()) {
16897 // An individual CR bit.
16898 return std::make_pair(0U, &PPC::CRBITRCRegClass);
16899 } else if ((Constraint == "wa" || Constraint == "wd" ||
16900 Constraint == "wf" || Constraint == "wi") &&
16901 Subtarget.hasVSX()) {
16902 // A VSX register for either a scalar (FP) or vector. There is no
16903 // support for single precision scalars on subtargets prior to Power8.
16904 if (VT.isVector())
16905 return std::make_pair(0U, &PPC::VSRCRegClass);
16906 if (VT == MVT::f32 && Subtarget.hasP8Vector())
16907 return std::make_pair(0U, &PPC::VSSRCRegClass);
16908 return std::make_pair(0U, &PPC::VSFRCRegClass);
16909 } else if ((Constraint == "ws" || Constraint == "ww") && Subtarget.hasVSX()) {
16910 if (VT == MVT::f32 && Subtarget.hasP8Vector())
16911 return std::make_pair(0U, &PPC::VSSRCRegClass);
16912 else
16913 return std::make_pair(0U, &PPC::VSFRCRegClass);
16914 } else if (Constraint == "lr") {
16915 if (VT == MVT::i64)
16916 return std::make_pair(0U, &PPC::LR8RCRegClass);
16917 else
16918 return std::make_pair(0U, &PPC::LRRCRegClass);
16919 }
16920
16921 // Handle special cases of physical registers that are not properly handled
16922 // by the base class.
16923 if (Constraint[0] == '{' && Constraint[Constraint.size() - 1] == '}') {
16924 // If we name a VSX register, we can't defer to the base class because it
16925 // will not recognize the correct register (their names will be VSL{0-31}
16926 // and V{0-31} so they won't match). So we match them here.
16927 if (Constraint.size() > 3 && Constraint[1] == 'v' && Constraint[2] == 's') {
16928 int VSNum = atoi(Constraint.data() + 3);
16929 assert(VSNum >= 0 && VSNum <= 63 &&
16930 "Attempted to access a vsr out of range");
16931 if (VSNum < 32)
16932 return std::make_pair(PPC::VSL0 + VSNum, &PPC::VSRCRegClass);
16933 return std::make_pair(PPC::V0 + VSNum - 32, &PPC::VSRCRegClass);
16934 }
16935
16936 // For float registers, we can't defer to the base class as it will match
16937 // the SPILLTOVSRRC class.
16938 if (Constraint.size() > 3 && Constraint[1] == 'f') {
16939 int RegNum = atoi(Constraint.data() + 2);
16940 if (RegNum > 31 || RegNum < 0)
16941 report_fatal_error("Invalid floating point register number");
16942 if (VT == MVT::f32 || VT == MVT::i32)
16943 return Subtarget.hasSPE()
16944 ? std::make_pair(PPC::R0 + RegNum, &PPC::GPRCRegClass)
16945 : std::make_pair(PPC::F0 + RegNum, &PPC::F4RCRegClass);
16946 if (VT == MVT::f64 || VT == MVT::i64)
16947 return Subtarget.hasSPE()
16948 ? std::make_pair(PPC::S0 + RegNum, &PPC::SPERCRegClass)
16949 : std::make_pair(PPC::F0 + RegNum, &PPC::F8RCRegClass);
16950 }
16951 }
16952
16953 std::pair<unsigned, const TargetRegisterClass *> R =
16954       TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
16955
16956 // r[0-9]+ are used, on PPC64, to refer to the corresponding 64-bit registers
16957 // (which we call X[0-9]+). If a 64-bit value has been requested, and a
16958 // 32-bit GPR has been selected, then 'upgrade' it to the 64-bit parent
16959 // register.
16960 // FIXME: If TargetLowering::getRegForInlineAsmConstraint could somehow use
16961 // the AsmName field from *RegisterInfo.td, then this would not be necessary.
16962 if (R.first && VT == MVT::i64 && Subtarget.isPPC64() &&
16963 PPC::GPRCRegClass.contains(R.first))
16964 return std::make_pair(TRI->getMatchingSuperReg(R.first,
16965 PPC::sub_32, &PPC::G8RCRegClass),
16966 &PPC::G8RCRegClass);
16967
16968 // GCC accepts 'cc' as an alias for 'cr0', and we need to do the same.
16969 if (!R.second && StringRef("{cc}").equals_insensitive(Constraint)) {
16970 R.first = PPC::CR0;
16971 R.second = &PPC::CRRCRegClass;
16972 }
16973 // FIXME: This warning should ideally be emitted in the front end.
16974 const auto &TM = getTargetMachine();
16975 if (Subtarget.isAIXABI() && !TM.getAIXExtendedAltivecABI()) {
16976 if (((R.first >= PPC::V20 && R.first <= PPC::V31) ||
16977 (R.first >= PPC::VF20 && R.first <= PPC::VF31)) &&
16978 (R.second == &PPC::VSRCRegClass || R.second == &PPC::VSFRCRegClass))
16979 errs() << "warning: vector registers 20 to 32 are reserved in the "
16980 "default AIX AltiVec ABI and cannot be used\n";
16981 }
16982
16983 return R;
16984}
16985
16986/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
16987/// vector. If it is invalid, don't add anything to Ops.
16988void PPCTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
16989 StringRef Constraint,
16990 std::vector<SDValue> &Ops,
16991 SelectionDAG &DAG) const {
16992 SDValue Result;
16993
16994 // Only support length 1 constraints.
16995 if (Constraint.size() > 1)
16996 return;
16997
16998 char Letter = Constraint[0];
16999 switch (Letter) {
17000 default: break;
17001 case 'I':
17002 case 'J':
17003 case 'K':
17004 case 'L':
17005 case 'M':
17006 case 'N':
17007 case 'O':
17008 case 'P': {
17009 ConstantSDNode *CST = dyn_cast<ConstantSDNode>(Op);
17010 if (!CST) return; // Must be an immediate to match.
17011 SDLoc dl(Op);
17012 int64_t Value = CST->getSExtValue();
17013 EVT TCVT = MVT::i64; // All constants taken to be 64 bits so that negative
17014 // numbers are printed as such.
17015 switch (Letter) {
17016 default: llvm_unreachable("Unknown constraint letter!");
17017 case 'I': // "I" is a signed 16-bit constant.
17018 if (isInt<16>(Value))
17019 Result = DAG.getTargetConstant(Value, dl, TCVT);
17020 break;
17021 case 'J': // "J" is a constant with only the high-order 16 bits nonzero.
17022 if (isShiftedUInt<16, 16>(Value))
17023 Result = DAG.getTargetConstant(Value, dl, TCVT);
17024 break;
17025 case 'L': // "L" is a signed 16-bit constant shifted left 16 bits.
17026 if (isShiftedInt<16, 16>(Value))
17027 Result = DAG.getTargetConstant(Value, dl, TCVT);
17028 break;
17029 case 'K': // "K" is a constant with only the low-order 16 bits nonzero.
17030 if (isUInt<16>(Value))
17031 Result = DAG.getTargetConstant(Value, dl, TCVT);
17032 break;
17033 case 'M': // "M" is a constant that is greater than 31.
17034 if (Value > 31)
17035 Result = DAG.getTargetConstant(Value, dl, TCVT);
17036 break;
17037 case 'N': // "N" is a positive constant that is an exact power of two.
17038 if (Value > 0 && isPowerOf2_64(Value))
17039 Result = DAG.getTargetConstant(Value, dl, TCVT);
17040 break;
17041 case 'O': // "O" is the constant zero.
17042 if (Value == 0)
17043 Result = DAG.getTargetConstant(Value, dl, TCVT);
17044 break;
17045 case 'P': // "P" is a constant whose negation is a signed 16-bit constant.
17046 if (isInt<16>(-Value))
17047 Result = DAG.getTargetConstant(Value, dl, TCVT);
17048 break;
17049 }
17050 break;
17051 }
17052 }
17053
17054 if (Result.getNode()) {
17055 Ops.push_back(Result);
17056 return;
17057 }
17058
17059 // Handle standard constraint letters.
17060 TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
17061}
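// Illustrative note: for a hypothetical inline asm use such as
//   asm("addi %0,%1,%2" : "=r"(d) : "r"(s), "I"(100));
// the 'I' constraint above sees Value == 100, which satisfies isInt<16>, so a
// target constant is pushed into Ops. A value such as 100000 matches none of
// the cases, Result stays null, and the operand falls through to the generic
// TargetLowering handling below.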
17062
17063void PPCTargetLowering::CollectTargetIntrinsicOperands(const CallInst &I,
17064 SmallVectorImpl<SDValue> &Ops,
17065 SelectionDAG &DAG) const {
17066 if (I.getNumOperands() <= 1)
17067 return;
17068 if (!isa<ConstantSDNode>(Ops[1].getNode()))
17069 return;
17070 auto IntrinsicID = Ops[1].getNode()->getAsZExtVal();
17071 if (IntrinsicID != Intrinsic::ppc_tdw && IntrinsicID != Intrinsic::ppc_tw &&
17072 IntrinsicID != Intrinsic::ppc_trapd && IntrinsicID != Intrinsic::ppc_trap)
17073 return;
17074
17075 if (MDNode *MDN = I.getMetadata(LLVMContext::MD_annotation))
17076 Ops.push_back(DAG.getMDNode(MDN));
17077}
17078
17079// isLegalAddressingMode - Return true if the addressing mode represented
17080// by AM is legal for this target, for a load/store of the specified type.
17081bool PPCTargetLowering::isLegalAddressingMode(const DataLayout &DL,
17082 const AddrMode &AM, Type *Ty,
17083 unsigned AS,
17084 Instruction *I) const {
17085 // Vector type r+i form is supported since power9 as DQ form. We don't check
17086 // the offset matching DQ form requirement(off % 16 == 0), because on PowerPC,
17087 // imm form is preferred and the offset can be adjusted to use imm form later
17088 // in pass PPCLoopInstrFormPrep. Also in LSR, for one LSRUse, it uses min and
17089 // max offset to check legal addressing mode, we should be a little aggressive
17090 // to contain other offsets for that LSRUse.
17091 if (Ty->isVectorTy() && AM.BaseOffs != 0 && !Subtarget.hasP9Vector())
17092 return false;
17093
17094 // PPC allows a sign-extended 16-bit immediate field.
17095 if (AM.BaseOffs <= -(1LL << 16) || AM.BaseOffs >= (1LL << 16)-1)
17096 return false;
17097
17098 // No global is ever allowed as a base.
17099 if (AM.BaseGV)
17100 return false;
17101
17102 // PPC only supports r+r,
17103 switch (AM.Scale) {
17104 case 0: // "r+i" or just "i", depending on HasBaseReg.
17105 break;
17106 case 1:
17107 if (AM.HasBaseReg && AM.BaseOffs) // "r+r+i" is not allowed.
17108 return false;
17109 // Otherwise we have r+r or r+i.
17110 break;
17111 case 2:
17112 if (AM.HasBaseReg || AM.BaseOffs) // 2*r+r or 2*r+i is not allowed.
17113 return false;
17114 // Allow 2*r as r+r.
17115 break;
17116 default:
17117 // No other scales are supported.
17118 return false;
17119 }
17120
17121 return true;
17122}
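// For example, [r+8] and [r+r] are accepted above, as is a bare 2*r (treated
// as r+r), while [r+r+8] and any scaled form such as [4*r+8] are rejected and
// must be broken up by the caller (typically LSR) into legal pieces.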
17123
17124SDValue PPCTargetLowering::LowerRETURNADDR(SDValue Op,
17125 SelectionDAG &DAG) const {
17126 MachineFunction &MF = DAG.getMachineFunction();
17127 MachineFrameInfo &MFI = MF.getFrameInfo();
17128 MFI.setReturnAddressIsTaken(true);
17129
17130 if (verifyReturnAddressArgumentIsConstant(Op, DAG))
17131 return SDValue();
17132
17133 SDLoc dl(Op);
17134 unsigned Depth = Op.getConstantOperandVal(0);
17135
17136 // Make sure the function does not optimize away the store of the RA to
17137 // the stack.
17138 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
17139 FuncInfo->setLRStoreRequired();
17140 bool isPPC64 = Subtarget.isPPC64();
17141 auto PtrVT = getPointerTy(MF.getDataLayout());
17142
17143 if (Depth > 0) {
17144 // The link register (return address) is saved in the caller's frame
17145 // not the callee's stack frame. So we must get the caller's frame
17146 // address and load the return address at the LR offset from there.
17147 SDValue FrameAddr =
17148 DAG.getLoad(Op.getValueType(), dl, DAG.getEntryNode(),
17149 LowerFRAMEADDR(Op, DAG), MachinePointerInfo());
17150 SDValue Offset =
17151 DAG.getConstant(Subtarget.getFrameLowering()->getReturnSaveOffset(), dl,
17152 isPPC64 ? MVT::i64 : MVT::i32);
17153 return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(),
17154 DAG.getNode(ISD::ADD, dl, PtrVT, FrameAddr, Offset),
17155 MachinePointerInfo());
17156 }
17157
17158 // Just load the return address off the stack.
17159 SDValue RetAddrFI = getReturnAddrFrameIndex(DAG);
17160 return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), RetAddrFI,
17161 MachinePointerInfo());
17162}
17163
17164SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op,
17165 SelectionDAG &DAG) const {
17166 SDLoc dl(Op);
17167 unsigned Depth = Op.getConstantOperandVal(0);
17168
17169 MachineFunction &MF = DAG.getMachineFunction();
17170 MachineFrameInfo &MFI = MF.getFrameInfo();
17171 MFI.setFrameAddressIsTaken(true);
17172
17173 EVT PtrVT = getPointerTy(MF.getDataLayout());
17174 bool isPPC64 = PtrVT == MVT::i64;
17175
17176 // Naked functions never have a frame pointer, and so we use r1. For all
17177 // other functions, this decision must be delayed until during PEI.
17178 unsigned FrameReg;
17179 if (MF.getFunction().hasFnAttribute(Attribute::Naked))
17180 FrameReg = isPPC64 ? PPC::X1 : PPC::R1;
17181 else
17182 FrameReg = isPPC64 ? PPC::FP8 : PPC::FP;
17183
17184 SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg,
17185 PtrVT);
17186 while (Depth--)
17187 FrameAddr = DAG.getLoad(Op.getValueType(), dl, DAG.getEntryNode(),
17188 FrameAddr, MachinePointerInfo());
17189 return FrameAddr;
17190}
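// For example, __builtin_frame_address(0) becomes a single copy from the frame
// register selected above, while __builtin_frame_address(2) additionally emits
// two chained loads that walk the back-chain words stored at the start of each
// stack frame.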
17191
17192// FIXME? Maybe this could be a TableGen attribute on some registers and
17193// this table could be generated automatically from RegInfo.
17194Register PPCTargetLowering::getRegisterByName(const char* RegName, LLT VT,
17195 const MachineFunction &MF) const {
17196 bool isPPC64 = Subtarget.isPPC64();
17197
17198 bool is64Bit = isPPC64 && VT == LLT::scalar(64);
17199 if (!is64Bit && VT != LLT::scalar(32))
17200 report_fatal_error("Invalid register global variable type");
17201
17203 .Case("r1", is64Bit ? PPC::X1 : PPC::R1)
17204 .Case("r2", isPPC64 ? Register() : PPC::R2)
17205 .Case("r13", (is64Bit ? PPC::X13 : PPC::R13))
17206 .Default(Register());
17207
17208 if (Reg)
17209 return Reg;
17210 report_fatal_error("Invalid register name global variable");
17211}
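// This backs global register variables, e.g. a hypothetical declaration like
//   register uintptr_t tp asm("r13");
// Only r1, r2 (32-bit only) and r13 are accepted here; any other name is
// reported as an invalid register for a global variable.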
17212
17213bool PPCTargetLowering::isAccessedAsGotIndirect(SDValue GA) const {
17214 // 32-bit SVR4 ABI accesses everything as got-indirect.
17215 if (Subtarget.is32BitELFABI())
17216 return true;
17217
17218 // AIX accesses everything indirectly through the TOC, which is similar to
17219 // the GOT.
17220 if (Subtarget.isAIXABI())
17221 return true;
17222
17223 CodeModel::Model CModel = getTargetMachine().getCodeModel();
17224 // If it is small or large code model, module locals are accessed
17225 // indirectly by loading their address from .toc/.got.
17226 if (CModel == CodeModel::Small || CModel == CodeModel::Large)
17227 return true;
17228
17229 // JumpTable and BlockAddress are accessed as got-indirect.
17230 if (isa<JumpTableSDNode>(GA) || isa<BlockAddressSDNode>(GA))
17231 return true;
17232
17233 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(GA))
17234 return Subtarget.isGVIndirectSymbol(G->getGlobal());
17235
17236 return false;
17237}
17238
17239bool
17240PPCTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
17241 // The PowerPC target isn't yet aware of offsets.
17242 return false;
17243}
17244
17245bool PPCTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
17246 const CallInst &I,
17247 MachineFunction &MF,
17248 unsigned Intrinsic) const {
17249 switch (Intrinsic) {
17250 case Intrinsic::ppc_atomicrmw_xchg_i128:
17251 case Intrinsic::ppc_atomicrmw_add_i128:
17252 case Intrinsic::ppc_atomicrmw_sub_i128:
17253 case Intrinsic::ppc_atomicrmw_nand_i128:
17254 case Intrinsic::ppc_atomicrmw_and_i128:
17255 case Intrinsic::ppc_atomicrmw_or_i128:
17256 case Intrinsic::ppc_atomicrmw_xor_i128:
17257 case Intrinsic::ppc_cmpxchg_i128:
17258 Info.opc = ISD::INTRINSIC_W_CHAIN;
17259 Info.memVT = MVT::i128;
17260 Info.ptrVal = I.getArgOperand(0);
17261 Info.offset = 0;
17262 Info.align = Align(16);
17263 Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore |
17264 MachineMemOperand::MOVolatile;
17265 return true;
17266 case Intrinsic::ppc_atomic_load_i128:
17267 Info.opc = ISD::INTRINSIC_W_CHAIN;
17268 Info.memVT = MVT::i128;
17269 Info.ptrVal = I.getArgOperand(0);
17270 Info.offset = 0;
17271 Info.align = Align(16);
17272 Info.flags = MachineMemOperand::MOLoad;
17273 return true;
17274 case Intrinsic::ppc_atomic_store_i128:
17275 Info.opc = ISD::INTRINSIC_VOID;
17276 Info.memVT = MVT::i128;
17277 Info.ptrVal = I.getArgOperand(2);
17278 Info.offset = 0;
17279 Info.align = Align(16);
17280 Info.flags = MachineMemOperand::MOStore;
17281 return true;
17282 case Intrinsic::ppc_altivec_lvx:
17283 case Intrinsic::ppc_altivec_lvxl:
17284 case Intrinsic::ppc_altivec_lvebx:
17285 case Intrinsic::ppc_altivec_lvehx:
17286 case Intrinsic::ppc_altivec_lvewx:
17287 case Intrinsic::ppc_vsx_lxvd2x:
17288 case Intrinsic::ppc_vsx_lxvw4x:
17289 case Intrinsic::ppc_vsx_lxvd2x_be:
17290 case Intrinsic::ppc_vsx_lxvw4x_be:
17291 case Intrinsic::ppc_vsx_lxvl:
17292 case Intrinsic::ppc_vsx_lxvll: {
17293 EVT VT;
17294 switch (Intrinsic) {
17295 case Intrinsic::ppc_altivec_lvebx:
17296 VT = MVT::i8;
17297 break;
17298 case Intrinsic::ppc_altivec_lvehx:
17299 VT = MVT::i16;
17300 break;
17301 case Intrinsic::ppc_altivec_lvewx:
17302 VT = MVT::i32;
17303 break;
17304 case Intrinsic::ppc_vsx_lxvd2x:
17305 case Intrinsic::ppc_vsx_lxvd2x_be:
17306 VT = MVT::v2f64;
17307 break;
17308 default:
17309 VT = MVT::v4i32;
17310 break;
17311 }
17312
17313 Info.opc = ISD::INTRINSIC_W_CHAIN;
17314 Info.memVT = VT;
17315 Info.ptrVal = I.getArgOperand(0);
17316 Info.offset = -VT.getStoreSize()+1;
17317 Info.size = 2*VT.getStoreSize()-1;
17318 Info.align = Align(1);
17319 Info.flags = MachineMemOperand::MOLoad;
17320 return true;
17321 }
17322 case Intrinsic::ppc_altivec_stvx:
17323 case Intrinsic::ppc_altivec_stvxl:
17324 case Intrinsic::ppc_altivec_stvebx:
17325 case Intrinsic::ppc_altivec_stvehx:
17326 case Intrinsic::ppc_altivec_stvewx:
17327 case Intrinsic::ppc_vsx_stxvd2x:
17328 case Intrinsic::ppc_vsx_stxvw4x:
17329 case Intrinsic::ppc_vsx_stxvd2x_be:
17330 case Intrinsic::ppc_vsx_stxvw4x_be:
17331 case Intrinsic::ppc_vsx_stxvl:
17332 case Intrinsic::ppc_vsx_stxvll: {
17333 EVT VT;
17334 switch (Intrinsic) {
17335 case Intrinsic::ppc_altivec_stvebx:
17336 VT = MVT::i8;
17337 break;
17338 case Intrinsic::ppc_altivec_stvehx:
17339 VT = MVT::i16;
17340 break;
17341 case Intrinsic::ppc_altivec_stvewx:
17342 VT = MVT::i32;
17343 break;
17344 case Intrinsic::ppc_vsx_stxvd2x:
17345 case Intrinsic::ppc_vsx_stxvd2x_be:
17346 VT = MVT::v2f64;
17347 break;
17348 default:
17349 VT = MVT::v4i32;
17350 break;
17351 }
17352
17353 Info.opc = ISD::INTRINSIC_VOID;
17354 Info.memVT = VT;
17355 Info.ptrVal = I.getArgOperand(1);
17356 Info.offset = -VT.getStoreSize()+1;
17357 Info.size = 2*VT.getStoreSize()-1;
17358 Info.align = Align(1);
17359 Info.flags = MachineMemOperand::MOStore;
17360 return true;
17361 }
17362 case Intrinsic::ppc_stdcx:
17363 case Intrinsic::ppc_stwcx:
17364 case Intrinsic::ppc_sthcx:
17365 case Intrinsic::ppc_stbcx: {
17366 EVT VT;
17367 auto Alignment = Align(8);
17368 switch (Intrinsic) {
17369 case Intrinsic::ppc_stdcx:
17370 VT = MVT::i64;
17371 break;
17372 case Intrinsic::ppc_stwcx:
17373 VT = MVT::i32;
17374 Alignment = Align(4);
17375 break;
17376 case Intrinsic::ppc_sthcx:
17377 VT = MVT::i16;
17378 Alignment = Align(2);
17379 break;
17380 case Intrinsic::ppc_stbcx:
17381 VT = MVT::i8;
17382 Alignment = Align(1);
17383 break;
17384 }
17385 Info.opc = ISD::INTRINSIC_W_CHAIN;
17386 Info.memVT = VT;
17387 Info.ptrVal = I.getArgOperand(0);
17388 Info.offset = 0;
17389 Info.align = Alignment;
17390 Info.flags = MachineMemOperand::MOStore | MachineMemOperand::MOVolatile;
17391 return true;
17392 }
17393 default:
17394 break;
17395 }
17396
17397 return false;
17398}
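// Note on the lvx/stvx cases above: those instructions ignore the low four
// bits of the effective address, so the access may begin up to 15 bytes before
// the pointer operand. The negative offset and widened size give alias
// analysis a conservative byte range covering every 16-byte-aligned block the
// intrinsic could touch.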
17399
17400/// It returns EVT::Other if the type should be determined using generic
17401/// target-independent logic.
17402EVT PPCTargetLowering::getOptimalMemOpType(
17403 const MemOp &Op, const AttributeList &FuncAttributes) const {
17404 if (getTargetMachine().getOptLevel() != CodeGenOptLevel::None) {
17405 // We should use Altivec/VSX loads and stores when available. For unaligned
17406 // addresses, unaligned VSX loads are only fast starting with the P8.
17407 if (Subtarget.hasAltivec() && Op.size() >= 16) {
17408 if (Op.isMemset() && Subtarget.hasVSX()) {
17409 uint64_t TailSize = Op.size() % 16;
17410 // For memset lowering, EXTRACT_VECTOR_ELT tries to return constant
17411 // element if vector element type matches tail store. For tail size
17412 // 3/4, the tail store is i32, v4i32 cannot be used, need a legal one.
17413 if (TailSize > 2 && TailSize <= 4) {
17414 return MVT::v8i16;
17415 }
17416 return MVT::v4i32;
17417 }
17418 if (Op.isAligned(Align(16)) || Subtarget.hasP8Vector())
17419 return MVT::v4i32;
17420 }
17421 }
17422
17423 if (Subtarget.isPPC64()) {
17424 return MVT::i64;
17425 }
17426
17427 return MVT::i32;
17428}
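// For example, a 32-byte memset on a VSX subtarget is emitted as v4i32 vector
// stores; if the length were 19 (a 3-byte tail), v8i16 is chosen instead so
// the tail store's element type stays legal for EXTRACT_VECTOR_ELT.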
17429
17430/// Returns true if it is beneficial to convert a load of a constant
17431/// to just the constant itself.
17432bool PPCTargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
17433 Type *Ty) const {
17434 assert(Ty->isIntegerTy());
17435
17436 unsigned BitSize = Ty->getPrimitiveSizeInBits();
17437 return !(BitSize == 0 || BitSize > 64);
17438}
17439
17440bool PPCTargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const {
17441 if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
17442 return false;
17443 unsigned NumBits1 = Ty1->getPrimitiveSizeInBits();
17444 unsigned NumBits2 = Ty2->getPrimitiveSizeInBits();
17445 return NumBits1 == 64 && NumBits2 == 32;
17446}
17447
17448bool PPCTargetLowering::isTruncateFree(EVT VT1, EVT VT2) const {
17449 if (!VT1.isInteger() || !VT2.isInteger())
17450 return false;
17451 unsigned NumBits1 = VT1.getSizeInBits();
17452 unsigned NumBits2 = VT2.getSizeInBits();
17453 return NumBits1 == 64 && NumBits2 == 32;
17454}
17455
17456bool PPCTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
17457 // Generally speaking, zexts are not free, but they are free when they can be
17458 // folded with other operations.
17459 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Val)) {
17460 EVT MemVT = LD->getMemoryVT();
17461 if ((MemVT == MVT::i1 || MemVT == MVT::i8 || MemVT == MVT::i16 ||
17462 (Subtarget.isPPC64() && MemVT == MVT::i32)) &&
17463 (LD->getExtensionType() == ISD::NON_EXTLOAD ||
17464 LD->getExtensionType() == ISD::ZEXTLOAD))
17465 return true;
17466 }
17467
17468 // FIXME: Add other cases...
17469 // - 32-bit shifts with a zext to i64
17470 // - zext after ctlz, bswap, etc.
17471 // - zext after and by a constant mask
17472
17473 return TargetLowering::isZExtFree(Val, VT2);
17474}
17475
17476bool PPCTargetLowering::isFPExtFree(EVT DestVT, EVT SrcVT) const {
17477 assert(DestVT.isFloatingPoint() && SrcVT.isFloatingPoint() &&
17478 "invalid fpext types");
17479 // Extending to float128 is not free.
17480 if (DestVT == MVT::f128)
17481 return false;
17482 return true;
17483}
17484
17485bool PPCTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
17486 return isInt<16>(Imm) || isUInt<16>(Imm);
17487}
17488
17489bool PPCTargetLowering::isLegalAddImmediate(int64_t Imm) const {
17490 return isInt<16>(Imm) || isUInt<16>(Imm);
17491}
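// Both hooks above accept anything encodable as a signed 16-bit immediate
// (e.g. -32768..32767 for addi/cmpwi) or an unsigned 16-bit immediate (e.g.
// 0..65535 for the logical and unsigned-compare forms); wider constants must
// be materialized into a register first.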
17492
17493bool PPCTargetLowering::allowsMisalignedMemoryAccesses(EVT VT, unsigned, Align,
17494 MachineMemOperand::Flags,
17495 unsigned *Fast) const {
17496 if (DisablePPCUnaligned)
17497 return false;
17498
17499 // PowerPC supports unaligned memory access for simple non-vector types.
17500 // Although accessing unaligned addresses is not as efficient as accessing
17501 // aligned addresses, it is generally more efficient than manual expansion,
17502 // and generally only traps for software emulation when crossing page
17503 // boundaries.
17504
17505 if (!VT.isSimple())
17506 return false;
17507
17508 if (VT.isFloatingPoint() && !VT.isVector() &&
17509 !Subtarget.allowsUnalignedFPAccess())
17510 return false;
17511
17512 if (VT.getSimpleVT().isVector()) {
17513 if (Subtarget.hasVSX()) {
17514 if (VT != MVT::v2f64 && VT != MVT::v2i64 &&
17515 VT != MVT::v4f32 && VT != MVT::v4i32)
17516 return false;
17517 } else {
17518 return false;
17519 }
17520 }
17521
17522 if (VT == MVT::ppcf128)
17523 return false;
17524
17525 if (Fast)
17526 *Fast = 1;
17527
17528 return true;
17529}
17530
17531bool PPCTargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT,
17532 SDValue C) const {
17533 // Check integral scalar types.
17534 if (!VT.isScalarInteger())
17535 return false;
17536 if (auto *ConstNode = dyn_cast<ConstantSDNode>(C.getNode())) {
17537 if (!ConstNode->getAPIntValue().isSignedIntN(64))
17538 return false;
17539 // This transformation will generate >= 2 operations. But the following
17540 // cases will generate <= 2 instructions during ISEL. So exclude them.
17541 // 1. If the constant multiplier fits 16 bits, it can be handled by one
17542 // HW instruction, ie. MULLI
17543 // 2. If the multiplier after shifted fits 16 bits, an extra shift
17544 // instruction is needed than case 1, ie. MULLI and RLDICR
17545 int64_t Imm = ConstNode->getSExtValue();
17546 unsigned Shift = llvm::countr_zero<uint64_t>(Imm);
17547 Imm >>= Shift;
17548 if (isInt<16>(Imm))
17549 return false;
17550 uint64_t UImm = static_cast<uint64_t>(Imm);
17551 if (isPowerOf2_64(UImm + 1) || isPowerOf2_64(UImm - 1) ||
17552 isPowerOf2_64(1 - UImm) || isPowerOf2_64(-1 - UImm))
17553 return true;
17554 }
17555 return false;
17556}
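// For example, (mul x, 3) is left alone above because 3 fits MULLI directly,
// and (mul x, 12) is also left alone since 12 >> 2 == 3 still fits 16 bits.
// A constant such as 65537 (2^16 + 1) returns true and is later rewritten as
// a shift plus an add.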
17557
17559 EVT VT) const {
17562}
17563
17564bool PPCTargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
17565 Type *Ty) const {
17566 if (Subtarget.hasSPE() || Subtarget.useSoftFloat())
17567 return false;
17568 switch (Ty->getScalarType()->getTypeID()) {
17569 case Type::FloatTyID:
17570 case Type::DoubleTyID:
17571 return true;
17572 case Type::FP128TyID:
17573 return Subtarget.hasP9Vector();
17574 default:
17575 return false;
17576 }
17577}
17578
17579// FIXME: add more patterns which are not profitable to hoist.
17580bool PPCTargetLowering::isProfitableToHoist(Instruction *I) const {
17581 if (!I->hasOneUse())
17582 return true;
17583
17584 Instruction *User = I->user_back();
17585 assert(User && "A single use instruction with no uses.");
17586
17587 switch (I->getOpcode()) {
17588 case Instruction::FMul: {
17589 // Don't break FMA, PowerPC prefers FMA.
17590 if (User->getOpcode() != Instruction::FSub &&
17591 User->getOpcode() != Instruction::FAdd)
17592 return true;
17593
17594 const TargetOptions &Options = getTargetMachine().Options;
17595 const Function *F = I->getFunction();
17596 const DataLayout &DL = F->getDataLayout();
17597 Type *Ty = User->getOperand(0)->getType();
17598
17599 return !(
17600 isFMAFasterThanFMulAndFAdd(*F, Ty) &&
17601 isOperationLegalOrCustom(ISD::FMA, getValueType(DL, Ty)) &&
17602 (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath));
17603 }
17604 case Instruction::Load: {
17605 // Don't break "store (load float*)" pattern, this pattern will be combined
17606 // to "store (load int32)" in later InstCombine pass. See function
17607 // combineLoadToOperationType. On PowerPC, loading a floating-point value takes more
17608 // cycles than loading a 32 bit integer.
17609 LoadInst *LI = cast<LoadInst>(I);
17610 // For the loads that combineLoadToOperationType does nothing, like
17611 // ordered load, it should be profitable to hoist them.
17612 // For swifterror load, it can only be used for pointer to pointer type, so
17613 // later type check should get rid of this case.
17614 if (!LI->isUnordered())
17615 return true;
17616
17617 if (User->getOpcode() != Instruction::Store)
17618 return true;
17619
17620 if (I->getType()->getTypeID() != Type::FloatTyID)
17621 return true;
17622
17623 return false;
17624 }
17625 default:
17626 return true;
17627 }
17628 return true;
17629}
17630
17631const MCPhysReg *
17632PPCTargetLowering::getScratchRegisters(CallBase *CB) const {
17633 // LR is a callee-save register, but we must treat it as clobbered by any call
17634 // site. Hence we include LR in the scratch registers, which are in turn added
17635 // as implicit-defs for stackmaps and patchpoints. The same reasoning applies
17636 // to CTR, which is used by any indirect call.
17637 static const MCPhysReg ScratchRegs[] = {
17638 PPC::X12, PPC::LR8, PPC::CTR8, 0
17639 };
17640
17641 return ScratchRegs;
17642}
17643
17644Register PPCTargetLowering::getExceptionPointerRegister(
17645 const Constant *PersonalityFn) const {
17646 return Subtarget.isPPC64() ? PPC::X3 : PPC::R3;
17647}
17648
17649Register PPCTargetLowering::getExceptionSelectorRegister(
17650 const Constant *PersonalityFn) const {
17651 return Subtarget.isPPC64() ? PPC::X4 : PPC::R4;
17652}
17653
17654bool
17655PPCTargetLowering::shouldExpandBuildVectorWithShuffles(
17656 EVT VT , unsigned DefinedValues) const {
17657 if (VT == MVT::v2i64)
17658 return Subtarget.hasDirectMove(); // Don't need stack ops with direct moves
17659
17660 if (Subtarget.hasVSX())
17661 return true;
17662
17663 return TargetLowering::shouldExpandBuildVectorWithShuffles(VT, DefinedValues);
17664}
17665
17666Sched::Preference PPCTargetLowering::getSchedulingPreference(SDNode *N) const {
17667 if (DisableILPPref || Subtarget.enableMachineScheduler())
17668 return TargetLowering::getSchedulingPreference(N);
17669
17670 return Sched::ILP;
17671}
17672
17673// Create a fast isel object.
17674FastISel *
17675PPCTargetLowering::createFastISel(FunctionLoweringInfo &FuncInfo,
17676 const TargetLibraryInfo *LibInfo) const {
17677 return PPC::createFastISel(FuncInfo, LibInfo);
17678}
17679
17680// 'Inverted' means the FMA opcode after negating one multiplicand.
17681// For example, (fma -a b c) = (fnmsub a b c)
17682static unsigned invertFMAOpcode(unsigned Opc) {
17683 switch (Opc) {
17684 default:
17685 llvm_unreachable("Invalid FMA opcode for PowerPC!");
17686 case ISD::FMA:
17687 return PPCISD::FNMSUB;
17688 case PPCISD::FNMSUB:
17689 return ISD::FMA;
17690 }
17691}
17692
17693SDValue PPCTargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG,
17694 bool LegalOps, bool OptForSize,
17695 NegatibleCost &Cost,
17696 unsigned Depth) const {
17697 if (Depth > SelectionDAG::MaxRecursionDepth)
17698 return SDValue();
17699
17700 unsigned Opc = Op.getOpcode();
17701 EVT VT = Op.getValueType();
17702 SDNodeFlags Flags = Op.getNode()->getFlags();
17703
17704 switch (Opc) {
17705 case PPCISD::FNMSUB:
17706 if (!Op.hasOneUse() || !isTypeLegal(VT))
17707 break;
17708
17709 const TargetOptions &Options = getTargetMachine().Options;
17710 SDValue N0 = Op.getOperand(0);
17711 SDValue N1 = Op.getOperand(1);
17712 SDValue N2 = Op.getOperand(2);
17713 SDLoc Loc(Op);
17714
17715 NegatibleCost N2Cost = NegatibleCost::Expensive;
17716 SDValue NegN2 =
17717 getNegatedExpression(N2, DAG, LegalOps, OptForSize, N2Cost, Depth + 1);
17718
17719 if (!NegN2)
17720 return SDValue();
17721
17722 // (fneg (fnmsub a b c)) => (fnmsub (fneg a) b (fneg c))
17723 // (fneg (fnmsub a b c)) => (fnmsub a (fneg b) (fneg c))
17724 // These transformations may change sign of zeroes. For example,
17725 // -(-ab-(-c))=-0 while -(-(ab-c))=+0 when a=b=c=1.
17726 if (Flags.hasNoSignedZeros() || Options.NoSignedZerosFPMath) {
17727 // Try and choose the cheaper one to negate.
17728 NegatibleCost N0Cost = NegatibleCost::Expensive;
17729 SDValue NegN0 = getNegatedExpression(N0, DAG, LegalOps, OptForSize,
17730 N0Cost, Depth + 1);
17731
17732 NegatibleCost N1Cost = NegatibleCost::Expensive;
17733 SDValue NegN1 = getNegatedExpression(N1, DAG, LegalOps, OptForSize,
17734 N1Cost, Depth + 1);
17735
17736 if (NegN0 && N0Cost <= N1Cost) {
17737 Cost = std::min(N0Cost, N2Cost);
17738 return DAG.getNode(Opc, Loc, VT, NegN0, N1, NegN2, Flags);
17739 } else if (NegN1) {
17740 Cost = std::min(N1Cost, N2Cost);
17741 return DAG.getNode(Opc, Loc, VT, N0, NegN1, NegN2, Flags);
17742 }
17743 }
17744
17745 // (fneg (fnmsub a b c)) => (fma a b (fneg c))
17746 if (isOperationLegal(ISD::FMA, VT)) {
17747 Cost = N2Cost;
17748 return DAG.getNode(ISD::FMA, Loc, VT, N0, N1, NegN2, Flags);
17749 }
17750
17751 break;
17752 }
17753
17754 return TargetLowering::getNegatedExpression(Op, DAG, LegalOps, OptForSize,
17755 Cost, Depth);
17756}
17757
17758// Override to enable LOAD_STACK_GUARD lowering on Linux.
17759bool PPCTargetLowering::useLoadStackGuardNode(const Module &M) const {
17760 if (!Subtarget.isTargetLinux())
17761 return TargetLowering::useLoadStackGuardNode(M);
17762 return true;
17763}
17764
17765// Override to disable global variable loading on Linux and insert AIX canary
17766// word declaration.
17767void PPCTargetLowering::insertSSPDeclarations(Module &M) const {
17768 if (Subtarget.isAIXABI()) {
17769 M.getOrInsertGlobal(AIXSSPCanaryWordName,
17770 PointerType::getUnqual(M.getContext()));
17771 return;
17772 }
17773 if (!Subtarget.isTargetLinux())
17774 return TargetLowering::insertSSPDeclarations(M);
17775}
17776
17777Value *PPCTargetLowering::getSDagStackGuard(const Module &M) const {
17778 if (Subtarget.isAIXABI())
17779 return M.getGlobalVariable(AIXSSPCanaryWordName);
17780 return TargetLowering::getSDagStackGuard(M);
17781}
17782
17783bool PPCTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
17784 bool ForCodeSize) const {
17785 if (!VT.isSimple() || !Subtarget.hasVSX())
17786 return false;
17787
17788 switch(VT.getSimpleVT().SimpleTy) {
17789 default:
17790 // For FP types that are currently not supported by PPC backend, return
17791 // false. Examples: f16, f80.
17792 return false;
17793 case MVT::f32:
17794 case MVT::f64: {
17795 if (Subtarget.hasPrefixInstrs() && Subtarget.hasP10Vector()) {
17796 // We can materialize all immediates via XXSPLTI32DX and XXSPLTIDP.
17797 return true;
17798 }
17799 bool IsExact;
17800 APSInt IntResult(16, false);
17801 // The rounding mode doesn't really matter because we only care about floats
17802 // that can be converted to integers exactly.
17803 Imm.convertToInteger(IntResult, APFloat::rmTowardZero, &IsExact);
17804 // For exact values in the range [-16, 15] we can materialize the float.
17805 if (IsExact && IntResult <= 15 && IntResult >= -16)
17806 return true;
17807 return Imm.isZero();
17808 }
17809 case MVT::ppcf128:
17810 return Imm.isPosZero();
17811 }
17812}
17813
17814// For vector shift operation op, fold
17815// (op x, (and y, ((1 << numbits(x)) - 1))) -> (target op x, y)
17816static SDValue stripModuloOnShift(const TargetLowering &TLI, SDNode *N,
17817 SelectionDAG &DAG) {
17818 SDValue N0 = N->getOperand(0);
17819 SDValue N1 = N->getOperand(1);
17820 EVT VT = N0.getValueType();
17821 unsigned OpSizeInBits = VT.getScalarSizeInBits();
17822 unsigned Opcode = N->getOpcode();
17823 unsigned TargetOpcode;
17824
17825 switch (Opcode) {
17826 default:
17827 llvm_unreachable("Unexpected shift operation");
17828 case ISD::SHL:
17829 TargetOpcode = PPCISD::SHL;
17830 break;
17831 case ISD::SRL:
17832 TargetOpcode = PPCISD::SRL;
17833 break;
17834 case ISD::SRA:
17835 TargetOpcode = PPCISD::SRA;
17836 break;
17837 }
17838
17839 if (VT.isVector() && TLI.isOperationLegal(Opcode, VT) &&
17840 N1->getOpcode() == ISD::AND)
17841 if (ConstantSDNode *Mask = isConstOrConstSplat(N1->getOperand(1)))
17842 if (Mask->getZExtValue() == OpSizeInBits - 1)
17843 return DAG.getNode(TargetOpcode, SDLoc(N), VT, N0, N1->getOperand(0));
17844
17845 return SDValue();
17846}
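// For example, (shl v4i32:x, (and v4i32:y, splat(31))) becomes PPCISD::SHL x, y
// above: the underlying vector shift instructions only use the low
// log2(element-bits) bits of each shift amount, so the masking AND is
// redundant.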
17847
17848SDValue PPCTargetLowering::combineSHL(SDNode *N, DAGCombinerInfo &DCI) const {
17849 if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
17850 return Value;
17851
17852 SDValue N0 = N->getOperand(0);
17853 ConstantSDNode *CN1 = dyn_cast<ConstantSDNode>(N->getOperand(1));
17854 if (!Subtarget.isISA3_0() || !Subtarget.isPPC64() ||
17855 N0.getOpcode() != ISD::SIGN_EXTEND ||
17856 N0.getOperand(0).getValueType() != MVT::i32 || CN1 == nullptr ||
17857 N->getValueType(0) != MVT::i64)
17858 return SDValue();
17859
17860 // We can't save an operation here if the value is already extended, and
17861 // the existing shift is easier to combine.
17862 SDValue ExtsSrc = N0.getOperand(0);
17863 if (ExtsSrc.getOpcode() == ISD::TRUNCATE &&
17864 ExtsSrc.getOperand(0).getOpcode() == ISD::AssertSext)
17865 return SDValue();
17866
17867 SDLoc DL(N0);
17868 SDValue ShiftBy = SDValue(CN1, 0);
17869 // We want the shift amount to be i32 on the extswli, but the shift could
17870 // have an i64.
17871 if (ShiftBy.getValueType() == MVT::i64)
17872 ShiftBy = DCI.DAG.getConstant(CN1->getZExtValue(), DL, MVT::i32);
17873
17874 return DCI.DAG.getNode(PPCISD::EXTSWSLI, DL, MVT::i64, N0->getOperand(0),
17875 ShiftBy);
17876}
17877
17878SDValue PPCTargetLowering::combineSRA(SDNode *N, DAGCombinerInfo &DCI) const {
17879 if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
17880 return Value;
17881
17882 return SDValue();
17883}
17884
17885SDValue PPCTargetLowering::combineSRL(SDNode *N, DAGCombinerInfo &DCI) const {
17886 if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
17887 return Value;
17888
17889 return SDValue();
17890}
17891
17892// Transform (add X, (zext(setne Z, C))) -> (addze X, (addic (addi Z, -C), -1))
17893// Transform (add X, (zext(sete Z, C))) -> (addze X, (subfic (addi Z, -C), 0))
17894// When C is zero, the equation (addi Z, -C) can be simplified to Z
17895// Requirement: -C in [-32768, 32767], X and Z are MVT::i64 types
17896static SDValue combineADDToADDZE(SDNode *N, SelectionDAG &DAG,
17897 const PPCSubtarget &Subtarget) {
17898 if (!Subtarget.isPPC64())
17899 return SDValue();
17900
17901 SDValue LHS = N->getOperand(0);
17902 SDValue RHS = N->getOperand(1);
17903
17904 auto isZextOfCompareWithConstant = [](SDValue Op) {
17905 if (Op.getOpcode() != ISD::ZERO_EXTEND || !Op.hasOneUse() ||
17906 Op.getValueType() != MVT::i64)
17907 return false;
17908
17909 SDValue Cmp = Op.getOperand(0);
17910 if (Cmp.getOpcode() != ISD::SETCC || !Cmp.hasOneUse() ||
17911 Cmp.getOperand(0).getValueType() != MVT::i64)
17912 return false;
17913
17914 if (auto *Constant = dyn_cast<ConstantSDNode>(Cmp.getOperand(1))) {
17915 int64_t NegConstant = 0 - Constant->getSExtValue();
17916 // Due to the limitations of the addi instruction,
17917 // -C is required to be [-32768, 32767].
17918 return isInt<16>(NegConstant);
17919 }
17920
17921 return false;
17922 };
17923
17924 bool LHSHasPattern = isZextOfCompareWithConstant(LHS);
17925 bool RHSHasPattern = isZextOfCompareWithConstant(RHS);
17926
17927 // If there is a pattern, canonicalize a zext operand to the RHS.
17928 if (LHSHasPattern && !RHSHasPattern)
17929 std::swap(LHS, RHS);
17930 else if (!LHSHasPattern && !RHSHasPattern)
17931 return SDValue();
17932
17933 SDLoc DL(N);
17934 SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Glue);
17935 SDValue Cmp = RHS.getOperand(0);
17936 SDValue Z = Cmp.getOperand(0);
17937 auto *Constant = cast<ConstantSDNode>(Cmp.getOperand(1));
17938 int64_t NegConstant = 0 - Constant->getSExtValue();
17939
17940 switch(cast<CondCodeSDNode>(Cmp.getOperand(2))->get()) {
17941 default: break;
17942 case ISD::SETNE: {
17943 // when C == 0
17944 // --> addze X, (addic Z, -1).carry
17945 // /
17946 // add X, (zext(setne Z, C))--
17947 // \ when -32768 <= -C <= 32767 && C != 0
17948 // --> addze X, (addic (addi Z, -C), -1).carry
17949 SDValue Add = DAG.getNode(ISD::ADD, DL, MVT::i64, Z,
17950 DAG.getConstant(NegConstant, DL, MVT::i64));
17951 SDValue AddOrZ = NegConstant != 0 ? Add : Z;
17952 SDValue Addc = DAG.getNode(ISD::ADDC, DL, DAG.getVTList(MVT::i64, MVT::Glue),
17953 AddOrZ, DAG.getConstant(-1ULL, DL, MVT::i64));
17954 return DAG.getNode(ISD::ADDE, DL, VTs, LHS, DAG.getConstant(0, DL, MVT::i64),
17955 SDValue(Addc.getNode(), 1));
17956 }
17957 case ISD::SETEQ: {
17958 // when C == 0
17959 // --> addze X, (subfic Z, 0).carry
17960 // /
17961 // add X, (zext(sete Z, C))--
17962 // \ when -32768 <= -C <= 32767 && C != 0
17963 // --> addze X, (subfic (addi Z, -C), 0).carry
17964 SDValue Add = DAG.getNode(ISD::ADD, DL, MVT::i64, Z,
17965 DAG.getConstant(NegConstant, DL, MVT::i64));
17966 SDValue AddOrZ = NegConstant != 0 ? Add : Z;
17967 SDValue Subc = DAG.getNode(ISD::SUBC, DL, DAG.getVTList(MVT::i64, MVT::Glue),
17968 DAG.getConstant(0, DL, MVT::i64), AddOrZ);
17969 return DAG.getNode(ISD::ADDE, DL, VTs, LHS, DAG.getConstant(0, DL, MVT::i64),
17970 SDValue(Subc.getNode(), 1));
17971 }
17972 }
17973
17974 return SDValue();
17975}
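// For example, with i64 values, X + zext(Z != 5) is rewritten above into
// roughly: addi t, Z, -5 ; addic t, t, -1 ; addze result, X, i.e. the compare
// result is recovered from the carry bit instead of materializing a boolean in
// a GPR first.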
17976
17977// Transform
17978// (add C1, (MAT_PCREL_ADDR GlobalAddr+C2)) to
17979// (MAT_PCREL_ADDR GlobalAddr+(C1+C2))
17980// In this case both C1 and C2 must be known constants.
17981// C1+C2 must fit into a 34 bit signed integer.
17982static SDValue combineADDToMAT_PCREL_ADDR(SDNode *N, SelectionDAG &DAG,
17983 const PPCSubtarget &Subtarget) {
17984 if (!Subtarget.isUsingPCRelativeCalls())
17985 return SDValue();
17986
17987 // Check both Operand 0 and Operand 1 of the ADD node for the PCRel node.
17988 // If we find that node try to cast the Global Address and the Constant.
17989 SDValue LHS = N->getOperand(0);
17990 SDValue RHS = N->getOperand(1);
17991
17992 if (LHS.getOpcode() != PPCISD::MAT_PCREL_ADDR)
17993 std::swap(LHS, RHS);
17994
17995 if (LHS.getOpcode() != PPCISD::MAT_PCREL_ADDR)
17996 return SDValue();
17997
17998 // Operand zero of PPCISD::MAT_PCREL_ADDR is the GA node.
17999 GlobalAddressSDNode *GSDN = dyn_cast<GlobalAddressSDNode>(LHS.getOperand(0));
18000 ConstantSDNode* ConstNode = dyn_cast<ConstantSDNode>(RHS);
18001
18002 // Check that both casts succeeded.
18003 if (!GSDN || !ConstNode)
18004 return SDValue();
18005
18006 int64_t NewOffset = GSDN->getOffset() + ConstNode->getSExtValue();
18007 SDLoc DL(GSDN);
18008
18009 // The signed int offset needs to fit in 34 bits.
18010 if (!isInt<34>(NewOffset))
18011 return SDValue();
18012
18013 // The new global address is a copy of the old global address except
18014 // that it has the updated Offset.
18015 SDValue GA =
18016 DAG.getTargetGlobalAddress(GSDN->getGlobal(), DL, GSDN->getValueType(0),
18017 NewOffset, GSDN->getTargetFlags());
18018 SDValue MatPCRel =
18019 DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, GSDN->getValueType(0), GA);
18020 return MatPCRel;
18021}
18022
18023SDValue PPCTargetLowering::combineADD(SDNode *N, DAGCombinerInfo &DCI) const {
18024 if (auto Value = combineADDToADDZE(N, DCI.DAG, Subtarget))
18025 return Value;
18026
18027 if (auto Value = combineADDToMAT_PCREL_ADDR(N, DCI.DAG, Subtarget))
18028 return Value;
18029
18030 return SDValue();
18031}
18032
18033// Detect TRUNCATE operations on bitcasts of float128 values.
18034// What we are looking for here is the situation where we extract a subset
18035// of bits from a 128 bit float.
18036// This can be of two forms:
18037// 1) BITCAST of f128 feeding TRUNCATE
18038// 2) BITCAST of f128 feeding SRL (a shift) feeding TRUNCATE
18039// The reason this is required is because we do not have a legal i128 type
18040// and so we want to prevent having to store the f128 and then reload part
18041// of it.
18042SDValue PPCTargetLowering::combineTRUNCATE(SDNode *N,
18043 DAGCombinerInfo &DCI) const {
18044 // If we are using CRBits then try that first.
18045 if (Subtarget.useCRBits()) {
18046 // Check if CRBits did anything and return that if it did.
18047 if (SDValue CRTruncValue = DAGCombineTruncBoolExt(N, DCI))
18048 return CRTruncValue;
18049 }
18050
18051 SDLoc dl(N);
18052 SDValue Op0 = N->getOperand(0);
18053
18054 // Looking for a truncate of i128 to i64.
18055 if (Op0.getValueType() != MVT::i128 || N->getValueType(0) != MVT::i64)
18056 return SDValue();
18057
18058 int EltToExtract = DCI.DAG.getDataLayout().isBigEndian() ? 1 : 0;
18059
18060 // SRL feeding TRUNCATE.
18061 if (Op0.getOpcode() == ISD::SRL) {
18062 ConstantSDNode *ConstNode = dyn_cast<ConstantSDNode>(Op0.getOperand(1));
18063 // The right shift has to be by 64 bits.
18064 if (!ConstNode || ConstNode->getZExtValue() != 64)
18065 return SDValue();
18066
18067 // Switch the element number to extract.
18068 EltToExtract = EltToExtract ? 0 : 1;
18069 // Update Op0 past the SRL.
18070 Op0 = Op0.getOperand(0);
18071 }
18072
18073 // BITCAST feeding a TRUNCATE possibly via SRL.
18074 if (Op0.getOpcode() == ISD::BITCAST &&
18075 Op0.getValueType() == MVT::i128 &&
18076 Op0.getOperand(0).getValueType() == MVT::f128) {
18077 SDValue Bitcast = DCI.DAG.getBitcast(MVT::v2i64, Op0.getOperand(0));
18078 return DCI.DAG.getNode(
18079 ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, Bitcast,
18080 DCI.DAG.getTargetConstant(EltToExtract, dl, MVT::i32));
18081 }
18082 return SDValue();
18083}
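// For example, (i64 (trunc (srl (i128 (bitcast f128 %v)), 64))) is rewritten
// above into an EXTRACT_VECTOR_ELT of (v2i64 (bitcast %v)), picking element 0
// or 1 according to endianness, so the f128 value never has to go through
// memory.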
18084
18085SDValue PPCTargetLowering::combineMUL(SDNode *N, DAGCombinerInfo &DCI) const {
18086 SelectionDAG &DAG = DCI.DAG;
18087
18088 ConstantSDNode *ConstOpOrElement = isConstOrConstSplat(N->getOperand(1));
18089 if (!ConstOpOrElement)
18090 return SDValue();
18091
18092 // An imul is usually smaller than the alternative sequence for legal type.
18093 if (DAG.getMachineFunction().getFunction().hasMinSize() &&
18094 isOperationLegal(ISD::MUL, N->getValueType(0)))
18095 return SDValue();
18096
18097 auto IsProfitable = [this](bool IsNeg, bool IsAddOne, EVT VT) -> bool {
18098 switch (this->Subtarget.getCPUDirective()) {
18099 default:
18100 // TODO: enhance the condition for subtarget before pwr8
18101 return false;
18102 case PPC::DIR_PWR8:
18103 // type mul add shl
18104 // scalar 4 1 1
18105 // vector 7 2 2
18106 return true;
18107 case PPC::DIR_PWR9:
18108 case PPC::DIR_PWR10:
18109 case PPC::DIR_PWR11:
18110 case PPC::DIR_PWR_FUTURE:
18111 // type mul add shl
18112 // scalar 5 2 2
18113 // vector 7 2 2
18114
18115 // The cycle ratios of the related operations are shown in the table above.
18116 // Because mul is 5(scalar)/7(vector), add/sub/shl are all 2 for both
18117 // scalar and vector type. For 2 instrs patterns, add/sub + shl
18118 // are 4, it is always profitable; but for 3 instrs patterns
18119 // (mul x, -(2^N + 1)) => -(add (shl x, N), x), sub + add + shl are 6.
18120 // So we should only do it for vector type.
18121 return IsAddOne && IsNeg ? VT.isVector() : true;
18122 }
18123 };
18124
18125 EVT VT = N->getValueType(0);
18126 SDLoc DL(N);
18127
18128 const APInt &MulAmt = ConstOpOrElement->getAPIntValue();
18129 bool IsNeg = MulAmt.isNegative();
18130 APInt MulAmtAbs = MulAmt.abs();
18131
18132 if ((MulAmtAbs - 1).isPowerOf2()) {
18133 // (mul x, 2^N + 1) => (add (shl x, N), x)
18134 // (mul x, -(2^N + 1)) => -(add (shl x, N), x)
18135
18136 if (!IsProfitable(IsNeg, true, VT))
18137 return SDValue();
18138
18139 SDValue Op0 = N->getOperand(0);
18140 SDValue Op1 =
18141 DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
18142 DAG.getConstant((MulAmtAbs - 1).logBase2(), DL, VT));
18143 SDValue Res = DAG.getNode(ISD::ADD, DL, VT, Op0, Op1);
18144
18145 if (!IsNeg)
18146 return Res;
18147
18148 return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Res);
18149 } else if ((MulAmtAbs + 1).isPowerOf2()) {
18150 // (mul x, 2^N - 1) => (sub (shl x, N), x)
18151 // (mul x, -(2^N - 1)) => (sub x, (shl x, N))
18152
18153 if (!IsProfitable(IsNeg, false, VT))
18154 return SDValue();
18155
18156 SDValue Op0 = N->getOperand(0);
18157 SDValue Op1 =
18158 DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
18159 DAG.getConstant((MulAmtAbs + 1).logBase2(), DL, VT));
18160
18161 if (!IsNeg)
18162 return DAG.getNode(ISD::SUB, DL, VT, Op1, Op0);
18163 else
18164 return DAG.getNode(ISD::SUB, DL, VT, Op0, Op1);
18165
18166 } else {
18167 return SDValue();
18168 }
18169}
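// For example, (mul x, 17) becomes (add (shl x, 4), x) and (mul x, -15)
// becomes (sub x, (shl x, 4)), provided the IsProfitable check above accepts
// the pattern for the current CPU directive.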
18170
18171// Combine fma-like op (like fnmsub) with fnegs to appropriate op. Do this
18172// in combiner since we need to check SD flags and other subtarget features.
18173SDValue PPCTargetLowering::combineFMALike(SDNode *N,
18174 DAGCombinerInfo &DCI) const {
18175 SDValue N0 = N->getOperand(0);
18176 SDValue N1 = N->getOperand(1);
18177 SDValue N2 = N->getOperand(2);
18178 SDNodeFlags Flags = N->getFlags();
18179 EVT VT = N->getValueType(0);
18180 SelectionDAG &DAG = DCI.DAG;
18181 const TargetOptions &Options = getTargetMachine().Options;
18182 unsigned Opc = N->getOpcode();
18183 bool CodeSize = DAG.getMachineFunction().getFunction().hasOptSize();
18184 bool LegalOps = !DCI.isBeforeLegalizeOps();
18185 SDLoc Loc(N);
18186
18187 if (!isOperationLegal(ISD::FMA, VT))
18188 return SDValue();
18189
18190 // Allowing transformation to FNMSUB may change sign of zeroes when ab-c=0
18191 // since (fnmsub a b c)=-0 while c-ab=+0.
18192 if (!Flags.hasNoSignedZeros() && !Options.NoSignedZerosFPMath)
18193 return SDValue();
18194
18195 // (fma (fneg a) b c) => (fnmsub a b c)
18196 // (fnmsub (fneg a) b c) => (fma a b c)
18197 if (SDValue NegN0 = getCheaperNegatedExpression(N0, DAG, LegalOps, CodeSize))
18198 return DAG.getNode(invertFMAOpcode(Opc), Loc, VT, NegN0, N1, N2, Flags);
18199
18200 // (fma a (fneg b) c) => (fnmsub a b c)
18201 // (fnmsub a (fneg b) c) => (fma a b c)
18202 if (SDValue NegN1 = getCheaperNegatedExpression(N1, DAG, LegalOps, CodeSize))
18203 return DAG.getNode(invertFMAOpcode(Opc), Loc, VT, N0, NegN1, N2, Flags);
18204
18205 return SDValue();
18206}
18207
18208bool PPCTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
18209 // Only duplicate to increase tail-calls for the 64bit SysV ABIs.
18210 if (!Subtarget.is64BitELFABI())
18211 return false;
18212
18213 // If not a tail call then no need to proceed.
18214 if (!CI->isTailCall())
18215 return false;
18216
18217 // If sibling calls have been disabled and tail-calls aren't guaranteed
18218 // there is no reason to duplicate.
18219 auto &TM = getTargetMachine();
18220 if (!TM.Options.GuaranteedTailCallOpt && DisableSCO)
18221 return false;
18222
18223 // Can't tail call a function called indirectly, or if it has variadic args.
18224 const Function *Callee = CI->getCalledFunction();
18225 if (!Callee || Callee->isVarArg())
18226 return false;
18227
18228 // Make sure the callee and caller calling conventions are eligible for tco.
18229 const Function *Caller = CI->getParent()->getParent();
18230 if (!areCallingConvEligibleForTCO_64SVR4(Caller->getCallingConv(),
18231 CI->getCallingConv()))
18232 return false;
18233
18234 // If the function is local then we have a good chance at tail-calling it
18235 return getTargetMachine().shouldAssumeDSOLocal(Callee);
18236}
18237
18238bool PPCTargetLowering::
18239isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const {
18240 const Value *Mask = AndI.getOperand(1);
18241 // If the mask is suitable for andi. or andis. we should sink the and.
18242 if (const ConstantInt *CI = dyn_cast<ConstantInt>(Mask)) {
18243 // Can't handle constants wider than 64-bits.
18244 if (CI->getBitWidth() > 64)
18245 return false;
18246 int64_t ConstVal = CI->getZExtValue();
18247 return isUInt<16>(ConstVal) ||
18248 (isUInt<16>(ConstVal >> 16) && !(ConstVal & 0xFFFF));
18249 }
18250
18251 // For non-constant masks, we can always use the record-form and.
18252 return true;
18253}
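// For example, masks like 0x00FF (fits andi.) or 0x00FF0000 (fits andis.)
// report true so the AND sinks next to its compare, while 0x00FF0001 cannot be
// handled by a single record-form immediate AND and reports false.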
18254
18255/// getAddrModeForFlags - Based on the set of address flags, select the most
18256/// optimal instruction format to match by.
18257PPC::AddrMode PPCTargetLowering::getAddrModeForFlags(unsigned Flags) const {
18258 // This is not a node we should be handling here.
18259 if (Flags == PPC::MOF_None)
18260 return PPC::AM_None;
18261 // Unaligned D-Forms are tried first, followed by the aligned D-Forms.
18262 for (auto FlagSet : AddrModesMap.at(PPC::AM_DForm))
18263 if ((Flags & FlagSet) == FlagSet)
18264 return PPC::AM_DForm;
18265 for (auto FlagSet : AddrModesMap.at(PPC::AM_DSForm))
18266 if ((Flags & FlagSet) == FlagSet)
18267 return PPC::AM_DSForm;
18268 for (auto FlagSet : AddrModesMap.at(PPC::AM_DQForm))
18269 if ((Flags & FlagSet) == FlagSet)
18270 return PPC::AM_DQForm;
18271 for (auto FlagSet : AddrModesMap.at(PPC::AM_PrefixDForm))
18272 if ((Flags & FlagSet) == FlagSet)
18273 return PPC::AM_PrefixDForm;
18274 // If no other forms are selected, return an X-Form as it is the most
18275 // general addressing mode.
18276 return PPC::AM_XForm;
18277}
18278
18279/// Set alignment flags based on whether or not the Frame Index is aligned.
18280/// Utilized when computing flags for address computation when selecting
18281/// load and store instructions.
18282static void setAlignFlagsForFI(SDValue N, unsigned &FlagSet,
18283 SelectionDAG &DAG) {
18284 bool IsAdd = ((N.getOpcode() == ISD::ADD) || (N.getOpcode() == ISD::OR));
18285 FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(IsAdd ? N.getOperand(0) : N);
18286 if (!FI)
18287 return;
18288 MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
18289 unsigned FrameIndexAlign = MFI.getObjectAlign(FI->getIndex()).value();
18290 // If this is (add $FI, $S16Imm), the alignment flags are already set
18291 // based on the immediate. We just need to clear the alignment flags
18292 // if the FI alignment is weaker.
18293 if ((FrameIndexAlign % 4) != 0)
18294 FlagSet &= ~PPC::MOF_RPlusSImm16Mult4;
18295 if ((FrameIndexAlign % 16) != 0)
18296 FlagSet &= ~PPC::MOF_RPlusSImm16Mult16;
18297 // If the address is a plain FrameIndex, set alignment flags based on
18298 // FI alignment.
18299 if (!IsAdd) {
18300 if ((FrameIndexAlign % 4) == 0)
18301 FlagSet |= PPC::MOF_RPlusSImm16Mult4;
18302 if ((FrameIndexAlign % 16) == 0)
18303 FlagSet |= PPC::MOF_RPlusSImm16Mult16;
18304 }
18305}
18306
18307/// Given a node, compute flags that are used for address computation when
18308/// selecting load and store instructions. The flags computed are stored in
18309/// FlagSet. This function takes into account whether the node is a constant,
18310/// an ADD, OR, or a constant, and computes the address flags accordingly.
18311static void computeFlagsForAddressComputation(SDValue N, unsigned &FlagSet,
18312 SelectionDAG &DAG) {
18313 // Set the alignment flags for the node depending on if the node is
18314 // 4-byte or 16-byte aligned.
18315 auto SetAlignFlagsForImm = [&](uint64_t Imm) {
18316 if ((Imm & 0x3) == 0)
18317 FlagSet |= PPC::MOF_RPlusSImm16Mult4;
18318 if ((Imm & 0xf) == 0)
18319 FlagSet |= PPC::MOF_RPlusSImm16Mult16;
18320 };
18321
18322 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) {
18323 // All 32-bit constants can be computed as LIS + Disp.
18324 const APInt &ConstImm = CN->getAPIntValue();
18325 if (ConstImm.isSignedIntN(32)) { // Flag to handle 32-bit constants.
18326 FlagSet |= PPC::MOF_AddrIsSImm32;
18327 SetAlignFlagsForImm(ConstImm.getZExtValue());
18328 setAlignFlagsForFI(N, FlagSet, DAG);
18329 }
18330 if (ConstImm.isSignedIntN(34)) // Flag to handle 34-bit constants.
18331 FlagSet |= PPC::MOF_RPlusSImm34;
18332 else // Let constant materialization handle large constants.
18333 FlagSet |= PPC::MOF_NotAddNorCst;
18334 } else if (N.getOpcode() == ISD::ADD || provablyDisjointOr(DAG, N)) {
18335 // This address can be represented as an addition of:
18336 // - Register + Imm16 (possibly a multiple of 4/16)
18337 // - Register + Imm34
18338 // - Register + PPCISD::Lo
18339 // - Register + Register
18340 // In any case, we won't have to match this as Base + Zero.
18341 SDValue RHS = N.getOperand(1);
18342 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(RHS)) {
18343 const APInt &ConstImm = CN->getAPIntValue();
18344 if (ConstImm.isSignedIntN(16)) {
18345 FlagSet |= PPC::MOF_RPlusSImm16; // Signed 16-bit immediates.
18346 SetAlignFlagsForImm(ConstImm.getZExtValue());
18347 setAlignFlagsForFI(N, FlagSet, DAG);
18348 }
18349 if (ConstImm.isSignedIntN(34))
18350 FlagSet |= PPC::MOF_RPlusSImm34; // Signed 34-bit immediates.
18351 else
18352 FlagSet |= PPC::MOF_RPlusR; // Register.
18353 } else if (RHS.getOpcode() == PPCISD::Lo && !RHS.getConstantOperandVal(1))
18354 FlagSet |= PPC::MOF_RPlusLo; // PPCISD::Lo.
18355 else
18356 FlagSet |= PPC::MOF_RPlusR;
18357 } else { // The address computation is not a constant or an addition.
18358 setAlignFlagsForFI(N, FlagSet, DAG);
18359 FlagSet |= PPC::MOF_NotAddNorCst;
18360 }
18361}
18362
18363static bool isPCRelNode(SDValue N) {
18364 return (N.getOpcode() == PPCISD::MAT_PCREL_ADDR ||
18365 isValidPCRelNode<ConstantPoolSDNode>(N) ||
18366 isValidPCRelNode<GlobalAddressSDNode>(N) ||
18367 isValidPCRelNode<JumpTableSDNode>(N) ||
18368 isValidPCRelNode<BlockAddressSDNode>(N));
18369}
18370
18371/// computeMOFlags - Given a node N and its Parent (a MemSDNode), compute
18372/// the address flags of the load/store instruction that is to be matched.
18373unsigned PPCTargetLowering::computeMOFlags(const SDNode *Parent, SDValue N,
18374 SelectionDAG &DAG) const {
18375 unsigned FlagSet = PPC::MOF_None;
18376
18377 // Compute subtarget flags.
18378 if (!Subtarget.hasP9Vector())
18379 FlagSet |= PPC::MOF_SubtargetBeforeP9;
18380 else
18381 FlagSet |= PPC::MOF_SubtargetP9;
18382
18383 if (Subtarget.hasPrefixInstrs())
18384 FlagSet |= PPC::MOF_SubtargetP10;
18385
18386 if (Subtarget.hasSPE())
18387 FlagSet |= PPC::MOF_SubtargetSPE;
18388
18389 // Check if we have a PCRel node and return early.
18390 if ((FlagSet & PPC::MOF_SubtargetP10) && isPCRelNode(N))
18391 return FlagSet;
18392
18393 // If the node is the paired load/store intrinsics, compute flags for
18394 // address computation and return early.
18395 unsigned ParentOp = Parent->getOpcode();
18396 if (Subtarget.isISA3_1() && ((ParentOp == ISD::INTRINSIC_W_CHAIN) ||
18397 (ParentOp == ISD::INTRINSIC_VOID))) {
18398 unsigned ID = Parent->getConstantOperandVal(1);
18399 if ((ID == Intrinsic::ppc_vsx_lxvp) || (ID == Intrinsic::ppc_vsx_stxvp)) {
18400 SDValue IntrinOp = (ID == Intrinsic::ppc_vsx_lxvp)
18401 ? Parent->getOperand(2)
18402 : Parent->getOperand(3);
18403 computeFlagsForAddressComputation(IntrinOp, FlagSet, DAG);
18404 FlagSet |= PPC::MOF_Vector;
18405 return FlagSet;
18406 }
18407 }
18408
18409 // Mark this as something we don't want to handle here if it is atomic
18410 // or pre-increment instruction.
18411 if (const LSBaseSDNode *LSB = dyn_cast<LSBaseSDNode>(Parent))
18412 if (LSB->isIndexed())
18413 return PPC::MOF_None;
18414
18415 // Compute in-memory type flags. This is based on if there are scalars,
18416 // floats or vectors.
18417 const MemSDNode *MN = dyn_cast<MemSDNode>(Parent);
18418 assert(MN && "Parent should be a MemSDNode!");
18419 EVT MemVT = MN->getMemoryVT();
18420 unsigned Size = MemVT.getSizeInBits();
18421 if (MemVT.isScalarInteger()) {
18422 assert(Size <= 128 &&
18423 "Not expecting scalar integers larger than 16 bytes!");
18424 if (Size < 32)
18425 FlagSet |= PPC::MOF_SubWordInt;
18426 else if (Size == 32)
18427 FlagSet |= PPC::MOF_WordInt;
18428 else
18429 FlagSet |= PPC::MOF_DoubleWordInt;
18430 } else if (MemVT.isVector() && !MemVT.isFloatingPoint()) { // Integer vectors.
18431 if (Size == 128)
18432 FlagSet |= PPC::MOF_Vector;
18433 else if (Size == 256) {
18434 assert(Subtarget.pairedVectorMemops() &&
18435 "256-bit vectors are only available when paired vector memops is "
18436 "enabled!");
18437 FlagSet |= PPC::MOF_Vector;
18438 } else
18439 llvm_unreachable("Not expecting illegal vectors!");
18440 } else { // Floating point type: can be scalar, f128 or vector types.
18441 if (Size == 32 || Size == 64)
18442 FlagSet |= PPC::MOF_ScalarFloat;
18443 else if (MemVT == MVT::f128 || MemVT.isVector())
18444 FlagSet |= PPC::MOF_Vector;
18445 else
18446 llvm_unreachable("Not expecting illegal scalar floats!");
18447 }
18448
18449 // Compute flags for address computation.
18450 computeFlagsForAddressComputation(N, FlagSet, DAG);
18451
18452 // Compute type extension flags.
18453 if (const LoadSDNode *LN = dyn_cast<LoadSDNode>(Parent)) {
18454 switch (LN->getExtensionType()) {
18455 case ISD::SEXTLOAD:
18456 FlagSet |= PPC::MOF_SExt;
18457 break;
18458 case ISD::EXTLOAD:
18459 case ISD::ZEXTLOAD:
18460 FlagSet |= PPC::MOF_ZExt;
18461 break;
18462 case ISD::NON_EXTLOAD:
18463 FlagSet |= PPC::MOF_NoExt;
18464 break;
18465 }
18466 } else
18467 FlagSet |= PPC::MOF_NoExt;
18468
18469 // For integers, no extension is the same as zero extension.
18470 // We set the extension mode to zero extension so we don't have
18471 // to add separate entries in AddrModesMap for loads and stores.
18472 if (MemVT.isScalarInteger() && (FlagSet & PPC::MOF_NoExt)) {
18473 FlagSet |= PPC::MOF_ZExt;
18474 FlagSet &= ~PPC::MOF_NoExt;
18475 }
18476
18477 // If we don't have prefixed instructions, 34-bit constants should be
18478 // treated as PPC::MOF_NotAddNorCst so they can match D-Forms.
18479 bool IsNonP1034BitConst =
18480 ((PPC::MOF_RPlusSImm34 | PPC::MOF_AddrIsSImm32 | PPC::MOF_SubtargetP10) &
18481 FlagSet) == PPC::MOF_RPlusSImm34;
18482 if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::OR &&
18483 IsNonP1034BitConst)
18484 FlagSet |= PPC::MOF_NotAddNorCst;
18485
18486 return FlagSet;
18487}
18488
18489/// SelectForceXFormMode - Given the specified address, force it to be
18490/// represented as an indexed [r+r] operation (an XForm instruction).
18491PPC::AddrMode PPCTargetLowering::SelectForceXFormMode(SDValue N, SDValue &Disp,
18492 SDValue &Base,
18493 SelectionDAG &DAG) const {
18494
18495 PPC::AddrMode Mode = PPC::AM_XForm;
18496 int16_t ForceXFormImm = 0;
18497 if (provablyDisjointOr(DAG, N) &&
18498 !isIntS16Immediate(N.getOperand(1), ForceXFormImm)) {
18499 Disp = N.getOperand(0);
18500 Base = N.getOperand(1);
18501 return Mode;
18502 }
18503
18504 // If the address is the result of an add, we will utilize the fact that the
18505 // address calculation includes an implicit add. However, we can reduce
18506 // register pressure if we do not materialize a constant just for use as the
18507 // index register. We only get rid of the add if it is not an add of a
18508 // value and a 16-bit signed constant and both have a single use.
18509 if (N.getOpcode() == ISD::ADD &&
18510 (!isIntS16Immediate(N.getOperand(1), ForceXFormImm) ||
18511 !N.getOperand(1).hasOneUse() || !N.getOperand(0).hasOneUse())) {
18512 Disp = N.getOperand(0);
18513 Base = N.getOperand(1);
18514 return Mode;
18515 }
18516
18517 // Otherwise, use R0 as the base register.
18518 Disp = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
18519 N.getValueType());
18520 Base = N;
18521
18522 return Mode;
18523}
18524
18525bool PPCTargetLowering::splitValueIntoRegisterParts(
18526 SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
18527 unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const {
18528 EVT ValVT = Val.getValueType();
18529 // If we are splitting a scalar integer into f64 parts (i.e. so they
18530 // can be placed into VFRC registers), we need to zero extend and
18531 // bitcast the values. This will ensure the value is placed into a
18532 // VSR using direct moves or stack operations as needed.
18533 if (PartVT == MVT::f64 &&
18534 (ValVT == MVT::i32 || ValVT == MVT::i16 || ValVT == MVT::i8)) {
18535 Val = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, Val);
18536 Val = DAG.getNode(ISD::BITCAST, DL, MVT::f64, Val);
18537 Parts[0] = Val;
18538 return true;
18539 }
18540 return false;
18541}
18542
18543SDValue PPCTargetLowering::lowerToLibCall(const char *LibCallName, SDValue Op,
18544 SelectionDAG &DAG) const {
18545 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
18546 TargetLowering::CallLoweringInfo CLI(DAG);
18547 EVT RetVT = Op.getValueType();
18548 Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
18549 SDValue Callee =
18550 DAG.getExternalSymbol(LibCallName, TLI.getPointerTy(DAG.getDataLayout()));
18551 bool SignExtend = TLI.shouldSignExtendTypeInLibCall(RetVT, false);
18552 TargetLowering::ArgListTy Args;
18553 TargetLowering::ArgListEntry Entry;
18554 for (const SDValue &N : Op->op_values()) {
18555 EVT ArgVT = N.getValueType();
18556 Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
18557 Entry.Node = N;
18558 Entry.Ty = ArgTy;
18559 Entry.IsSExt = TLI.shouldSignExtendTypeInLibCall(ArgVT, SignExtend);
18560 Entry.IsZExt = !Entry.IsSExt;
18561 Args.push_back(Entry);
18562 }
18563
18564 SDValue InChain = DAG.getEntryNode();
18565 SDValue TCChain = InChain;
18566 const Function &F = DAG.getMachineFunction().getFunction();
18567 bool isTailCall =
18568 TLI.isInTailCallPosition(DAG, Op.getNode(), TCChain) &&
18569 (RetTy == F.getReturnType() || F.getReturnType()->isVoidTy());
18570 if (isTailCall)
18571 InChain = TCChain;
18572 CLI.setDebugLoc(SDLoc(Op))
18573 .setChain(InChain)
18574 .setLibCallee(CallingConv::C, RetTy, Callee, std::move(Args))
18575 .setTailCall(isTailCall)
18576 .setSExtResult(SignExtend)
18577 .setZExtResult(!SignExtend)
18578 .setIsPostTypeLegalization(true);
18579 return TLI.LowerCallTo(CLI).first;
18580}
18581
18582SDValue PPCTargetLowering::lowerLibCallBasedOnType(
18583 const char *LibCallFloatName, const char *LibCallDoubleName, SDValue Op,
18584 SelectionDAG &DAG) const {
18585 if (Op.getValueType() == MVT::f32)
18586 return lowerToLibCall(LibCallFloatName, Op, DAG);
18587
18588 if (Op.getValueType() == MVT::f64)
18589 return lowerToLibCall(LibCallDoubleName, Op, DAG);
18590
18591 return SDValue();
18592}
18593
18594bool PPCTargetLowering::isLowringToMASSFiniteSafe(SDValue Op) const {
18595 SDNodeFlags Flags = Op.getNode()->getFlags();
18596 return isLowringToMASSSafe(Op) && Flags.hasNoSignedZeros() &&
18597 Flags.hasNoNaNs() && Flags.hasNoInfs();
18598}
18599
18600bool PPCTargetLowering::isLowringToMASSSafe(SDValue Op) const {
18601 return Op.getNode()->getFlags().hasApproximateFuncs();
18602}
18603
18604bool PPCTargetLowering::isScalarMASSConversionEnabled() const {
18605 return TM.Options.PPCGenScalarMASSEntries;
18606}
18607
18608SDValue PPCTargetLowering::lowerLibCallBase(const char *LibCallDoubleName,
18609 const char *LibCallFloatName,
18610 const char *LibCallDoubleNameFinite,
18611 const char *LibCallFloatNameFinite,
18612 SDValue Op,
18613 SelectionDAG &DAG) const {
18614 if (!isScalarMASSConversionEnabled() || !isLowringToMASSSafe(Op))
18615 return SDValue();
18616
18617 if (!isLowringToMASSFiniteSafe(Op))
18618 return lowerLibCallBasedOnType(LibCallFloatName, LibCallDoubleName, Op,
18619 DAG);
18620
18621 return lowerLibCallBasedOnType(LibCallFloatNameFinite,
18622 LibCallDoubleNameFinite, Op, DAG);
18623}
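// For example, an llvm.pow.f64 call carrying the 'afn' fast-math flag is
// redirected to __xl_pow when scalar MASS conversion is enabled, and to
// __xl_pow_finite when the nnan/ninf/nsz flags are also present.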
18624
18625SDValue PPCTargetLowering::lowerPow(SDValue Op, SelectionDAG &DAG) const {
18626 return lowerLibCallBase("__xl_pow", "__xl_powf", "__xl_pow_finite",
18627 "__xl_powf_finite", Op, DAG);
18628}
18629
18630SDValue PPCTargetLowering::lowerSin(SDValue Op, SelectionDAG &DAG) const {
18631 return lowerLibCallBase("__xl_sin", "__xl_sinf", "__xl_sin_finite",
18632 "__xl_sinf_finite", Op, DAG);
18633}
18634
18635SDValue PPCTargetLowering::lowerCos(SDValue Op, SelectionDAG &DAG) const {
18636 return lowerLibCallBase("__xl_cos", "__xl_cosf", "__xl_cos_finite",
18637 "__xl_cosf_finite", Op, DAG);
18638}
18639
18640SDValue PPCTargetLowering::lowerLog(SDValue Op, SelectionDAG &DAG) const {
18641 return lowerLibCallBase("__xl_log", "__xl_logf", "__xl_log_finite",
18642 "__xl_logf_finite", Op, DAG);
18643}
18644
18645SDValue PPCTargetLowering::lowerLog10(SDValue Op, SelectionDAG &DAG) const {
18646 return lowerLibCallBase("__xl_log10", "__xl_log10f", "__xl_log10_finite",
18647 "__xl_log10f_finite", Op, DAG);
18648}
18649
18650SDValue PPCTargetLowering::lowerExp(SDValue Op, SelectionDAG &DAG) const {
18651 return lowerLibCallBase("__xl_exp", "__xl_expf", "__xl_exp_finite",
18652 "__xl_expf_finite", Op, DAG);
18653}
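// Note: the "__xl_*" names above correspond to scalar entry points of IBM's
// MASS (Mathematical Acceleration Subsystem) library; the *_finite variants
// may assume finite, non-NaN inputs and are only chosen when the node carries
// the nsz/nnan/ninf flags checked in isLowringToMASSFiniteSafe().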
18654
18655// If we happen to match to an aligned D-Form, check if the Frame Index is
18656// adequately aligned. If it is not, reset the mode to match to X-Form.
18657static void setXFormForUnalignedFI(SDValue N, unsigned Flags,
18658 PPC::AddrMode &Mode) {
18659 if (!isa<FrameIndexSDNode>(N))
18660 return;
18661 if ((Mode == PPC::AM_DSForm && !(Flags & PPC::MOF_RPlusSImm16Mult4)) ||
18662 (Mode == PPC::AM_DQForm && !(Flags & PPC::MOF_RPlusSImm16Mult16)))
18663 Mode = PPC::AM_XForm;
18664}
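// For instance, a DS-form access (e.g. LD/STD) encodes a displacement that is
// a multiple of 4 and a DQ-form access (e.g. LXV/STXV) one that is a multiple
// of 16, so a frame index whose offset is not known to satisfy that multiple
// lacks the corresponding MOF_RPlusSImm16Mult4/Mult16 flag and is matched with
// the indexed [reg+reg] X-form instead.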
18665
18666/// SelectOptimalAddrMode - Based on a node N and its Parent (a MemSDNode),
18667/// compute the address flags of the node, get the optimal address mode based
18668/// on the flags, and set the Base and Disp based on the address mode.
18669PPC::AddrMode PPCTargetLowering::SelectOptimalAddrMode(const SDNode *Parent,
18670 SDValue N, SDValue &Disp,
18671 SDValue &Base,
18672 SelectionDAG &DAG,
18673 MaybeAlign Align) const {
18674 SDLoc DL(Parent);
18675
18676 // Compute the address flags.
18677 unsigned Flags = computeMOFlags(Parent, N, DAG);
18678
18679 // Get the optimal address mode based on the Flags.
18680 PPC::AddrMode Mode = getAddrModeForFlags(Flags);
18681
18682 // If the address mode is DS-Form or DQ-Form, check if the FI is aligned.
18683 // Select an X-Form load if it is not.
18684 setXFormForUnalignedFI(N, Flags, Mode);
18685
18686 // Set the mode to PC-Relative addressing mode if we have a valid PC-Rel node.
18687 if ((Mode == PPC::AM_XForm) && isPCRelNode(N)) {
18688 assert(Subtarget.isUsingPCRelativeCalls() &&
18689 "Must be using PC-Relative calls when a valid PC-Relative node is "
18690 "present!");
18691 Mode = PPC::AM_PCRel;
18692 }
18693
18694 // Set Base and Disp accordingly depending on the address mode.
18695 switch (Mode) {
18696 case PPC::AM_DForm:
18697 case PPC::AM_DSForm:
18698 case PPC::AM_DQForm: {
18699 // This is a register plus a 16-bit immediate. The base will be the
18700 // register and the displacement will be the immediate unless it
18701 // isn't sufficiently aligned.
18702 if (Flags & PPC::MOF_RPlusSImm16) {
18703 SDValue Op0 = N.getOperand(0);
18704 SDValue Op1 = N.getOperand(1);
18705 int16_t Imm = Op1->getAsZExtVal();
18706 if (!Align || isAligned(*Align, Imm)) {
18707 Disp = DAG.getTargetConstant(Imm, DL, N.getValueType());
18708 Base = Op0;
18709 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Op0)) {
18710 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
18711 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
18712 }
18713 break;
18714 }
18715 }
18716 // This is a register plus the @lo relocation. The base is the register
18717 // and the displacement is the global address.
18718 else if (Flags & PPC::MOF_RPlusLo) {
18719 Disp = N.getOperand(1).getOperand(0); // The global address.
18720 assert(Disp.getOpcode() == ISD::TargetGlobalAddress ||
18721 Disp.getOpcode() == ISD::TargetGlobalTLSAddress ||
18722 Disp.getOpcode() == ISD::TargetConstantPool ||
18723 Disp.getOpcode() == ISD::TargetJumpTable);
18724 Base = N.getOperand(0);
18725 break;
18726 }
18727 // This is a constant address at most 32 bits. The base will be
18728 // zero or load-immediate-shifted and the displacement will be
18729 // the low 16 bits of the address.
18730 else if (Flags & PPC::MOF_AddrIsSImm32) {
18731 auto *CN = cast<ConstantSDNode>(N);
18732 EVT CNType = CN->getValueType(0);
18733 uint64_t CNImm = CN->getZExtValue();
18734 // If this address fits entirely in a 16-bit sext immediate field, codegen
18735 // this as "d, 0".
18736 int16_t Imm;
18737 if (isIntS16Immediate(CN, Imm) && (!Align || isAligned(*Align, Imm))) {
18738 Disp = DAG.getTargetConstant(Imm, DL, CNType);
18739 Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
18740 CNType);
18741 break;
18742 }
18743 // Handle 32-bit sext immediate with LIS + Addr mode.
18744 if ((CNType == MVT::i32 || isInt<32>(CNImm)) &&
18745 (!Align || isAligned(*Align, CNImm))) {
18746 int32_t Addr = (int32_t)CNImm;
18747 // Otherwise, break this down into LIS + Disp.
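// Worked example (illustrative): for Addr = 0x12348000 the low 16 bits
// sign-extend to -0x8000, so Disp = -0x8000 and
// Base = (0x12348000 - (-0x8000)) >> 16 = 0x1235; LIS 0x1235 materializes
// 0x12350000, and 0x12350000 + (-0x8000) reproduces 0x12348000.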
18748 Disp = DAG.getTargetConstant((int16_t)Addr, DL, MVT::i32);
18749 Base =
18750 DAG.getTargetConstant((Addr - (int16_t)Addr) >> 16, DL, MVT::i32);
18751 uint32_t LIS = CNType == MVT::i32 ? PPC::LIS : PPC::LIS8;
18752 Base = SDValue(DAG.getMachineNode(LIS, DL, CNType, Base), 0);
18753 break;
18754 }
18755 }
18756 // Otherwise, the PPC::MOF_NotAdd flag is set. Load/Store is Non-foldable.
18757 Disp = DAG.getTargetConstant(0, DL, getPointerTy(DAG.getDataLayout()));
18758 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N)) {
18759 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
18760 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
18761 } else
18762 Base = N;
18763 break;
18764 }
18765 case PPC::AM_PrefixDForm: {
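// Prefixed D-form addressing (ISA 3.1 prefixed instructions such as
// PADDI/PLD/PSTXV) carries a 34-bit signed displacement, hence the
// isIntS34Immediate checks below.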
18766 int64_t Imm34 = 0;
18767 unsigned Opcode = N.getOpcode();
18768 if (((Opcode == ISD::ADD) || (Opcode == ISD::OR)) &&
18769 (isIntS34Immediate(N.getOperand(1), Imm34))) {
18770 // N is an Add/OR Node, and its operand is a 34-bit signed immediate.
18771 Disp = DAG.getTargetConstant(Imm34, DL, N.getValueType());
18772 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0)))
18773 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
18774 else
18775 Base = N.getOperand(0);
18776 } else if (isIntS34Immediate(N, Imm34)) {
18777 // The address is a 34-bit signed immediate.
18778 Disp = DAG.getTargetConstant(Imm34, DL, N.getValueType());
18779 Base = DAG.getRegister(PPC::ZERO8, N.getValueType());
18780 }
18781 break;
18782 }
18783 case PPC::AM_PCRel: {
18784 // When selecting PC-Relative instructions, "Base" is not utilized as
18785 // we select the address as [PC+imm].
18786 Disp = N;
18787 break;
18788 }
18789 case PPC::AM_None:
18790 break;
18791 default: { // By default, X-Form is always available to be selected.
18792 // When a frame index is not aligned, we also match by XForm.
18793 FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N);
18794 Base = FI ? N : N.getOperand(1);
18795 Disp = FI ? DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
18796 N.getValueType())
18797 : N.getOperand(0);
18798 break;
18799 }
18800 }
18801 return Mode;
18802}
18803
18804CCAssignFn *PPCTargetLowering::ccAssignFnForCall(CallingConv::ID CC,
18805 bool Return,
18806 bool IsVarArg) const {
18807 switch (CC) {
18808 case CallingConv::Cold:
18809 return (Return ? RetCC_PPC_Cold : CC_PPC64_ELF);
18810 default:
18811 return CC_PPC64_ELF;
18812 }
18813}
18814
18815bool PPCTargetLowering::shouldInlineQuadwordAtomics() const {
18816 return Subtarget.isPPC64() && Subtarget.hasQuadwordAtomics();
18817}
18818
18819TargetLowering::AtomicExpansionKind
18820PPCTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
18821 unsigned Size = AI->getType()->getPrimitiveSizeInBits();
18822 if (shouldInlineQuadwordAtomics() && Size == 128)
18823 return AtomicExpansionKind::MaskedIntrinsic;
18824
18825 switch (AI->getOperation()) {
18826 case AtomicRMWInst::UIncWrap:
18827 case AtomicRMWInst::UDecWrap:
18828 return AtomicExpansionKind::CmpXChg;
18829 default:
18830 return TargetLoweringBase::shouldExpandAtomicRMWInIR(AI);
18831 }
18832
18833 llvm_unreachable("unreachable atomicrmw operation");
18834}
18835
18836TargetLowering::AtomicExpansionKind
18837PPCTargetLowering::shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const {
18838 unsigned Size = AI->getNewValOperand()->getType()->getPrimitiveSizeInBits();
18839 if (shouldInlineQuadwordAtomics() && Size == 128)
18840 return AtomicExpansionKind::MaskedIntrinsic;
18841 return TargetLoweringBase::shouldExpandAtomicCmpXchgInIR(AI);
18842}
18843
18844static Intrinsic::ID
18845getIntrinsicForAtomicRMWBinOp128(AtomicRMWInst::BinOp BinOp) {
18846 switch (BinOp) {
18847 default:
18848 llvm_unreachable("Unexpected AtomicRMW BinOp");
18849 case AtomicRMWInst::Xchg:
18850 return Intrinsic::ppc_atomicrmw_xchg_i128;
18851 case AtomicRMWInst::Add:
18852 return Intrinsic::ppc_atomicrmw_add_i128;
18853 case AtomicRMWInst::Sub:
18854 return Intrinsic::ppc_atomicrmw_sub_i128;
18855 case AtomicRMWInst::And:
18856 return Intrinsic::ppc_atomicrmw_and_i128;
18857 case AtomicRMWInst::Or:
18858 return Intrinsic::ppc_atomicrmw_or_i128;
18859 case AtomicRMWInst::Xor:
18860 return Intrinsic::ppc_atomicrmw_xor_i128;
18861 case AtomicRMWInst::Nand:
18862 return Intrinsic::ppc_atomicrmw_nand_i128;
18863 }
18864}
18865
18866Value *PPCTargetLowering::emitMaskedAtomicRMWIntrinsic(
18867 IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
18868 Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
18869 assert(shouldInlineQuadwordAtomics() && "Only support quadword now");
18870 Module *M = Builder.GetInsertBlock()->getParent()->getParent();
18871 Type *ValTy = Incr->getType();
18872 assert(ValTy->getPrimitiveSizeInBits() == 128);
18873 Function *RMW = Intrinsic::getDeclaration(
18874 M, getIntrinsicForAtomicRMWBinOp128(AI->getOperation()));
18875 Type *Int64Ty = Type::getInt64Ty(M->getContext());
18876 Value *IncrLo = Builder.CreateTrunc(Incr, Int64Ty, "incr_lo");
18877 Value *IncrHi =
18878 Builder.CreateTrunc(Builder.CreateLShr(Incr, 64), Int64Ty, "incr_hi");
18879 Value *LoHi = Builder.CreateCall(RMW, {AlignedAddr, IncrLo, IncrHi});
18880 Value *Lo = Builder.CreateExtractValue(LoHi, 0, "lo");
18881 Value *Hi = Builder.CreateExtractValue(LoHi, 1, "hi");
18882 Lo = Builder.CreateZExt(Lo, ValTy, "lo64");
18883 Hi = Builder.CreateZExt(Hi, ValTy, "hi64");
18884 return Builder.CreateOr(
18885 Lo, Builder.CreateShl(Hi, ConstantInt::get(ValTy, 64)), "val64");
18886}
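// Rough sketch of the IR this emits for a 128-bit 'atomicrmw add ptr %p,
// i128 %incr' (pseudo-IR, value names chosen for exposition):
//   %incr_lo = trunc i128 %incr to i64
//   %incr_hi = trunc i128 (lshr i128 %incr, 64) to i64
//   %lohi = call { i64, i64 } @llvm.ppc.atomicrmw.add.i128(ptr %p,
//                                              i64 %incr_lo, i64 %incr_hi)
//   %lo64 = zext i64 (extractvalue %lohi, 0) to i128
//   %hi64 = zext i64 (extractvalue %lohi, 1) to i128
//   %val64 = or i128 %lo64, (shl i128 %hi64, 64)
// reassembling the original 128-bit memory value that the atomicrmw returns.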
18887
18888Value *PPCTargetLowering::emitMaskedAtomicCmpXchgIntrinsic(
18889 IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
18890 Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
18891 assert(shouldInlineQuadwordAtomics() && "Only support quadword now");
18892 Module *M = Builder.GetInsertBlock()->getParent()->getParent();
18893 Type *ValTy = CmpVal->getType();
18894 assert(ValTy->getPrimitiveSizeInBits() == 128);
18895 Function *IntCmpXchg =
18896 Intrinsic::getDeclaration(M, Intrinsic::ppc_cmpxchg_i128);
18897 Type *Int64Ty = Type::getInt64Ty(M->getContext());
18898 Value *CmpLo = Builder.CreateTrunc(CmpVal, Int64Ty, "cmp_lo");
18899 Value *CmpHi =
18900 Builder.CreateTrunc(Builder.CreateLShr(CmpVal, 64), Int64Ty, "cmp_hi");
18901 Value *NewLo = Builder.CreateTrunc(NewVal, Int64Ty, "new_lo");
18902 Value *NewHi =
18903 Builder.CreateTrunc(Builder.CreateLShr(NewVal, 64), Int64Ty, "new_hi");
18904 emitLeadingFence(Builder, CI, Ord);
18905 Value *LoHi =
18906 Builder.CreateCall(IntCmpXchg, {AlignedAddr, CmpLo, CmpHi, NewLo, NewHi});
18907 emitTrailingFence(Builder, CI, Ord);
18908 Value *Lo = Builder.CreateExtractValue(LoHi, 0, "lo");
18909 Value *Hi = Builder.CreateExtractValue(LoHi, 1, "hi");
18910 Lo = Builder.CreateZExt(Lo, ValTy, "lo64");
18911 Hi = Builder.CreateZExt(Hi, ValTy, "hi64");
18912 return Builder.CreateOr(
18913 Lo, Builder.CreateShl(Hi, ConstantInt::get(ValTy, 64)), "val64");
18914}
unsigned const MachineRegisterInfo * MRI
static unsigned getCallOpcode(const MachineFunction &CallerF, bool IsIndirect, bool IsTailCall, std::optional< CallLowering::PtrAuthInfo > &PAI, MachineRegisterInfo &MRI)
#define Success
static SDValue GeneratePerfectShuffle(unsigned ID, SDValue V1, SDValue V2, unsigned PFEntry, SDValue LHS, SDValue RHS, SelectionDAG &DAG, const SDLoc &dl)
GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit the specified operations t...
static bool isSignExtended(SDValue N, SelectionDAG &DAG)
unsigned Intr
static msgpack::DocNode getNode(msgpack::DocNode DN, msgpack::Type Type, MCValue Val)
static std::pair< Register, unsigned > getBaseWithConstantOffset(MachineRegisterInfo &MRI, Register Reg)
This file declares a class to represent arbitrary precision floating point values and provide a varie...
This file implements a class to represent arbitrary precision integral constant values and operations...
This file implements the APSInt class, which is a simple class that represents an arbitrary sized int...
static bool isLoad(int Opcode)
@ OP_COPY
static bool isFloatingPointZero(SDValue Op)
isFloatingPointZero - Return true if this is +0.0.
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Function Alias Analysis Results
Atomic ordering constants.
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
This file contains the declarations for the subclasses of Constant, which represent the different fla...
return RetTy
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
Given that RA is a live propagate it s liveness to any other values it uses(according to Uses). void DeadArgumentEliminationPass
#define LLVM_DEBUG(X)
Definition: Debug.h:101
static RegisterPass< DebugifyModulePass > DM("debugify", "Attach debug info to everything")
This file defines the DenseMap class.
uint64_t Addr
uint64_t Size
bool End
Definition: ELF_riscv.cpp:480
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
const HexagonInstrInfo * TII
static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain, ISD::ArgFlagsTy Flags, SelectionDAG &DAG, const SDLoc &dl)
CreateCopyOfByValArgument - Make a copy of an aggregate at address specified by "Src" to address "Dst...
IRTranslator LLVM IR MI
This defines the Use class.
#define RegName(no)
static LVOptions Options
Definition: LVOptions.cpp:25
lazy value info
static bool isConstantOrUndef(const SDValue Op)
static bool isSplat(Value *V)
Return true if V is a splat of a value (which is used when multiplying a matrix with a scalar).
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
#define G(x, y, z)
Definition: MD5.cpp:56
unsigned const TargetRegisterInfo * TRI
Module.h This file contains the declarations for the Module class.
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
static CodeModel::Model getCodeModel(const PPCSubtarget &S, const TargetMachine &TM, const MachineOperand &MO)
cl::opt< bool > ANDIGlueBug("expose-ppc-andi-glue-bug", cl::desc("expose the ANDI glue bug on PPC"), cl::Hidden)
static SDValue getCanonicalConstSplat(uint64_t Val, unsigned SplatSize, EVT VT, SelectionDAG &DAG, const SDLoc &dl)
getCanonicalConstSplat - Build a canonical splat immediate of Val with an element size of SplatSize.
static bool IsSelectCC(MachineInstr &MI)
static const TargetRegisterClass * getRegClassForSVT(MVT::SimpleValueType SVT, bool IsPPC64, bool HasP8Vector, bool HasVSX)
static bool isGPRShadowAligned(MCPhysReg Reg, Align RequiredAlign)
static bool needStackSlotPassParameters(const PPCSubtarget &Subtarget, const SmallVectorImpl< ISD::OutputArg > &Outs)
static bool isAlternatingShuffMask(const ArrayRef< int > &Mask, int NumElts)
static SDValue addShuffleForVecExtend(SDNode *N, SelectionDAG &DAG, SDValue Input, uint64_t Elems, uint64_t CorrectElems)
static cl::opt< bool > DisablePPCUnaligned("disable-ppc-unaligned", cl::desc("disable unaligned load/store generation on PPC"), cl::Hidden)
static SDValue combineADDToADDZE(SDNode *N, SelectionDAG &DAG, const PPCSubtarget &Subtarget)
static bool findConsecutiveLoad(LoadSDNode *LD, SelectionDAG &DAG)
static SDValue generateEquivalentSub(SDNode *N, int Size, bool Complement, bool Swap, SDLoc &DL, SelectionDAG &DAG)
This function is called when we have proved that a SETCC node can be replaced by subtraction (and oth...
static unsigned mapArgRegToOffsetAIX(unsigned Reg, const PPCFrameLowering *FL)
static SDValue combineADDToMAT_PCREL_ADDR(SDNode *N, SelectionDAG &DAG, const PPCSubtarget &Subtarget)
static void setAlignFlagsForFI(SDValue N, unsigned &FlagSet, SelectionDAG &DAG)
Set alignment flags based on whether or not the Frame Index is aligned.
static bool isTOCSaveRestoreRequired(const PPCSubtarget &Subtarget)
static void updateForAIXShLibTLSModelOpt(TLSModel::Model &Model, SelectionDAG &DAG, const TargetMachine &TM)
updateForAIXShLibTLSModelOpt - Helper to initialize TLS model opt settings, and then apply the update...
static bool provablyDisjointOr(SelectionDAG &DAG, const SDValue &N)
Used when computing address flags for selecting loads and stores.
static void CalculateTailCallArgDest(SelectionDAG &DAG, MachineFunction &MF, bool isPPC64, SDValue Arg, int SPDiff, unsigned ArgOffset, SmallVectorImpl< TailCallArgumentInfo > &TailCallArguments)
CalculateTailCallArgDest - Remember Argument for later processing.
static bool callsShareTOCBase(const Function *Caller, const GlobalValue *CalleeGV, const TargetMachine &TM)
constexpr uint64_t AIXSmallTlsPolicySizeLimit
static bool isPCRelNode(SDValue N)
static void LowerMemOpCallTo(SelectionDAG &DAG, MachineFunction &MF, SDValue Chain, SDValue Arg, SDValue PtrOff, int SPDiff, unsigned ArgOffset, bool isPPC64, bool isTailCall, bool isVector, SmallVectorImpl< SDValue > &MemOpChains, SmallVectorImpl< TailCallArgumentInfo > &TailCallArguments, const SDLoc &dl)
LowerMemOpCallTo - Store the argument to the stack or remember it in case of tail calls.
static cl::opt< unsigned > PPCGatherAllAliasesMaxDepth("ppc-gather-alias-max-depth", cl::init(18), cl::Hidden, cl::desc("max depth when checking alias info in GatherAllAliases()"))
static bool areCallingConvEligibleForTCO_64SVR4(CallingConv::ID CallerCC, CallingConv::ID CalleeCC)
static const MCPhysReg FPR[]
FPR - The set of FP registers that should be allocated for arguments on Darwin and AIX.
static SDNode * isBLACompatibleAddress(SDValue Op, SelectionDAG &DAG)
isCallCompatibleAddress - Return the immediate to use if the specified 32-bit value is representable ...
static Align CalculateStackSlotAlignment(EVT ArgVT, EVT OrigVT, ISD::ArgFlagsTy Flags, unsigned PtrByteSize)
CalculateStackSlotAlignment - Calculates the alignment of this argument on the stack.
static bool IsSelect(MachineInstr &MI)
static bool haveEfficientBuildVectorPattern(BuildVectorSDNode *V, bool HasDirectMove, bool HasP8Vector)
Do we have an efficient pattern in a .td file for this node?
static SDValue getSToVPermuted(SDValue OrigSToV, SelectionDAG &DAG, const PPCSubtarget &Subtarget)
static bool CC_AIX(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &S)
static void setUsesTOCBasePtr(MachineFunction &MF)
static SDValue transformCallee(const SDValue &Callee, SelectionDAG &DAG, const SDLoc &dl, const PPCSubtarget &Subtarget)
static unsigned EnsureStackAlignment(const PPCFrameLowering *Lowering, unsigned NumBytes)
EnsureStackAlignment - Round stack frame size up from NumBytes to ensure minimum alignment required f...
static SDValue stripModuloOnShift(const TargetLowering &TLI, SDNode *N, SelectionDAG &DAG)
static bool isStoreConditional(SDValue Intrin, unsigned &StoreWidth)
static bool hasSameArgumentList(const Function *CallerFn, const CallBase &CB)
static bool isFPExtLoad(SDValue Op)
static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op, SelectionDAG &DAG, const SDLoc &dl, EVT DestVT=MVT::Other)
BuildIntrinsicOp - Return a unary operator intrinsic node with the specified intrinsic ID.
static bool isConsecutiveLSLoc(SDValue Loc, EVT VT, LSBaseSDNode *Base, unsigned Bytes, int Dist, SelectionDAG &DAG)
static void StoreTailCallArgumentsToStackSlot(SelectionDAG &DAG, SDValue Chain, const SmallVectorImpl< TailCallArgumentInfo > &TailCallArgs, SmallVectorImpl< SDValue > &MemOpChains, const SDLoc &dl)
StoreTailCallArgumentsToStackSlot - Stores arguments to their stack slot.
static const char AIXSSPCanaryWordName[]
static cl::opt< bool > UseAbsoluteJumpTables("ppc-use-absolute-jumptables", cl::desc("use absolute jump tables on ppc"), cl::Hidden)
static void setXFormForUnalignedFI(SDValue N, unsigned Flags, PPC::AddrMode &Mode)
static void getMaxByValAlign(Type *Ty, Align &MaxAlign, Align MaxMaxAlign)
getMaxByValAlign - Helper for getByValTypeAlignment to determine the desired ByVal argument alignment...
static bool isConsecutiveLS(SDNode *N, LSBaseSDNode *Base, unsigned Bytes, int Dist, SelectionDAG &DAG)
static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize, unsigned LHSStart, unsigned RHSStart)
isVMerge - Common function, used to match vmrg* shuffles.
static void getLabelAccessInfo(bool IsPIC, const PPCSubtarget &Subtarget, unsigned &HiOpFlags, unsigned &LoOpFlags, const GlobalValue *GV=nullptr)
Return true if we should reference labels using a PICBase, set the HiOpFlags and LoOpFlags to the tar...
cl::opt< bool > DisableAutoPairedVecSt("disable-auto-paired-vec-st", cl::desc("disable automatically generated 32byte paired vector stores"), cl::init(true), cl::Hidden)
static void buildCallOperands(SmallVectorImpl< SDValue > &Ops, PPCTargetLowering::CallFlags CFlags, const SDLoc &dl, SelectionDAG &DAG, SmallVector< std::pair< unsigned, SDValue >, 8 > &RegsToPass, SDValue Glue, SDValue Chain, SDValue &Callee, int SPDiff, const PPCSubtarget &Subtarget)
static cl::opt< bool > DisableInnermostLoopAlign32("disable-ppc-innermost-loop-align32", cl::desc("don't always align innermost loop to 32 bytes on ppc"), cl::Hidden)
static bool usePartialVectorLoads(SDNode *N, const PPCSubtarget &ST)
Returns true if we should use a direct load into vector instruction (such as lxsd or lfd),...
static SDValue getDataClassTest(SDValue Op, FPClassTest Mask, const SDLoc &Dl, SelectionDAG &DAG, const PPCSubtarget &Subtarget)
static cl::opt< bool > DisableSCO("disable-ppc-sco", cl::desc("disable sibling call optimization on ppc"), cl::Hidden)
static void fixupFuncForFI(SelectionDAG &DAG, int FrameIdx, EVT VT)
static cl::opt< bool > DisablePPCPreinc("disable-ppc-preinc", cl::desc("disable preincrement load/store generation on PPC"), cl::Hidden)
static Intrinsic::ID getIntrinsicForAtomicRMWBinOp128(AtomicRMWInst::BinOp BinOp)
static SDValue convertFPToInt(SDValue Op, SelectionDAG &DAG, const PPCSubtarget &Subtarget)
static unsigned CalculateStackSlotSize(EVT ArgVT, ISD::ArgFlagsTy Flags, unsigned PtrByteSize)
CalculateStackSlotSize - Calculates the size reserved for this argument on the stack.
static int CalculateTailCallSPDiff(SelectionDAG &DAG, bool isTailCall, unsigned ParamSize)
CalculateTailCallSPDiff - Get the amount the stack pointer has to be adjusted to accommodate the argu...
static Instruction * callIntrinsic(IRBuilderBase &Builder, Intrinsic::ID Id)
static void fixupShuffleMaskForPermutedSToV(SmallVectorImpl< int > &ShuffV, int LHSMaxIdx, int RHSMinIdx, int RHSMaxIdx, int HalfVec, unsigned ValidLaneWidth, const PPCSubtarget &Subtarget)
static void prepareIndirectCall(SelectionDAG &DAG, SDValue &Callee, SDValue &Glue, SDValue &Chain, const SDLoc &dl)
static SDValue LowerLabelRef(SDValue HiPart, SDValue LoPart, bool isPIC, SelectionDAG &DAG)
static SDValue isScalarToVec(SDValue Op)
static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl)
static cl::opt< bool > DisablePerfectShuffle("ppc-disable-perfect-shuffle", cl::desc("disable vector permute decomposition"), cl::init(true), cl::Hidden)
static bool getVectorCompareInfo(SDValue Intrin, int &CompareOpc, bool &isDot, const PPCSubtarget &Subtarget)
getVectorCompareInfo - Given an intrinsic, return false if it is not a vector comparison.
static unsigned invertFMAOpcode(unsigned Opc)
static const SDValue * getNormalLoadInput(const SDValue &Op, bool &IsPermuted)
static cl::opt< unsigned > PPCMinimumJumpTableEntries("ppc-min-jump-table-entries", cl::init(64), cl::Hidden, cl::desc("Set minimum number of entries to use a jump table on PPC"))
static bool isValidSplatLoad(const PPCSubtarget &Subtarget, const SDValue &Op, unsigned &Opcode)
static SDValue convertIntToFP(SDValue Op, SDValue Src, SelectionDAG &DAG, const PPCSubtarget &Subtarget, SDValue Chain=SDValue())
static int getEstimateRefinementSteps(EVT VT, const PPCSubtarget &Subtarget)
static void PrepareTailCall(SelectionDAG &DAG, SDValue &InGlue, SDValue &Chain, const SDLoc &dl, int SPDiff, unsigned NumBytes, SDValue LROp, SDValue FPOp, SmallVectorImpl< TailCallArgumentInfo > &TailCallArguments)
static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG, SDValue Chain, SDValue OldRetAddr, SDValue OldFP, int SPDiff, const SDLoc &dl)
EmitTailCallStoreFPAndRetAddr - Move the frame pointer and return address to the appropriate stack sl...
static SDValue BuildVSLDOI(SDValue LHS, SDValue RHS, unsigned Amt, EVT VT, SelectionDAG &DAG, const SDLoc &dl)
BuildVSLDOI - Return a VECTOR_SHUFFLE that is a vsldoi of the specified amount.
static SDValue combineBVZEXTLOAD(SDNode *N, SelectionDAG &DAG)
static SDValue truncateScalarIntegerArg(ISD::ArgFlagsTy Flags, EVT ValVT, SelectionDAG &DAG, SDValue ArgValue, MVT LocVT, const SDLoc &dl)
static void computeFlagsForAddressComputation(SDValue N, unsigned &FlagSet, SelectionDAG &DAG)
Given a node, compute flags that are used for address computation when selecting load and store instr...
cl::opt< bool > ANDIGlueBug
static SDValue getOutputChainFromCallSeq(SDValue CallSeqStart)
static bool CalculateStackSlotUsed(EVT ArgVT, EVT OrigVT, ISD::ArgFlagsTy Flags, unsigned PtrByteSize, unsigned LinkageSize, unsigned ParamAreaSize, unsigned &ArgOffset, unsigned &AvailableFPRs, unsigned &AvailableVRs)
CalculateStackSlotUsed - Return whether this argument will use its stack slot (instead of being passe...
static cl::opt< unsigned > PPCAIXTLSModelOptUseIEForLDLimit("ppc-aix-shared-lib-tls-model-opt-limit", cl::init(1), cl::Hidden, cl::desc("Set inclusive limit count of TLS local-dynamic access(es) in a " "function to use initial-exec"))
static unsigned getPPCStrictOpcode(unsigned Opc)
static void prepareDescriptorIndirectCall(SelectionDAG &DAG, SDValue &Callee, SDValue &Glue, SDValue &Chain, SDValue CallSeqStart, const CallBase *CB, const SDLoc &dl, bool hasNest, const PPCSubtarget &Subtarget)
static cl::opt< bool > DisableP10StoreForward("disable-p10-store-forward", cl::desc("disable P10 store forward-friendly conversion"), cl::Hidden, cl::init(false))
static bool isXXBRShuffleMaskHelper(ShuffleVectorSDNode *N, int Width)
static bool isFunctionGlobalAddress(const GlobalValue *CalleeGV)
static bool isSplatBV(SDValue Op)
static SDValue combineBVOfVecSExt(SDNode *N, SelectionDAG &DAG)
static cl::opt< bool > DisableILPPref("disable-ppc-ilp-pref", cl::desc("disable setting the node scheduling preference to ILP on PPC"), cl::Hidden)
static bool isNByteElemShuffleMask(ShuffleVectorSDNode *, unsigned, int)
Check that the mask is shuffling N byte elements.
static SDValue combineBVOfConsecutiveLoads(SDNode *N, SelectionDAG &DAG)
Reduce the number of loads when building a vector.
static bool isValidPCRelNode(SDValue N)
const SmallVectorImpl< MachineOperand > & Cond
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
SI optimize exec mask operations pre RA
static const MCExpr * MaskShift(const MCExpr *Val, uint32_t Mask, uint32_t Shift, MCContext &Ctx)
This file contains some templates that are useful if you are working with the STL at all.
This file defines the SmallPtrSet class.
This file defines the SmallSet class.
This file defines the SmallVector class.
static bool Enabled
Definition: Statistic.cpp:46
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:166
This file implements the StringSwitch template, which mimics a switch() statement whose cases are str...
This file describes how to lower LLVM code to machine code.
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
Definition: Value.cpp:469
static bool is64Bit(const char *name)
Value * RHS
Value * LHS
bool isFixed(unsigned ValNo) const
Definition: PPCCCState.h:68
opStatus convert(const fltSemantics &ToSemantics, roundingMode RM, bool *losesInfo)
Definition: APFloat.cpp:5337
bool isDenormal() const
Definition: APFloat.h:1361
APInt bitcastToAPInt() const
Definition: APFloat.h:1266
Class for arbitrary precision integers.
Definition: APInt.h:78
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition: APInt.h:212
void clearBit(unsigned BitPosition)
Set a given bit to 0.
Definition: APInt.h:1385
bool isNegatedPowerOf2() const
Check if this APInt's negated value is a power of two greater than zero.
Definition: APInt.h:427
APInt zext(unsigned width) const
Zero extend to a new width.
Definition: APInt.cpp:981
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1498
void setBit(unsigned BitPosition)
Set the given bit to 1 whose position is given as "bitPosition".
Definition: APInt.h:1308
APInt abs() const
Get the absolute value.
Definition: APInt.h:1751
bool isNegative() const
Determine sign of this APInt.
Definition: APInt.h:307
bool isSignedIntN(unsigned N) const
Check if this APInt has an N-bits signed integer value.
Definition: APInt.h:413
bool getBoolValue() const
Convert APInt to a boolean value.
Definition: APInt.h:449
double bitsToDouble() const
Converts APInt bits to a double.
Definition: APInt.h:1678
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
Definition: APInt.h:418
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
Definition: APInt.h:284
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
Definition: APInt.h:274
An arbitrary precision integer that knows its signedness.
Definition: APSInt.h:23
This class represents an incoming formal argument to a Function.
Definition: Argument.h:31
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:165
An instruction that atomically checks whether a specified value is in a memory location,...
Definition: Instructions.h:495
an instruction that atomically reads a memory location, combines it with another value,...
Definition: Instructions.h:696
BinOp
This enumeration lists the possible modifications atomicrmw can make.
Definition: Instructions.h:708
@ Add
*p = old + v
Definition: Instructions.h:712
@ Or
*p = old | v
Definition: Instructions.h:720
@ Sub
*p = old - v
Definition: Instructions.h:714
@ And
*p = old & v
Definition: Instructions.h:716
@ Xor
*p = old ^ v
Definition: Instructions.h:722
@ UIncWrap
Increment one up to a maximum value.
Definition: Instructions.h:748
@ UDecWrap
Decrement one until a minimum value or zero.
Definition: Instructions.h:752
@ Nand
*p = ~(old & v)
Definition: Instructions.h:718
BinOp getOperation() const
Definition: Instructions.h:787
This is an SDNode representing atomic operations.
StringRef getValueAsString() const
Return the attribute's value as a string.
Definition: Attributes.cpp:392
LLVM Basic Block Representation.
Definition: BasicBlock.h:61
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:219
const BlockAddress * getBlockAddress() const
The address of a basic block.
Definition: Constants.h:890
static BranchProbability getOne()
static BranchProbability getZero()
A "pseudo-class" with methods for operating on BUILD_VECTORs.
bool isConstantSplat(APInt &SplatValue, APInt &SplatUndef, unsigned &SplatBitSize, bool &HasAnyUndefs, unsigned MinSplatBits=0, bool isBigEndian=false) const
Check if this is a constant splat, and if so, find the smallest element size that splats the vector.
CCState - This class holds information needed while lowering arguments and return values.
MachineFunction & getMachineFunction() const
unsigned getFirstUnallocated(ArrayRef< MCPhysReg > Regs) const
getFirstUnallocated - Return the index of the first unallocated register in the set,...
MCRegister AllocateReg(MCPhysReg Reg)
AllocateReg - Attempt to allocate one register.
int64_t AllocateStack(unsigned Size, Align Alignment)
AllocateStack - Allocate a chunk of stack space with the specified size and alignment.
uint64_t getStackSize() const
Returns the size of the currently allocated portion of the stack.
bool isVarArg() const
void addLoc(const CCValAssign &V)
CCValAssign - Represent assignment of one arg/retval to a location.
bool isRegLoc() const
Register getLocReg() const
LocInfo getLocInfo() const
static CCValAssign getMem(unsigned ValNo, MVT ValVT, int64_t Offset, MVT LocVT, LocInfo HTP, bool IsCustom=false)
static CCValAssign getReg(unsigned ValNo, MVT ValVT, unsigned RegNo, MVT LocVT, LocInfo HTP, bool IsCustom=false)
bool needsCustom() const
bool isMemLoc() const
static CCValAssign getCustomReg(unsigned ValNo, MVT ValVT, unsigned RegNo, MVT LocVT, LocInfo HTP)
int64_t getLocMemOffset() const
unsigned getValNo() const
static CCValAssign getCustomMem(unsigned ValNo, MVT ValVT, int64_t Offset, MVT LocVT, LocInfo HTP)
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Definition: InstrTypes.h:1236
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
Definition: InstrTypes.h:1465
bool isStrictFP() const
Determine if the call requires strict floating point semantics.
Definition: InstrTypes.h:1971
CallingConv::ID getCallingConv() const
Definition: InstrTypes.h:1523
User::op_iterator arg_begin()
Return the iterator pointing to the beginning of the argument list.
Definition: InstrTypes.h:1385
bool isMustTailCall() const
Tests if this call site must be tail call optimized.
Value * getCalledOperand() const
Definition: InstrTypes.h:1458
User::op_iterator arg_end()
Return the iterator pointing to the end of the argument list.
Definition: InstrTypes.h:1391
unsigned arg_size() const
Definition: InstrTypes.h:1408
Function * getCaller()
Helper to get the caller (the parent function).
This class represents a function call, abstracting a target machine's calling convention.
bool isTailCall() const
ConstantFP - Floating Point Values [float, double].
Definition: Constants.h:269
This is the shared class of boolean and integer constants.
Definition: Constants.h:81
uint64_t getZExtValue() const
const APInt & getAPIntValue() const
int64_t getSExtValue() const
This is an important base class in LLVM.
Definition: Constant.h:42
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:63
bool isLittleEndian() const
Layout endianness...
Definition: DataLayout.h:195
unsigned getLargestLegalIntTypeSizeInBits() const
Returns the size of largest legal integer type size, or 0 if none are set.
Definition: DataLayout.cpp:868
IntegerType * getIntPtrType(LLVMContext &C, unsigned AddressSpace=0) const
Returns an integer type with size at least as big as that of a pointer in the given address space.
Definition: DataLayout.cpp:846
Align getABITypeAlign(Type *Ty) const
Returns the minimum ABI-required alignment for the specified type.
Definition: DataLayout.cpp:838
TypeSize getTypeAllocSize(Type *Ty) const
Returns the offset in bytes between successive objects of the specified type, including alignment pad...
Definition: DataLayout.h:461
A debug info location.
Definition: DebugLoc.h:33
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:155
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:211
This is a fast-path instruction selection class that generates poor code and doesn't support illegal ...
Definition: FastISel.h:66
FunctionLoweringInfo - This contains information that is global to a function that is used when lower...
bool hasOptSize() const
Optimize this function for size (-Os) or minimum size (-Oz).
Definition: Function.h:705
const DataLayout & getDataLayout() const
Get the data layout of the module this function belongs to.
Definition: Function.cpp:384
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
Definition: Function.cpp:769
uint64_t getFnAttributeAsParsedInteger(StringRef Kind, uint64_t Default=0) const
For a string attribute Kind, parse attribute as an integer.
Definition: Function.cpp:781
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition: Function.h:702
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition: Function.h:281
AttributeList getAttributes() const
Return the attribute list for this Function.
Definition: Function.h:357
arg_iterator arg_begin()
Definition: Function.h:866
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition: Function.cpp:380
size_t arg_size() const
Definition: Function.h:899
Type * getReturnType() const
Returns the type of the ret val.
Definition: Function.h:219
bool isVarArg() const
isVarArg - Return true if this function takes a variable number of arguments.
Definition: Function.h:232
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition: Function.cpp:743
const GlobalValue * getGlobal() const
const GlobalObject * getAliaseeObject() const
Definition: Globals.cpp:588
bool isThreadLocal() const
If the value is "Thread Local", its value isn't shared by the threads.
Definition: GlobalValue.h:263
void setThreadLocalMode(ThreadLocalMode Val)
Definition: GlobalValue.h:267
bool hasHiddenVisibility() const
Definition: GlobalValue.h:250
StringRef getSection() const
Definition: Globals.cpp:183
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:656
bool isStrongDefinitionForLinker() const
Returns true if this global's definition will be the one chosen by the linker.
Definition: GlobalValue.h:631
const DataLayout & getDataLayout() const
Get the data layout of the module this global belongs to.
Definition: Globals.cpp:124
bool hasComdat() const
Definition: GlobalValue.h:241
Type * getValueType() const
Definition: GlobalValue.h:296
bool hasProtectedVisibility() const
Definition: GlobalValue.h:251
Common base class shared among various IRBuilders.
Definition: IRBuilder.h:91
Value * CreateExtractValue(Value *Agg, ArrayRef< unsigned > Idxs, const Twine &Name="")
Definition: IRBuilder.h:2536
Value * CreateLShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
Definition: IRBuilder.h:1454
BasicBlock * GetInsertBlock() const
Definition: IRBuilder.h:171
Value * CreateShl(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1433
Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="", bool IsNonNeg=false)
Definition: IRBuilder.h:2041
Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="", bool IsNUW=false, bool IsNSW=false)
Definition: IRBuilder.h:2027
Value * CreateOr(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1514
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value * > Args=std::nullopt, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:2432
bool hasAtomicLoad() const LLVM_READONLY
Return true if this atomic instruction loads from memory.
static constexpr LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
Definition: LowLevelType.h:42
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
Base class for LoadSDNode and StoreSDNode.
An instruction for reading from memory.
Definition: Instructions.h:174
bool isUnordered() const
Definition: Instructions.h:247
This class is used to represent ISD::LOAD nodes.
const SDValue & getBasePtr() const
ISD::LoadExtType getExtensionType() const
Return whether this is a plain node, or one of the varieties of value-extending loads.
bool hasValue() const
TypeSize getValue() const
Context object for machine code objects.
Definition: MCContext.h:83
Base class for the full range of assembler expressions which are needed for parsing.
Definition: MCExpr.h:34
Wrapper class representing physical registers. Should be passed by value.
Definition: MCRegister.h:33
MCSymbolXCOFF * getQualNameSymbol() const
static const MCSymbolRefExpr * create(const MCSymbol *Symbol, MCContext &Ctx)
Definition: MCExpr.h:393
Metadata node.
Definition: Metadata.h:1069
Machine Value Type.
@ INVALID_SIMPLE_VALUE_TYPE
SimpleValueType SimpleTy
unsigned getVectorNumElements() const
bool isVector() const
Return true if this is a vector value type.
bool isInteger() const
Return true if this is an integer or a vector integer type.
static auto integer_valuetypes()
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
static auto fixedlen_vector_valuetypes()
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
bool isScalarInteger() const
Return true if this is an integer, not including vectors.
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
static MVT getIntegerVT(unsigned BitWidth)
static auto fp_valuetypes()
void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
void setCallFrameSize(unsigned N)
Set the call frame size on entry to this basic block.
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool IsImmutable, bool isAliased=false)
Create a new object at a fixed location on the stack.
int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
void setFrameAddressIsTaken(bool T)
void setHasTailCall(bool V=true)
void setReturnAddressIsTaken(bool s)
Align getObjectAlign(int ObjectIdx) const
Return the alignment of the specified stack object.
int64_t getObjectSize(int ObjectIdx) const
Return the size of the specified object.
bool hasVAStart() const
Returns true if the function calls the llvm.va_start intrinsic.
int64_t getObjectOffset(int ObjectIdx) const
Return the assigned stack offset of the specified object from the incoming stack pointer.
MCSymbol * getPICBaseSymbol() const
getPICBaseSymbol - Return a function-local symbol to represent the PIC base.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MCContext & getContext() const
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Register addLiveIn(MCRegister PReg, const TargetRegisterClass *RC)
addLiveIn - Add the specified physical register as a live-in value and create a corresponding virtual...
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
void insert(iterator MBBI, MachineBasicBlock *MBB)
const MachineInstrBuilder & setMIFlag(MachineInstr::MIFlag Flag) const
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addRegMask(const uint32_t *Mask) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & cloneMemRefs(const MachineInstr &OtherMI) const
const MachineInstrBuilder & addUse(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register use operand.
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
const MachineInstrBuilder & addDef(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register definition operand.
Representation of each machine instruction.
Definition: MachineInstr.h:69
@ EK_LabelDifference32
EK_LabelDifference32 - Each entry is the address of the block minus the address of the jump table.
A description of a memory reference used in the backend.
LocationSize getSize() const
Return the size in bytes of the memory reference.
Flags
Flags values. These may be or'd together.
@ MOVolatile
The memory access is volatile.
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
@ MOLoad
The memory access reads data.
@ MOInvariant
The memory access always returns the same value (or traps).
@ MOStore
The memory access writes data.
Flags getFlags() const
Return the raw flags of the source value,.
MachineOperand class - Representation of each machine instruction operand.
static MachineOperand CreateImm(int64_t Val)
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
const TargetRegisterClass * getRegClass(Register Reg) const
Return the register class of the specified virtual register.
MachineInstr * getVRegDef(Register Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is ...
Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
Register getLiveInVirtReg(MCRegister PReg) const
getLiveInVirtReg - If PReg is a live-in physical register, return the corresponding live-in virtual r...
An SDNode that represents everything that will be needed to construct a MachineInstr.
This SDNode is used for target intrinsics that touch memory and need an associated MachineMemOperand.
This is an abstract virtual class for memory operations.
Align getAlign() const
AAMDNodes getAAInfo() const
Returns the AA info that describes the dereference.
MachineMemOperand * getMemOperand() const
Return a MachineMemOperand object describing the memory reference performed by operation.
const SDValue & getBasePtr() const
const MachinePointerInfo & getPointerInfo() const
const SDValue & getChain() const
EVT getMemoryVT() const
Return the type of the in-memory value.
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
uint64_t getReturnSaveOffset() const
getReturnSaveOffset - Return the previous frame offset to save the return address.
uint64_t getFramePointerSaveOffset() const
getFramePointerSaveOffset - Return the previous frame offset to save the frame pointer.
unsigned getLinkageSize() const
getLinkageSize - Return the size of the PowerPC ABI linkage area.
uint64_t getTOCSaveOffset() const
getTOCSaveOffset - Return the previous frame offset to save the TOC register – 64-bit SVR4 ABI only.
PPCFunctionInfo - This class is derived from MachineFunction private PowerPC target-specific informat...
void setVarArgsNumFPR(unsigned Num)
unsigned getVarArgsNumFPR() const
void setVarArgsNumGPR(unsigned Num)
void appendParameterType(ParamType Type)
bool isAIXFuncTLSModelOptInitDone() const
void setMinReservedArea(unsigned size)
unsigned getVarArgsNumGPR() const
unsigned getMinReservedArea() const
void setVarArgsStackOffset(int Offset)
void setVarArgsFrameIndex(int Index)
void addLiveInAttr(Register VReg, ISD::ArgFlagsTy Flags)
This function associates attributes for each live-in virtual register.
void setFramePointerSaveIndex(int Idx)
static bool hasPCRelFlag(unsigned TF)
Definition: PPCInstrInfo.h:300
bool is32BitELFABI() const
Definition: PPCSubtarget.h:220
unsigned descriptorTOCAnchorOffset() const
Definition: PPCSubtarget.h:260
bool isAIXABI() const
Definition: PPCSubtarget.h:215
bool useSoftFloat() const
Definition: PPCSubtarget.h:175
const PPCFrameLowering * getFrameLowering() const override
Definition: PPCSubtarget.h:143
bool needsSwapsForVSXMemOps() const
Definition: PPCSubtarget.h:203
bool isPPC64() const
isPPC64 - Return true if we are generating code for 64-bit pointer mode.
bool isUsingPCRelativeCalls() const
bool usesFunctionDescriptors() const
True if the ABI is descriptor based.
Definition: PPCSubtarget.h:254
MCRegister getEnvironmentPointerRegister() const
Definition: PPCSubtarget.h:272
const PPCInstrInfo * getInstrInfo() const override
Definition: PPCSubtarget.h:146
bool isSVR4ABI() const
Definition: PPCSubtarget.h:216
unsigned getCPUDirective() const
getCPUDirective - Returns the -m directive specified for the cpu.
Definition: PPCSubtarget.h:135
POPCNTDKind hasPOPCNTD() const
Definition: PPCSubtarget.h:207
bool isLittleEndian() const
Definition: PPCSubtarget.h:182
bool isTargetLinux() const
Definition: PPCSubtarget.h:213
MCRegister getTOCPointerRegister() const
Definition: PPCSubtarget.h:278
MCRegister getStackPointerRegister() const
Definition: PPCSubtarget.h:290
bool is64BitELFABI() const
Definition: PPCSubtarget.h:219
bool isELFv2ABI() const
const PPCTargetMachine & getTargetMachine() const
Definition: PPCSubtarget.h:156
bool isPredictableSelectIsExpensive() const
Definition: PPCSubtarget.h:296
bool enableMachineScheduler() const override
Scheduling customization.
const PPCRegisterInfo * getRegisterInfo() const override
Definition: PPCSubtarget.h:153
bool isGVIndirectSymbol(const GlobalValue *GV) const
True if the GV will be accessed via an indirect symbol.
unsigned descriptorEnvironmentPointerOffset() const
Definition: PPCSubtarget.h:266
MachineBasicBlock * emitEHSjLjLongJmp(MachineInstr &MI, MachineBasicBlock *MBB) const
CCAssignFn * ccAssignFnForCall(CallingConv::ID CC, bool Return, bool IsVarArg) const
bool isTruncateFree(Type *Ty1, Type *Ty2) const override
isTruncateFree - Return true if it's free to truncate a value of type Ty1 to type Ty2.
Value * emitMaskedAtomicRMWIntrinsic(IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr, Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const override
Perform a masked atomicrmw using a target-specific intrinsic.
MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *MBB) const override
This method should be implemented by targets that mark instructions with the 'usesCustomInserter' fla...
bool isFPExtFree(EVT DestVT, EVT SrcVT) const override
Return true if an fpext operation is free (for instance, because single-precision floating-point numb...
PPC::AddrMode SelectForceXFormMode(SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG) const
SelectForceXFormMode - Given the specified address, force it to be represented as an indexed [r+r] op...
Instruction * emitTrailingFence(IRBuilderBase &Builder, Instruction *Inst, AtomicOrdering Ord) const override
bool hasInlineStackProbe(const MachineFunction &MF) const override
MachineBasicBlock * emitEHSjLjSetJmp(MachineInstr &MI, MachineBasicBlock *MBB) const
const char * getTargetNodeName(unsigned Opcode) const override
getTargetNodeName() - This method returns the name of a target specific DAG node.
bool supportsTailCallFor(const CallBase *CB) const
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override
Return true if folding a constant offset with the given GlobalAddress is legal.
MachineBasicBlock * emitProbedAlloca(MachineInstr &MI, MachineBasicBlock *MBB) const
bool isZExtFree(SDValue Val, EVT VT2) const override
Return true if zero-extending the specific node Val to type VT2 is free (either because it's implicit...
MachineBasicBlock * EmitPartwordAtomicBinary(MachineInstr &MI, MachineBasicBlock *MBB, bool is8bit, unsigned Opcode, unsigned CmpOpcode=0, unsigned CmpPred=0) const
SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, NegatibleCost &Cost, unsigned Depth=0) const override
Return the newly negated expression if the cost is not expensive and set the cost in Cost to indicate...
bool SelectAddressRegImm(SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG, MaybeAlign EncodingAlignment) const
SelectAddressRegImm - Returns true if the address N can be represented by a base register plus a sign...
bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I, MachineFunction &MF, unsigned Intrinsic) const override
Given an intrinsic, checks if on the target the intrinsic will need to map to a MemIntrinsicNode (tou...
SDValue expandVSXLoadForLE(SDNode *N, DAGCombinerInfo &DCI) const
bool splitValueIntoRegisterParts(SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts, unsigned NumParts, MVT PartVT, std::optional< CallingConv::ID > CC) const override
Target-specific splitting of values into parts that fit a register storing a legal type.
void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const override
LowerAsmOperandForConstraint - Lower the specified operand into the Ops vector.
void ReplaceNodeResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG) const override
ReplaceNodeResults - Replace the results of node with an illegal result type with new values built ou...
TargetLowering::AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override
Returns how the IR-level AtomicExpand pass should expand the given AtomicRMW, if at all.
bool SelectAddressRegReg(SDValue N, SDValue &Base, SDValue &Index, SelectionDAG &DAG, MaybeAlign EncodingAlignment=std::nullopt) const
SelectAddressRegReg - Given the specified addressed, check to see if it can be more efficiently repre...
MachineBasicBlock * EmitAtomicBinary(MachineInstr &MI, MachineBasicBlock *MBB, unsigned AtomicSize, unsigned BinOpcode, unsigned CmpOpcode=0, unsigned CmpPred=0) const
SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created) const override
Targets may override this function to provide custom SDIV lowering for power-of-2 denominators.
void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth=0) const override
Determine which of the bits specified in Mask are known to be either zero or one and return them in t...
bool SelectAddressRegRegOnly(SDValue N, SDValue &Base, SDValue &Index, SelectionDAG &DAG) const
SelectAddressRegRegOnly - Given the specified addressed, force it to be represented as an indexed [r+...
bool useSoftFloat() const override
SDValue getPICJumpTableRelocBase(SDValue Table, SelectionDAG &DAG) const override
Returns relocation base for the given PIC jumptable.
void insertSSPDeclarations(Module &M) const override
Inserts necessary declarations for SSP (stack protection) purpose.
Value * emitMaskedAtomicCmpXchgIntrinsic(IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr, Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const override
Perform a masked cmpxchg using a target-specific intrinsic.
ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &info, const char *constraint) const override
Examine constraint string and operand type and determine a weight value.
uint64_t getByValTypeAlignment(Type *Ty, const DataLayout &DL) const override
getByValTypeAlignment - Return the desired alignment for ByVal aggregate function arguments in the ca...
bool enableAggressiveFMAFusion(EVT VT) const override
Return true if target always benefits from combining into FMA for a given value type.
Register getRegisterByName(const char *RegName, LLT VT, const MachineFunction &MF) const override
Return the register ID of the name passed in.
bool decomposeMulByConstant(LLVMContext &Context, EVT VT, SDValue C) const override
Return true if it is profitable to transform an integer multiplication-by-constant into simpler opera...
unsigned getJumpTableEncoding() const override
Return the entry encoding for a jump table in the current function.
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I=nullptr) const override
isLegalAddressingMode - Return true if the addressing mode represented by AM is legal for this target...
bool preferIncOfAddToSubOfNot(EVT VT) const override
These two forms are equivalent: sub y, (xor x, -1) add (add x, 1), y The variant with two add's is IR...
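(As a quick check of the equivalence: in two's complement, xor x, -1 computes ~x = -x - 1, so sub y, (xor x, -1) yields y - (-x - 1) = (x + 1) + y, exactly the value of add (add x, 1), y.)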
bool shouldConvertConstantLoadToIntImm(const APInt &Imm, Type *Ty) const override
Returns true if it is beneficial to convert a load of a constant to just the constant itself.
const MCPhysReg * getScratchRegisters(CallingConv::ID CC) const override
Returns a 0 terminated array of registers that can be safely used as scratch registers.
bool getPreIndexedAddressParts(SDNode *N, SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG) const override
getPreIndexedAddressParts - returns true by value, base pointer and offset pointer and addressing mod...
bool isProfitableToHoist(Instruction *I) const override
isProfitableToHoist - Check if it is profitable to hoist instruction I to its dominator block.
bool isFPImmLegal(const APFloat &Imm, EVT VT, bool ForCodeSize) const override
Returns true if the target can instruction select the specified FP immediate natively.
ConstraintType getConstraintType(StringRef Constraint) const override
getConstraintType - Given a constraint, return the type of constraint it is for this target.
const MCExpr * getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI, MCContext &Ctx) const override
This returns the relocation base for the given PIC jumptable, the same as getPICJumpTableRelocBase,...
bool shallExtractConstSplatVectorElementToStore(Type *VectorTy, unsigned ElemSizeInBits, unsigned &Index) const override
Return true if the target shall perform extract vector element and store given that the vector is kno...
EVT getOptimalMemOpType(const MemOp &Op, const AttributeList &FuncAttributes) const override
It returns EVT::Other if the type should be determined using generic target-independent logic.
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override
This method will be invoked for all target nodes and for any target-independent nodes that the target...
SDValue expandVSXStoreForLE(SDNode *N, DAGCombinerInfo &DCI) const
void CollectTargetIntrinsicOperands(const CallInst &I, SmallVectorImpl< SDValue > &Ops, SelectionDAG &DAG) const override
bool useLoadStackGuardNode() const override
Override to support customized stack guard loading.
unsigned getStackProbeSize(const MachineFunction &MF) const
PPCTargetLowering(const PPCTargetMachine &TM, const PPCSubtarget &STI)
TargetLowering::AtomicExpansionKind shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const override
Returns how the given atomic cmpxchg should be expanded by the IR-level AtomicExpand pass.
bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, EVT VT) const override
isFMAFasterThanFMulAndFAdd - Return true if an FMA operation is faster than a pair of fmul and fadd i...
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const override
Is unaligned memory access allowed for the given type, and is it fast relative to software emulation.
bool shouldExpandBuildVectorWithShuffles(EVT VT, unsigned DefinedValues) const override
bool SelectAddressRegImm34(SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG) const
Similar to the 16-bit case but for instructions that take a 34-bit displacement field (prefixed loads...
std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const override
Given a physical register constraint (e.g.
Register getExceptionSelectorRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception typeid on entry to a la...
bool isJumpTableRelative() const override
Register getExceptionPointerRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception address on entry to an ...
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
LowerOperation - Provide custom lowering hooks for some operations.
PPC::AddrMode SelectOptimalAddrMode(const SDNode *Parent, SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG, MaybeAlign Align) const
SelectOptimalAddrMode - Based on a node N and its Parent (a MemSDNode), compute the address flags of...
Value * getSDagStackGuard(const Module &M) const override
Return the variable that's previously inserted by insertSSPDeclarations, if any, otherwise return nul...
bool SelectAddressPCRel(SDValue N, SDValue &Base) const
SelectAddressPCRel - Returns true if the specified address can be represented as PC-relative, i.e. as [pc+imm].
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const override
getSetCCResultType - Return the ISD::SETCC ValueType
bool SelectAddressEVXRegReg(SDValue N, SDValue &Base, SDValue &Index, SelectionDAG &DAG) const
SelectAddressEVXRegReg - Given the specified address, check to see if it can be more efficiently re...
bool isLegalICmpImmediate(int64_t Imm) const override
isLegalICmpImmediate - Return true if the specified immediate is legal icmp immediate,...
bool isAccessedAsGotIndirect(SDValue N) const
Align getPrefLoopAlignment(MachineLoop *ML) const override
Return the preferred loop alignment.
FastISel * createFastISel(FunctionLoweringInfo &FuncInfo, const TargetLibraryInfo *LibInfo) const override
createFastISel - This method returns a target-specific FastISel object, or null if the target does no...
bool shouldInlineQuadwordAtomics() const
Instruction * emitLeadingFence(IRBuilderBase &Builder, Instruction *Inst, AtomicOrdering Ord) const override
Inserts in the IR a target-specific intrinsic specifying a fence.
bool isLegalAddImmediate(int64_t Imm) const override
isLegalAddImmediate - Return true if the specified immediate is legal add immediate,...
Common code between 32-bit and 64-bit PowerPC targets.
static PointerType * getUnqual(Type *ElementType)
This constructs a pointer to an object of the specified type in the default address space (address sp...
Definition: DerivedTypes.h:662
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:91
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
This class provides iterator support for SDUse operands that use a specific SDNode.
Represents one node in the SelectionDAG.
ArrayRef< SDUse > ops() const
void dump() const
Dump this node, for debugging.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
bool hasOneUse() const
Return true if there is exactly one use of this node.
iterator_range< use_iterator > uses()
SDNodeFlags getFlags() const
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
unsigned getNumValues() const
Return the number of values defined/returned by this operator.
const SDValue & getOperand(unsigned Num) const
uint64_t getConstantOperandVal(unsigned Num) const
Helper method returns the integer value of a ConstantSDNode operand.
use_iterator use_begin() const
Provide iteration support to walk over all uses of an SDNode.
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
bool hasNUsesOfValue(unsigned NUses, unsigned Value) const
Return true if there are exactly NUSES uses of the indicated value.
op_iterator op_end() const
op_iterator op_begin() const
static use_iterator use_end()
Represents a use of a SDNode.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isUndef() const
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
SDValue getValue(unsigned R) const
void dump() const
EVT getValueType() const
Return the ValueType of the referenced return value.
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
const SDValue & getOperand(unsigned i) const
uint64_t getConstantOperandVal(unsigned i) const
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
unsigned getOpcode() const
unsigned getNumOperands() const
static SectionKind getMetadata()
Definition: SectionKind.h:188
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
Definition: SelectionDAG.h:226
SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:736
SDValue getStackArgumentTokenFactor(SDValue Chain)
Compute a TokenFactor to force all the incoming stack arguments to be loaded from the stack.
const TargetSubtargetInfo & getSubtarget() const
Definition: SelectionDAG.h:489
SDValue getMergeValues(ArrayRef< SDValue > Ops, const SDLoc &dl)
Create a MERGE_VALUES node from the given operands.
SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
SDValue getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src, SDValue Size, Align Alignment, bool isVol, bool AlwaysInline, const CallInst *CI, std::optional< bool > OverrideTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo, const AAMDNodes &AAInfo=AAMDNodes(), AAResults *AA=nullptr)
SDValue makeEquivalentMemoryOrdering(SDValue OldChain, SDValue NewMemOpChain)
If an existing load has uses of its chain, create a token factor node with that chain and the new mem...
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Chain=SDValue(), bool IsSignaling=false)
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...
SDValue getConstantFP(double Val, const SDLoc &DL, EVT VT, bool isTarget=false)
Create a ConstantFPSDNode wrapping a constant value.
SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
Align getEVTAlign(EVT MemoryVT) const
Compute the default alignment value for the given type.
void addNoMergeSiteInfo(const SDNode *Node, bool NoMerge)
Set NoMergeSiteInfo to be associated with Node if NoMerge is true.
SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
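For example, a combine that needs the inverse of an i1 condition can write the following (a sketch; DL and Cond are assumed to be an SDLoc and an i1-typed SDValue already in scope):
// Build (xor Cond, -1), the same node an explicit ISD::XOR with an all-ones constant would create.
SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);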
const TargetLowering & getTargetLoweringInfo() const
Definition: SelectionDAG.h:493
static constexpr unsigned MaxRecursionDepth
Definition: SelectionDAG.h:451
SDValue getTargetJumpTable(int JTI, EVT VT, unsigned TargetFlags=0)
Definition: SelectionDAG.h:746
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getCALLSEQ_END(SDValue Chain, SDValue Op1, SDValue Op2, SDValue InGlue, const SDLoc &DL)
Return a new CALLSEQ_END node, which always must have a glue result (to ensure it's not CSE'd).
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
Definition: SelectionDAG.h:842
bool isSplatValue(SDValue V, const APInt &DemandedElts, APInt &UndefElts, unsigned Depth=0) const
Test whether V has a splatted value for all the demanded elements.
SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
SDValue getSelect(const SDLoc &DL, EVT VT, SDValue Cond, SDValue LHS, SDValue RHS, SDNodeFlags Flags=SDNodeFlags())
Helper function to make it easier to build Select's if you just have operands and don't want to check...
const DataLayout & getDataLayout() const
Definition: SelectionDAG.h:487
SDValue getTargetFrameIndex(int FI, EVT VT)
Definition: SelectionDAG.h:741
SDValue getTokenFactor(const SDLoc &DL, SmallVectorImpl< SDValue > &Vals)
Creates a new TokenFactor containing Vals.
bool areNonVolatileConsecutiveLoads(LoadSDNode *LD, LoadSDNode *Base, unsigned Bytes, int Dist) const
Return true if loads are next to each other and can be merged.
SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
SDValue getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, EVT SVT, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getMDNode(const MDNode *MD)
Return an MDNodeSDNode which holds an MDNode.
void ReplaceAllUsesWith(SDValue From, SDValue To)
Modify anything using 'From' to use 'To' instead.
SDValue getCommutedVectorShuffle(const ShuffleVectorSDNode &SV)
Returns an ISD::VECTOR_SHUFFLE node semantically equivalent to the shuffle node in input but with swa...
SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
SDValue getCALLSEQ_START(SDValue Chain, uint64_t InSize, uint64_t OutSize, const SDLoc &DL)
Return a new CALLSEQ_START node, that starts new call frame, in which InSize bytes are set up inside ...
SDValue getRegister(unsigned Reg, EVT VT)
SDValue getSExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either sign-extending or trunca...
SDValue getBoolExtOrTrunc(SDValue Op, const SDLoc &SL, EVT VT, EVT OpVT)
Convert Op, which must be of integer type, to the integer type VT, by using an extension appropriate ...
SDValue getExternalSymbol(const char *Sym, EVT VT)
const TargetMachine & getTarget() const
Definition: SelectionDAG.h:488
SDValue getAnyExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either any-extending or truncat...
SDValue getCopyToReg(SDValue Chain, const SDLoc &dl, unsigned Reg, SDValue N)
Definition: SelectionDAG.h:787
SDValue getSelectCC(const SDLoc &DL, SDValue LHS, SDValue RHS, SDValue True, SDValue False, ISD::CondCode Cond)
Helper function to make it easier to build SelectCC's if you just have an ISD::CondCode instead of an...
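A small usage sketch (A, B and DL are assumed to be two SDValues of the same integer type and an SDLoc already in scope):
// select_cc A, B, A, B, setgt  ==>  signed maximum of A and B.
SDValue Max = DAG.getSelectCC(DL, A, B, A, B, ISD::SETGT);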
SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
SDValue getValueType(EVT)
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
Definition: SelectionDAG.h:690
unsigned ComputeNumSignBits(SDValue Op, unsigned Depth=0) const
Return the number of times the sign bit of the register is replicated into the other bits.
SDValue getBoolConstant(bool V, const SDLoc &DL, EVT VT, EVT OpVT)
Create a true or false constant of type VT using the target's BooleanContent for type OpVT.
SDValue getTargetBlockAddress(const BlockAddress *BA, EVT VT, int64_t Offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:782
bool isBaseWithConstantOffset(SDValue Op) const
Return true if the specified operand is an ISD::ADD with a ConstantSDNode on the right-hand side,...
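The usual pattern when this predicate succeeds looks like the following sketch (N is assumed to be the address SDValue under inspection):
if (DAG.isBaseWithConstantOffset(N)) {
  SDValue Base = N.getOperand(0);
  int64_t Offset = cast<ConstantSDNode>(N.getOperand(1))->getSExtValue();
  // ... fold Base plus Offset into the addressing mode ...
}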
SDValue getVectorIdxConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone.
MachineFunction & getMachineFunction() const
Definition: SelectionDAG.h:482
SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, unsigned Reg, EVT VT)
Definition: SelectionDAG.h:813
SDValue getSplatBuildVector(EVT VT, const SDLoc &DL, SDValue Op)
Return a splat ISD::BUILD_VECTOR node, consisting of Op splatted to all elements.
Definition: SelectionDAG.h:859
SDValue getFrameIndex(int FI, EVT VT, bool isTarget=false)
KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
SDValue getRegisterMask(const uint32_t *RegMask)
SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
SDValue getCondCode(ISD::CondCode Cond)
bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
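For instance, to ask whether the upper half of a 64-bit value is known to be clear (Op is assumed to be an i64 SDValue):
// True when bits 32..63 of Op are provably zero.
bool HighHalfZero = DAG.MaskedValueIsZero(Op, APInt::getHighBitsSet(64, 32));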
SDValue getObjectPtrOffset(const SDLoc &SL, SDValue Ptr, TypeSize Offset)
Create an add instruction with appropriate flags when used for addressing some offset of an object.
LLVMContext * getContext() const
Definition: SelectionDAG.h:500
SDValue getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef< SDValue > Ops, EVT MemVT, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags Flags=MachineMemOperand::MOLoad|MachineMemOperand::MOStore, LocationSize Size=0, const AAMDNodes &AAInfo=AAMDNodes())
Creates a MemIntrinsicNode that may produce a result and takes a list of operands.
SDValue getTargetExternalSymbol(const char *Sym, EVT VT, unsigned TargetFlags=0)
SDValue getMCSymbol(MCSymbol *Sym, EVT VT)
SDValue CreateStackTemporary(TypeSize Bytes, Align Alignment)
Create a stack temporary based on the size in bytes and the alignment.
SDNode * UpdateNodeOperands(SDNode *N, SDValue Op)
Mutate the specified node in-place to have the specified operands.
SDValue getTargetConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:753
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
Definition: SelectionDAG.h:570
std::pair< SDValue, SDValue > SplitScalar(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the scalar node with EXTRACT_ELEMENT using the provided VTs and return the low/high part.
SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
This SDNode is used to implement the code generator support for the llvm IR shufflevector instruction...
int getMaskElt(unsigned Idx) const
ArrayRef< int > getMask() const
size_type size() const
Definition: SmallPtrSet.h:95
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:435
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:367
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:502
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:135
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
Definition: SmallSet.h:166
void clear()
Definition: SmallSet.h:218
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:179
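Typical usage, as a sketch (Reg and visitFirstUse are assumed names for illustration):
SmallSet<unsigned, 8> Seen;
// insert(...).second is true only the first time a value is added.
if (Seen.insert(Reg).second)
  visitFirstUse(Reg);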
bool empty() const
Definition: SmallVector.h:94
size_t size() const
Definition: SmallVector.h:91
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:586
void push_back(const T &Elt)
Definition: SmallVector.h:426
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
StackOffset holds a fixed and a scalable offset in bytes.
Definition: TypeSize.h:33
This class is used to represent ISD::STORE nodes.
const SDValue & getBasePtr() const
const SDValue & getValue() const
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
constexpr size_t size() const
size - Get the string size.
Definition: StringRef.h:137
constexpr const char * data() const
data - Get a pointer to the start of the string (which may not be null terminated).
Definition: StringRef.h:131
A switch()-like statement whose cases are string literals.
Definition: StringSwitch.h:44
StringSwitch & Case(StringLiteral S, T Value)
Definition: StringSwitch.h:69
R Default(T Value)
Definition: StringSwitch.h:182
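Typical usage, as a sketch (Constraint is assumed to be a StringRef already in scope):
// Map a constraint letter onto a small integer kind; -1 if unrecognized.
int Kind = StringSwitch<int>(Constraint)
               .Case("r", 0)
               .Case("f", 1)
               .Default(-1);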
Class to represent struct types.
Definition: DerivedTypes.h:216
Information about stack frame layout on the target.
unsigned getStackAlignment() const
getStackAlignment - This method returns the number of bytes to which the stack pointer must be aligne...
TargetInstrInfo - Interface to description of machine instruction set.
Provides information about what library functions are available for the current target.
void setBooleanVectorContents(BooleanContent Ty)
Specify how the target extends the result of a vector boolean value from a vector of i1 to a wider ty...
void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action)
Indicate that the specified operation does not work with the specified type and indicate what to do a...
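For example, a target's lowering constructor typically marks unsupported operations up front; a generic illustration (not the PPC configuration) from inside such a constructor:
// Expand f64 sine into a libcall; custom-lower i32 SELECT_CC.
setOperationAction(ISD::FSIN, MVT::f64, Expand);
setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);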
virtual bool shouldSignExtendTypeInLibCall(EVT Type, bool IsSigned) const
Returns true if arguments should be sign-extended in lib calls.
bool PredictableSelectIsExpensive
Tells the code generator that select is more expensive than a branch if the branch is usually predict...
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
virtual bool shouldExpandBuildVectorWithShuffles(EVT, unsigned DefinedValues) const
unsigned MaxStoresPerMemcpyOptSize
Likewise for functions with the OptSize attribute.
MachineBasicBlock * emitPatchPoint(MachineInstr &MI, MachineBasicBlock *MBB) const
Replace/modify any TargetFrameIndex operands with a target-dependent sequence of memory operands that...
virtual const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const
Return the register class that should be used for the specified value type.
void setMinStackArgumentAlignment(Align Alignment)
Set the minimum stack alignment of an argument.
virtual MVT getVectorIdxTy(const DataLayout &DL) const
Returns the type to be used for the index operand of: ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT...
const TargetMachine & getTargetMachine() const
unsigned MaxLoadsPerMemcmp
Specify maximum number of load instructions per memcmp call.
virtual bool isZExtFree(Type *FromTy, Type *ToTy) const
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
virtual Value * getSDagStackGuard(const Module &M) const
Return the variable that's previously inserted by insertSSPDeclarations, if any, otherwise return nul...
void setIndexedLoadAction(ArrayRef< unsigned > IdxModes, MVT VT, LegalizeAction Action)
Indicate that the specified indexed load does or does not work with the specified type and indicate w...
void setPrefLoopAlignment(Align Alignment)
Set the target's preferred loop alignment.
void setMaxAtomicSizeInBitsSupported(unsigned SizeInBits)
Set the maximum atomic operation size supported by the backend.
Sched::Preference getSchedulingPreference() const
Return target scheduling preference.
void setMinFunctionAlignment(Align Alignment)
Set the target's minimum function alignment.
bool isOperationCustom(unsigned Op, EVT VT) const
Return true if the operation uses custom lowering, regardless of whether the type is legal or not.
unsigned MaxStoresPerMemsetOptSize
Likewise for functions with the OptSize attribute.
bool hasBigEndianPartOrdering(EVT VT, const DataLayout &DL) const
When splitting a value of the specified type into parts, does the Lo or Hi part come first?...
EVT getShiftAmountTy(EVT LHSTy, const DataLayout &DL) const
Returns the type for the shift amount of a shift opcode.
void setBooleanContents(BooleanContent Ty)
Specify how the target extends the result of integer and floating point boolean values from i1 to a w...
unsigned MaxStoresPerMemmove
Specify maximum number of store instructions per memmove call.
virtual Align getPrefLoopAlignment(MachineLoop *ML=nullptr) const
Return the preferred loop alignment.
void computeRegisterProperties(const TargetRegisterInfo *TRI)
Once all of the register classes are added, this allows us to compute derived properties we expose.
unsigned MaxStoresPerMemmoveOptSize
Likewise for functions with the OptSize attribute.
void addRegisterClass(MVT VT, const TargetRegisterClass *RC)
Add the specified register class as an available regclass for the specified value type.
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
void setIndexedStoreAction(ArrayRef< unsigned > IdxModes, MVT VT, LegalizeAction Action)
Indicate that the specified indexed store does or does not work with the specified type and indicate ...
virtual bool isJumpTableRelative() const
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
void setLibcallName(RTLIB::Libcall Call, const char *Name)
Rename the default libcall routine name for the specified libcall.
void setPrefFunctionAlignment(Align Alignment)
Set the target's preferred function alignment.
bool isOperationLegal(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target.
virtual AtomicExpansionKind shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const
Returns how the given atomic cmpxchg should be expanded by the IR-level AtomicExpand pass.
unsigned MaxStoresPerMemset
Specify maximum number of store instructions per memset call.
void setMinimumJumpTableEntries(unsigned Val)
Indicate the minimum number of blocks to generate jump tables.
void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified truncating store does not work with the specified type and indicate what ...
bool isOperationLegalOrCustom(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
unsigned MaxLoadsPerMemcmpOptSize
Likewise for functions with the OptSize attribute.
void setStackPointerRegisterToSaveRestore(Register R)
If set to a physical register, this specifies the register that llvm.savestack/llvm....
void AddPromotedToType(unsigned Opc, MVT OrigVT, MVT DestVT)
If Opc/OrigVT is specified as being promoted, the promotion code defaults to trying a larger integer/...
AtomicExpansionKind
Enum that specifies what an atomic load/AtomicRMWInst is expanded to, if at all.
void setCondCodeAction(ArrayRef< ISD::CondCode > CCs, MVT VT, LegalizeAction Action)
Indicate that the specified condition code is or isn't supported on the target and indicate what to d...
void setTargetDAGCombine(ArrayRef< ISD::NodeType > NTs)
Targets should invoke this method for each target independent node that they want to provide a custom...
virtual AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const
Returns how the IR-level AtomicExpand pass should expand the given AtomicRMW, if at all.
void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified load with extension does not work with the specified type and indicate wh...
unsigned GatherAllAliasesMaxDepth
Depth that GatherAllAliases should continue looking for chain dependencies when trying to find a more...
NegatibleCost
Enum that specifies when a float negation is beneficial.
std::vector< ArgListEntry > ArgListTy
void setHasMultipleConditionRegisters(bool hasManyRegs=true)
Tells the code generator that the target has multiple (allocatable) condition registers that can be u...
unsigned MaxStoresPerMemcpy
Specify maximum number of store instructions per memcpy call.
void setSchedulingPreference(Sched::Preference Pref)
Specify the target scheduling preference.
virtual void insertSSPDeclarations(Module &M) const
Inserts necessary declarations for SSP (stack protection) purpose.
void setJumpIsExpensive(bool isExpensive=true)
Tells the code generator not to expand logic operations on comparison predicates into separate sequen...
virtual MCSymbol * getFunctionEntryPointSymbol(const GlobalValue *Func, const TargetMachine &TM) const
If supported, return the function entry point symbol.
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
virtual const MCExpr * getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI, MCContext &Ctx) const
This returns the relocation base for the given PIC jumptable, the same as getPICJumpTableRelocBase,...
SDValue lowerCmpEqZeroToCtlzSrl(SDValue Op, SelectionDAG &DAG) const
virtual bool useLoadStackGuardNode() const
If this function returns true, SelectionDAGBuilder emits a LOAD_STACK_GUARD node when it is lowering ...
void softenSetCCOperands(SelectionDAG &DAG, EVT VT, SDValue &NewLHS, SDValue &NewRHS, ISD::CondCode &CCCode, const SDLoc &DL, const SDValue OldLHS, const SDValue OldRHS) const
Soften the operands of a comparison.
std::pair< SDValue, SDValue > makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT, ArrayRef< SDValue > Ops, MakeLibCallOptions CallOptions, const SDLoc &dl, SDValue Chain=SDValue()) const
Returns a pair of (return value, chain).
SDValue getCheaperNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, unsigned Depth=0) const
This is the helper function to return the newly negated expression only when the cost is cheaper.
virtual ConstraintType getConstraintType(StringRef Constraint) const
Given a constraint, return the type of constraint it is for this target.
virtual SDValue LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA, SelectionDAG &DAG) const
Lower TLS global address SDNode for target independent emulated TLS model.
std::pair< SDValue, SDValue > LowerCallTo(CallLoweringInfo &CLI) const
This function lowers an abstract call to a function into an actual call.
bool isPositionIndependent() const
virtual SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, NegatibleCost &Cost, unsigned Depth=0) const
Return the newly negated expression if the cost is not expensive and set the cost in Cost to indicate...
virtual ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &info, const char *constraint) const
Examine constraint string and operand type and determine a weight value.
virtual SDValue getSqrtInputTest(SDValue Operand, SelectionDAG &DAG, const DenormalMode &Mode) const
Return a target-dependent comparison result if the input operand is suitable for use with a square ro...
virtual SDValue getPICJumpTableRelocBase(SDValue Table, SelectionDAG &DAG) const
Returns relocation base for the given PIC jumptable.
virtual std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const
Given a physical register constraint (e.g.
bool verifyReturnAddressArgumentIsConstant(SDValue Op, SelectionDAG &DAG) const
bool isInTailCallPosition(SelectionDAG &DAG, SDNode *Node, SDValue &Chain) const
Check whether a given call node is in tail position within its function.
virtual SDValue getSqrtResultForDenormInput(SDValue Operand, SelectionDAG &DAG) const
Return a target-dependent result if the input operand is not suitable for use with a square root esti...
virtual void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const
Lower the specified operand into the Ops vector.
virtual bool isGAPlusOffset(SDNode *N, const GlobalValue *&GA, int64_t &Offset) const
Returns true (and the GlobalValue and the offset) if the node is a GlobalAddress + offset.
virtual unsigned getJumpTableEncoding() const
Return the entry encoding for a jump table in the current function.
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:77
TLSModel::Model getTLSModel(const GlobalValue *GV) const
Returns the TLS model which should be used for the given global variable.
bool useEmulatedTLS() const
Returns true if this target uses emulated TLS.
Reloc::Model getRelocationModel() const
Returns the code generation relocation model.
bool shouldAssumeDSOLocal(const GlobalValue *GV) const
TargetOptions Options
CodeModel::Model getCodeModel() const
Returns the code model.
unsigned UnsafeFPMath
UnsafeFPMath - This flag is enabled when the -enable-unsafe-fp-math flag is specified on the command ...
unsigned NoInfsFPMath
NoInfsFPMath - This flag is enabled when the -enable-no-infs-fp-math flag is specified on the command...
unsigned PPCGenScalarMASSEntries
Enables scalar MASS conversions.
unsigned NoNaNsFPMath
NoNaNsFPMath - This flag is enabled when the -enable-no-nans-fp-math flag is specified on the command...
unsigned GuaranteedTailCallOpt
GuaranteedTailCallOpt - This flag is enabled when -tailcallopt is specified on the commandline.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition: TypeSize.h:345
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:261
bool isFloatTy() const
Return true if this is 'float', a 32-bit IEEE fp type.
Definition: Type.h:153
bool isEmptyTy() const
Return true if this type is empty, that is, it has no elements or all of its elements are empty.
@ FloatTyID
32-bit floating point type
Definition: Type.h:58
@ DoubleTyID
64-bit floating point type
Definition: Type.h:59
@ FP128TyID
128-bit floating point type (112-bit significand)
Definition: Type.h:61
static Type * getVoidTy(LLVMContext &C)
bool isSized(SmallPtrSetImpl< Type * > *Visited=nullptr) const
Return true if it makes sense to take the size of this type.
Definition: Type.h:298
bool isDoubleTy() const
Return true if this is 'double', a 64-bit IEEE fp type.
Definition: Type.h:156
bool isFunctionTy() const
True if this is an instance of FunctionType.
Definition: Type.h:242
static IntegerType * getInt64Ty(LLVMContext &C)
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:224
TypeID getTypeID() const
Return the type id for the type.
Definition: Type.h:136
TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition: Type.h:343
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
Value * getOperand(unsigned i) const
Definition: User.h:169
unsigned getNumOperands() const
Definition: User.h:191
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition: Value.h:434
const ParentTy * getParent() const
Definition: ilist_node.h:32
self_iterator getIterator()
Definition: ilist_node.h:132
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:121
@ Entry
Definition: COFF.h:826
@ Cold
Attempts to make code in the caller as efficient as possible under the assumption that the call is no...
Definition: CallingConv.h:47
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition: CallingConv.h:41
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
bool isNON_EXTLoad(const SDNode *N)
Returns true if the specified node is a non-extending load.
NodeType
ISD::NodeType enum - This enum defines the target-independent operators for a SelectionDAG.
Definition: ISDOpcodes.h:40
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition: ISDOpcodes.h:779
@ MERGE_VALUES
MERGE_VALUES - This node takes multiple discrete operands and returns them all as its individual resu...
Definition: ISDOpcodes.h:243
@ STACKRESTORE
STACKRESTORE has two operands, an input chain and a pointer to restore to it returns an output chain.
Definition: ISDOpcodes.h:1194
@ STACKSAVE
STACKSAVE - STACKSAVE has one operand, an input chain.
Definition: ISDOpcodes.h:1190
@ TargetConstantPool
Definition: ISDOpcodes.h:174
@ STRICT_FSETCC
STRICT_FSETCC/STRICT_FSETCCS - Constrained versions of SETCC, used for floating-point operands only.
Definition: ISDOpcodes.h:490
@ DELETED_NODE
DELETED_NODE - This is an illegal value that is used to catch errors.
Definition: ISDOpcodes.h:44
@ EH_SJLJ_LONGJMP
OUTCHAIN = EH_SJLJ_LONGJMP(INCHAIN, buffer) This corresponds to the eh.sjlj.longjmp intrinsic.
Definition: ISDOpcodes.h:153
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition: ISDOpcodes.h:257
@ BSWAP
Byte Swap and Counting operators.
Definition: ISDOpcodes.h:743
@ VAEND
VAEND, VASTART - VAEND and VASTART have three operands: an input chain, pointer, and a SRCVALUE.
Definition: ISDOpcodes.h:1223
@ ATOMIC_STORE
OUTCHAIN = ATOMIC_STORE(INCHAIN, ptr, val) This corresponds to "store atomic" instruction.
Definition: ISDOpcodes.h:1309
@ STRICT_FCEIL
Definition: ISDOpcodes.h:440
@ ADDC
Carry-setting nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:276
@ ADD
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:246
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
Definition: ISDOpcodes.h:1099
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition: ISDOpcodes.h:813
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition: ISDOpcodes.h:497
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition: ISDOpcodes.h:205
@ GlobalAddress
Definition: ISDOpcodes.h:78
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition: ISDOpcodes.h:840
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition: ISDOpcodes.h:557
@ FADD
Simple binary floating point operators.
Definition: ISDOpcodes.h:397
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
Definition: ISDOpcodes.h:716
@ SDIVREM
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
Definition: ISDOpcodes.h:262
@ STRICT_FSETCCS
Definition: ISDOpcodes.h:491
@ FP16_TO_FP
FP16_TO_FP, FP_TO_FP16 - These operators are used to perform promotions and truncation for half-preci...
Definition: ISDOpcodes.h:963
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition: ISDOpcodes.h:953
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition: ISDOpcodes.h:236
@ INIT_TRAMPOLINE
INIT_TRAMPOLINE - This corresponds to the init_trampoline intrinsic.
Definition: ISDOpcodes.h:1267
@ FLDEXP
FLDEXP - ldexp, inspired by libm (op0 * 2**op1).
Definition: ISDOpcodes.h:996
@ STRICT_FSQRT
Constrained versions of libm-equivalent floating point intrinsics.
Definition: ISDOpcodes.h:418
@ GlobalTLSAddress
Definition: ISDOpcodes.h:79
@ FrameIndex
Definition: ISDOpcodes.h:80
@ SIGN_EXTEND
Conversion operators.
Definition: ISDOpcodes.h:804
@ STRICT_UINT_TO_FP
Definition: ISDOpcodes.h:464
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition: ISDOpcodes.h:634
@ TargetExternalSymbol
Definition: ISDOpcodes.h:175
@ BR
Control flow instructions. These all have token chains.
Definition: ISDOpcodes.h:1115
@ TargetJumpTable
Definition: ISDOpcodes.h:173
@ PREFETCH
PREFETCH - This corresponds to a prefetch intrinsic.
Definition: ISDOpcodes.h:1289
@ FSINCOS
FSINCOS - Compute both fsin and fcos as a single operation.
Definition: ISDOpcodes.h:1056
@ FNEG
Perform various unary floating-point operations inspired by libm.
Definition: ISDOpcodes.h:980
@ BR_CC
BR_CC - Conditional branch.
Definition: ISDOpcodes.h:1145
@ BR_JT
BR_JT - Jumptable branch.
Definition: ISDOpcodes.h:1124
@ IS_FPCLASS
Performs a check of floating point class property, defined by IEEE-754.
Definition: ISDOpcodes.h:521
@ SSUBSAT
RESULT = [US]SUBSAT(LHS, RHS) - Perform saturation subtraction on 2 integers with the same bit width ...
Definition: ISDOpcodes.h:356
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition: ISDOpcodes.h:756
@ ATOMIC_LOAD
Val, OUTCHAIN = ATOMIC_LOAD(INCHAIN, ptr) This corresponds to "load atomic" instruction.
Definition: ISDOpcodes.h:1305
@ EXTRACT_ELEMENT
EXTRACT_ELEMENT - This is used to get the lower or upper (determined by a Constant,...
Definition: ISDOpcodes.h:229
@ VACOPY
VACOPY - VACOPY has 5 operands: an input chain, a destination pointer, a source pointer,...
Definition: ISDOpcodes.h:1219
@ TargetGlobalAddress
TargetGlobalAddress - Like GlobalAddress, but the DAG does no folding or anything else with this node...
Definition: ISDOpcodes.h:170
@ STRICT_FTRUNC
Definition: ISDOpcodes.h:444
@ GET_ROUNDING
Returns current rounding mode: -1 Undefined 0 Round to 0 1 Round to nearest, ties to even 2 Round to ...
Definition: ISDOpcodes.h:930
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition: ISDOpcodes.h:673
@ SHL
Shift and rotation operations.
Definition: ISDOpcodes.h:734
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition: ISDOpcodes.h:614
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition: ISDOpcodes.h:587
@ FMINNUM_IEEE
FMINNUM_IEEE/FMAXNUM_IEEE - Perform floating-point minimumNumber or maximumNumber on two values,...
Definition: ISDOpcodes.h:1041
@ STRICT_FMAXNUM
Definition: ISDOpcodes.h:438
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition: ISDOpcodes.h:549
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition: ISDOpcodes.h:810
@ STRICT_FMINNUM
Definition: ISDOpcodes.h:439
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition: ISDOpcodes.h:771
@ ATOMIC_CMP_SWAP
Val, OUTCHAIN = ATOMIC_CMP_SWAP(INCHAIN, ptr, cmp, swap) For double-word atomic operations: ValLo,...
Definition: ISDOpcodes.h:1316
@ FMINNUM
FMINNUM/FMAXNUM - Perform floating-point minimum or maximum on two values.
Definition: ISDOpcodes.h:1028
@ DYNAMIC_STACKALLOC
DYNAMIC_STACKALLOC - Allocate some number of bytes on the stack aligned to a specified boundary.
Definition: ISDOpcodes.h:1109
@ ConstantPool
Definition: ISDOpcodes.h:82
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition: ISDOpcodes.h:848
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
Definition: ISDOpcodes.h:696
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition: ISDOpcodes.h:938
@ STRICT_FROUND
Definition: ISDOpcodes.h:442
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition: ISDOpcodes.h:765
@ STRICT_SINT_TO_FP
STRICT_[US]INT_TO_FP - Convert a signed or unsigned integer to a floating point value.
Definition: ISDOpcodes.h:463
@ STRICT_FFLOOR
Definition: ISDOpcodes.h:441
@ INLINEASM_BR
INLINEASM_BR - Branching version of inline asm. Used by asm-goto.
Definition: ISDOpcodes.h:1165
@ EH_DWARF_CFA
EH_DWARF_CFA - This node represents the pointer to the DWARF Canonical Frame Address (CFA),...
Definition: ISDOpcodes.h:135
@ FRAMEADDR
FRAMEADDR, RETURNADDR - These nodes represent llvm.frameaddress and llvm.returnaddress on the DAG.
Definition: ISDOpcodes.h:100
@ STRICT_FP_TO_UINT
Definition: ISDOpcodes.h:457
@ STRICT_FP_ROUND
X = STRICT_FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision ...
Definition: ISDOpcodes.h:479
@ STRICT_FP_TO_SINT
STRICT_FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:456
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:886
@ READCYCLECOUNTER
READCYCLECOUNTER - This corresponds to the readcyclecounter intrinsic.
Definition: ISDOpcodes.h:1250
@ STRICT_FP_EXTEND
X = STRICT_FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition: ISDOpcodes.h:484
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition: ISDOpcodes.h:708
@ TRAP
TRAP - Trapping instruction.
Definition: ISDOpcodes.h:1276
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition: ISDOpcodes.h:190
@ ADDE
Carry-using nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:286
@ STRICT_FADD
Constrained versions of the binary floating point operators.
Definition: ISDOpcodes.h:407
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition: ISDOpcodes.h:538
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition: ISDOpcodes.h:52
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition: ISDOpcodes.h:919
@ INLINEASM
INLINEASM - Represents an inline asm block.
Definition: ISDOpcodes.h:1162
@ STRICT_FNEARBYINT
Definition: ISDOpcodes.h:437
@ EH_SJLJ_SETJMP
RESULT, OUTCHAIN = EH_SJLJ_SETJMP(INCHAIN, buffer) This corresponds to the eh.sjlj....
Definition: ISDOpcodes.h:147
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition: ISDOpcodes.h:816
@ VAARG
VAARG - VAARG has four operands: an input chain, a pointer, a SRCVALUE, and the alignment.
Definition: ISDOpcodes.h:1214
@ BRCOND
BRCOND - Conditional branch.
Definition: ISDOpcodes.h:1138
@ BlockAddress
Definition: ISDOpcodes.h:84
@ SHL_PARTS
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations.
Definition: ISDOpcodes.h:793
@ AssertSext
AssertSext, AssertZext - These nodes record if a register contains a value that has already been zero...
Definition: ISDOpcodes.h:61
@ FCOPYSIGN
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
Definition: ISDOpcodes.h:507
@ SADDSAT
RESULT = [US]ADDSAT(LHS, RHS) - Perform saturation addition on 2 integers with the same bit width (W)...
Definition: ISDOpcodes.h:347
@ AssertZext
Definition: ISDOpcodes.h:62
@ CALLSEQ_START
CALLSEQ_START/CALLSEQ_END - These operators mark the beginning and end of a call sequence,...
Definition: ISDOpcodes.h:1208
@ STRICT_FRINT
Definition: ISDOpcodes.h:436
@ GET_DYNAMIC_AREA_OFFSET
GET_DYNAMIC_AREA_OFFSET - get offset from native SP to the address of the most recent dynamic alloca.
Definition: ISDOpcodes.h:1388
@ ABDS
ABDS/ABDU - Absolute difference - Return the absolute difference between two numbers interpreted as s...
Definition: ISDOpcodes.h:691
@ ADJUST_TRAMPOLINE
ADJUST_TRAMPOLINE - This corresponds to the adjust_trampoline intrinsic.
Definition: ISDOpcodes.h:1273
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition: ISDOpcodes.h:198
@ TargetGlobalTLSAddress
Definition: ISDOpcodes.h:171
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition: ISDOpcodes.h:529
bool isNormalStore(const SDNode *N)
Returns true if the specified node is a non-truncating and unindexed store.
bool isZEXTLoad(const SDNode *N)
Returns true if the specified node is a ZEXTLOAD.
bool isUNINDEXEDLoad(const SDNode *N)
Returns true if the specified node is an unindexed load.
bool isEXTLoad(const SDNode *N)
Returns true if the specified node is a EXTLOAD.
bool isBuildVectorAllZeros(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are 0 or undef.
bool isSignedIntSetCC(CondCode Code)
Return true if this is a setcc instruction that performs a signed comparison when used with integer o...
Definition: ISDOpcodes.h:1636
MemIndexedMode
MemIndexedMode enum - This enum defines the load / store indexed addressing modes.
Definition: ISDOpcodes.h:1552
bool isSEXTLoad(const SDNode *N)
Returns true if the specified node is a SEXTLOAD.
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
Definition: ISDOpcodes.h:1603
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
Definition: ISDOpcodes.h:1583
bool isUnsignedIntSetCC(CondCode Code)
Return true if this is a setcc instruction that performs an unsigned comparison when used with intege...
Definition: ISDOpcodes.h:1642
bool isNormalLoad(const SDNode *N)
Returns true if the specified node is a non-extending and unindexed load.
Function * getDeclaration(Module *M, ID id, ArrayRef< Type * > Tys=std::nullopt)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
Definition: Function.cpp:1539
@ Bitcast
Perform the operation on a different, but equivalently sized type.
@ VecShuffle
Definition: NVPTX.h:92
@ MO_TLSLDM_FLAG
MO_TLSLDM_FLAG - On AIX the ML relocation type is only valid for a reference to a TOC symbol from the...
Definition: PPC.h:146
@ MO_PIC_LO_FLAG
MO_PIC_LO_FLAG = MO_PIC_FLAG | MO_LO.
Definition: PPC.h:194
@ MO_TPREL_PCREL_FLAG
MO_TPREL_PCREL_FLAG = MO_PCREL_FLAG | MO_TPREL_FLAG.
Definition: PPC.h:197
@ MO_GOT_TPREL_PCREL_FLAG
MO_GOT_TPREL_PCREL_FLAG - A combination of flags; if these bits are set they should produce the reloc...
Definition: PPC.h:172
@ MO_GOT_PCREL_FLAG
MO_GOT_PCREL_FLAG = MO_PCREL_FLAG | MO_GOT_FLAG.
Definition: PPC.h:203
@ MO_TLSGDM_FLAG
MO_TLSGDM_FLAG - If this bit is set the symbol reference is relative to the region handle of TLS Gene...
Definition: PPC.h:154
@ MO_PCREL_FLAG
MO_PCREL_FLAG - If this bit is set, the symbol reference is relative to the current instruction addre...
Definition: PPC.h:121
@ MO_TLSLD_FLAG
MO_TLSLD_FLAG - If this bit is set the symbol reference is relative to TLS Local Dynamic model.
Definition: PPC.h:150
@ MO_TLS_PCREL_FLAG
MO_TLS_PCREL_FLAG = MO_PCREL_FLAG | MO_TLS.
Definition: PPC.h:200
@ MO_TPREL_HA
Definition: PPC.h:179
@ MO_PLT
On PPC, the 12 bits are not enough for all target operand flags.
Definition: PPC.h:113
@ MO_TLS
Symbol for VK_PPC_TLS fixup attached to an ADD instruction.
Definition: PPC.h:188
@ MO_TPREL_FLAG
MO_TPREL_FLAG - If this bit is set, the symbol reference is relative to the thread pointer and the sy...
Definition: PPC.h:140
@ MO_TPREL_LO
Definition: PPC.h:178
@ MO_LO
MO_LO, MO_HA - lo16(symbol) and ha16(symbol)
Definition: PPC.h:175
@ MO_GOT_TLSLD_PCREL_FLAG
MO_GOT_TLSLD_PCREL_FLAG - A combination of flags; if these bits are set they should produce the reloc...
Definition: PPC.h:166
@ MO_PIC_HA_FLAG
MO_PIC_HA_FLAG = MO_PIC_FLAG | MO_HA.
Definition: PPC.h:191
@ MO_TLSGD_FLAG
MO_TLSGD_FLAG - If this bit is set the symbol reference is relative to TLS General Dynamic model for ...
Definition: PPC.h:135
@ MO_GOT_TLSGD_PCREL_FLAG
MO_GOT_TLSGD_PCREL_FLAG - A combination of flags; if these bits are set they should produce the reloc...
Definition: PPC.h:160
@ MO_HA
Definition: PPC.h:176
@ MO_PIC_FLAG
MO_PIC_FLAG - If this bit is set, the symbol reference is relative to the function's picbase,...
Definition: PPC.h:117
@ SEXT_LD_SPLAT
VSRC, CHAIN = SEXT_LD_SPLAT, CHAIN, Ptr - a splatting load memory that sign-extends.
@ FCTIDUZ
Newer FCTI[D,W]UZ floating-point-to-integer conversion instructions for unsigned integers with round ...
@ ADDI_TLSGD_L_ADDR
G8RC = ADDI_TLSGD_L_ADDR G8RReg, Symbol, Symbol - Op that combines ADDI_TLSGD_L and GET_TLS_ADDR unti...
@ FSQRT
Square root instruction.
@ STRICT_FCFID
Constrained integer-to-floating-point conversion instructions.
@ DYNALLOC
The following two target-specific nodes are used for calls through function pointers in the 64-bit SV...
@ COND_BRANCH
CHAIN = COND_BRANCH CHAIN, CRRC, OPC, DESTBB [, INFLAG] - This corresponds to the COND_BRANCH pseudo ...
@ TLSLD_AIX
[GP|G8]RC = TLSLD_AIX, TOC_ENTRY(module handle) Op that requires a single input of the module handle ...
@ CALL_RM
The variants that implicitly define rounding mode for calls with strictfp semantics.
@ STORE_VEC_BE
CHAIN = STORE_VEC_BE CHAIN, VSRC, Ptr - Occurs only for little endian.
@ BDNZ
CHAIN = BDNZ CHAIN, DESTBB - These are used to create counter-based loops.
@ MTVSRZ
Direct move from a GPR to a VSX register (zero)
@ SRL
These nodes represent PPC shifts.
@ VECINSERT
VECINSERT - The PPC vector insert instruction.
@ LXSIZX
GPRC, CHAIN = LXSIZX, CHAIN, Ptr, ByteWidth - This is a load of an integer smaller than 64 bits into ...
@ FNMSUB
FNMSUB - Negated multiply-subtract instruction.
@ RFEBB
CHAIN = RFEBB CHAIN, State - Return from event-based branch.
@ FCTIDZ
FCTI[D,W]Z - The FCTIDZ and FCTIWZ instructions, taking an f32 or f64 operand, producing an f64 value...
@ SC
CHAIN = SC CHAIN, Imm128 - System call.
@ GET_TLS_ADDR
x3 = GET_TLS_ADDR x3, Symbol - For the general-dynamic TLS model, produces a call to __tls_get_addr(s...
@ XXSPLTI32DX
XXSPLTI32DX - The PPC XXSPLTI32DX instruction.
@ ANDI_rec_1_EQ_BIT
i1 = ANDI_rec_1_[EQ|GT]_BIT(i32 or i64 x) - Represents the result of the eq or gt bit of CR0 after ex...
@ FRE
Reciprocal estimate instructions (unary FP ops).
@ ADDIS_GOT_TPREL_HA
G8RC = ADDIS_GOT_TPREL_HA x2, Symbol - Used by the initial-exec TLS model, produces an ADDIS8 instruc...
@ CLRBHRB
CHAIN = CLRBHRB CHAIN - Clear branch history rolling buffer.
@ STORE_COND
CHAIN,Glue = STORE_COND CHAIN, GPR, Ptr The store conditional instruction ST[BHWD]ARX that produces a...
@ SINT_VEC_TO_FP
Extract a subvector from signed integer vector and convert to FP.
@ EXTRACT_SPE
Extract SPE register component, second argument is high or low.
@ XXSWAPD
VSRC, CHAIN = XXSWAPD CHAIN, VSRC - Occurs only for little endian.
@ ADDI_TLSLD_L_ADDR
G8RC = ADDI_TLSLD_L_ADDR G8RReg, Symbol, Symbol - Op that combines ADDI_TLSLD_L and GET_TLSLD_ADDR un...
@ ATOMIC_CMP_SWAP_8
ATOMIC_CMP_SWAP - the exact same as the target-independent nodes except they ensure that the compare ...
@ ST_VSR_SCAL_INT
Store scalar integers from VSR.
@ VCMP
RESVEC = VCMP(LHS, RHS, OPC) - Represents one of the altivec VCMP* instructions.
@ BCTRL
CHAIN,FLAG = BCTRL(CHAIN, INFLAG) - Directly corresponds to a BCTRL instruction.
@ BUILD_SPE64
BUILD_SPE64 and EXTRACT_SPE are analogous to BUILD_PAIR and EXTRACT_ELEMENT but take f64 arguments in...
@ LFIWZX
GPRC, CHAIN = LFIWZX CHAIN, Ptr - This is a floating-point load which zero-extends from a 32-bit inte...
@ RET_GLUE
Return with a glue operand, matched by 'blr'.
@ SCALAR_TO_VECTOR_PERMUTED
PowerPC instructions that have SCALAR_TO_VECTOR semantics tend to place the value into the least sign...
@ EXTRACT_VSX_REG
EXTRACT_VSX_REG = Extract one of the underlying vsx registers of an accumulator or pair register.
@ STXSIX
STXSIX - The STXSI[bh]X instruction.
@ MAT_PCREL_ADDR
MAT_PCREL_ADDR = Materialize a PC Relative address.
@ MFOCRF
R32 = MFOCRF(CRREG, INFLAG) - Represents the MFOCRF instruction.
@ XXSPLT
XXSPLT - The PPC VSX splat instructions.
@ TOC_ENTRY
GPRC = TOC_ENTRY GA, TOC Loads the entry for GA from the TOC, where the TOC base is given by the last...
@ XXPERMDI
XXPERMDI - The PPC XXPERMDI instruction.
@ ADDIS_DTPREL_HA
G8RC = ADDIS_DTPREL_HA x3, Symbol - For the local-dynamic TLS model, produces an ADDIS8 instruction t...
@ ADD_TLS
G8RC = ADD_TLS G8RReg, Symbol - Can be used by the initial-exec and local-exec TLS models,...
@ MTVSRA
Direct move from a GPR to a VSX register (algebraic)
@ VADD_SPLAT
VRRC = VADD_SPLAT Elt, EltSize - Temporary node to be expanded during instruction selection to optimi...
@ PPC32_GOT
GPRC = address of GLOBAL_OFFSET_TABLE.
@ ADDI_DTPREL_L
G8RC = ADDI_DTPREL_L G8RReg, Symbol - For the local-dynamic TLS model, produces an ADDI8 instruction ...
@ BCTRL_LOAD_TOC
CHAIN,FLAG = BCTRL(CHAIN, ADDR, INFLAG) - The combination of a bctrl instruction and the TOC reload r...
@ PPC32_PICGOT
GPRC = address of GLOBAL_OFFSET_TABLE.
@ FCFID
FCFID - The FCFID instruction, taking an f64 operand and producing an f64 value containing the FP re...
@ CR6SET
ch, gl = CR6[UN]SET ch, inglue - Toggle CR bit 6 for SVR4 vararg calls
@ LBRX
GPRC, CHAIN = LBRX CHAIN, Ptr, Type - This is a byte-swapping load instruction.
@ GET_TLS_MOD_AIX
x3 = GET_TLS_MOD_AIX _$TLSML - For the AIX local-dynamic TLS model, produces a call to ....
@ LD_VSX_LH
VSRC, CHAIN = LD_VSX_LH CHAIN, Ptr - This is a floating-point load of a v2f32 value into the lower ha...
@ PROBED_ALLOCA
To avoid stack clash, allocation is performed by block and each block is probed.
@ XXMFACC
XXMFACC = This corresponds to the xxmfacc instruction.
@ ADDIS_TLSGD_HA
G8RC = ADDIS_TLSGD_HA x2, Symbol - For the general-dynamic TLS model, produces an ADDIS8 instruction ...
@ ACC_BUILD
ACC_BUILD = Build an accumulator register from 4 VSX registers.
@ GlobalBaseReg
The result of the mflr at function entry, used for PIC code.
@ LXVD2X
VSRC, CHAIN = LXVD2X_LE CHAIN, Ptr - Occurs only for little endian.
@ XSMAXC
XSMAXC[DQ]P, XSMINC[DQ]P - C-type min/max instructions.
@ CALL
CALL - A direct function call.
@ MTCTR
CHAIN,FLAG = MTCTR(VAL, CHAIN[, INFLAG]) - Directly corresponds to a MTCTR instruction.
@ TC_RETURN
TC_RETURN - A tail call return.
@ STFIWX
STFIWX - The STFIWX instruction.
@ LD_SPLAT
VSRC, CHAIN = LD_SPLAT, CHAIN, Ptr - a splatting load from memory, using instructions such as LXVDSX,...
@ VCMP_rec
RESVEC, OUTFLAG = VCMP_rec(LHS, RHS, OPC) - Represents one of the altivec VCMP*_rec instructions.
@ MFFS
F8RC = MFFS - This moves the FPSCR (not modeled) into the register.
@ PADDI_DTPREL
G8RC = PADDI_DTPREL x3, Symbol - For the pc-rel based local-dynamic TLS model, produces a PADDI8 inst...
@ BUILD_FP128
Direct move of 2 consecutive GPRs to a VSX register.
@ VEXTS
VEXTS, ByteWidth - takes an input in VSFRC and produces an output in VSFRC that is sign-extended from...
@ TLS_LOCAL_EXEC_MAT_ADDR
TLS_LOCAL_EXEC_MAT_ADDR = Materialize an address for TLS global address when using local exec access ...
@ VPERM
VPERM - The PPC VPERM Instruction.
@ ADDIS_TLSLD_HA
G8RC = ADDIS_TLSLD_HA x2, Symbol - For the local-dynamic TLS model, produces an ADDIS8 instruction th...
@ XXSPLTI_SP_TO_DP
XXSPLTI_SP_TO_DP - The PPC VSX splat instructions for immediates for converting immediate single prec...
@ GET_TLSLD_ADDR
x3 = GET_TLSLD_ADDR x3, Symbol - For the local-dynamic TLS model, produces a call to __tls_get_addr(s...
@ ADDI_TLSGD_L
x3 = ADDI_TLSGD_L G8RReg, Symbol - For the general-dynamic TLS model, produces an ADDI8 instruction t...
@ DYNAREAOFFSET
This instruction is lowered in PPCRegisterInfo::eliminateFrameIndex to compute an offset from native ...
@ PAIR_BUILD
PAIR_BUILD = Build a vector pair register from 2 VSX registers.
@ STRICT_FADDRTZ
Constrained floating point add in round-to-zero mode.
@ FTSQRT
Test instruction for software square root.
@ FP_EXTEND_HALF
FP_EXTEND_HALF(VECTOR, IDX) - Custom extend upper (IDX=0) half or lower (IDX=1) half of v4f32 to v2f6...
@ CMPB
The CMPB instruction (takes two operands of i32 or i64).
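A minimal model of its per-byte semantics (cmpb_model is a hypothetical name, not part of the backend): each result byte is 0xFF where the corresponding input bytes are equal and 0x00 otherwise.
  #include <cstdint>
  // Sketch of CMPB semantics for a 64-bit operand width.
  uint64_t cmpb_model(uint64_t a, uint64_t b) {
    uint64_t r = 0;
    for (int i = 0; i < 8; ++i) {
      uint64_t ba = (a >> (8 * i)) & 0xFF, bb = (b >> (8 * i)) & 0xFF;
      if (ba == bb)
        r |= (uint64_t)0xFF << (8 * i); // equal bytes produce 0xFF in that lane
    }
    return r;
  }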
@ VECSHL
VECSHL - The PPC vector shift left instruction.
@ ADDI_TLSLD_L
x3 = ADDI_TLSLD_L G8RReg, Symbol - For the local-dynamic TLS model, produces an ADDI8 instruction tha...
@ FADDRTZ
F8RC = FADDRTZ F8RC, F8RC - This is an FADD done with rounding towards zero.
@ ZEXT_LD_SPLAT
VSRC, CHAIN = ZEXT_LD_SPLAT, CHAIN, Ptr - a splatting load memory that zero-extends.
@ SRA_ADDZE
The combination of sra[wd]i and addze used to implement signed integer division by a power of 2.
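A scalar sketch of the idiom (sdiv_pow2_model is a hypothetical name): the arithmetic shift rounds toward negative infinity, and adding back the carry produced when a negative value shifts out set bits corrects the quotient to round toward zero.
  #include <cstdint>
  // Sketch: signed division by 1 << Shift using the sra + addze idiom.
  int64_t sdiv_pow2_model(int64_t x, unsigned Shift) {
    int64_t q = x >> Shift;                                        // sra[wd]i: rounds toward -inf
    int64_t carry = (x < 0 && (x & ((1LL << Shift) - 1))) ? 1 : 0; // the CA bit set by the shift
    return q + carry;                                              // addze: fix up toward zero
  }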
@ EXTSWSLI
EXTSWSLI = The PPC extswsli instruction, which does an extend-sign word and shift left immediate.
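Its scalar effect can be sketched as sign-extending the low 32 bits and then shifting left (extswsli_model is a hypothetical name):
  #include <cstdint>
  // Sketch of extswsli: extend-sign-word, then shift-left-immediate.
  int64_t extswsli_model(int64_t x, unsigned SH) {
    return (int64_t)(int32_t)x << SH; // sign-extend the low 32 bits, then shift left by SH
  }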
@ STXVD2X
CHAIN = STXVD2X CHAIN, VSRC, Ptr - Occurs only for little endian.
@ TLSGD_AIX
GPRC = TLSGD_AIX, TOC_ENTRY, TOC_ENTRY G8RC = TLSGD_AIX, TOC_ENTRY, TOC_ENTRY Op that combines two re...
@ UINT_VEC_TO_FP
Extract a subvector from unsigned integer vector and convert to FP.
@ GET_TPOINTER
x3 = GET_TPOINTER - Used for the local- and initial-exec TLS model on 32-bit AIX, produces a call to ...
@ LXVRZX
LXVRZX - Load VSX Vector Rightmost and Zero Extend. This node represents v1i128 BUILD_VECTOR of a zero...
@ MFBHRBE
GPRC, CHAIN = MFBHRBE CHAIN, Entry, Dummy - Move from branch history rolling buffer entry.
@ FCFIDU
Newer FCFID[US] integer-to-floating-point conversion instructions for unsigned integers and single-pr...
@ FSEL
FSEL - Traditional three-operand fsel node.
@ SWAP_NO_CHAIN
An SDNode for swaps that are not associated with any loads/stores and thereby have no chain.
@ LOAD_VEC_BE
VSRC, CHAIN = LOAD_VEC_BE CHAIN, Ptr - Occurs only for little endian.
@ LFIWAX
GPRC, CHAIN = LFIWAX CHAIN, Ptr - This is a floating-point load which sign-extends from a 32-bit inte...
@ STBRX
CHAIN = STBRX CHAIN, GPRC, Ptr, Type - This is a byte-swapping store instruction.
@ LD_GOT_TPREL_L
G8RC = LD_GOT_TPREL_L Symbol, G8RReg - Used by the initial-exec TLS model, produces a LD instruction ...
@ MFVSR
Direct move from a VSX register to a GPR.
@ TLS_DYNAMIC_MAT_PCREL_ADDR
TLS_DYNAMIC_MAT_PCREL_ADDR = Materialize a PC Relative address for TLS global address when using dyna...
@ Hi
Hi/Lo - These represent the high and low 16-bit parts of a global address respectively.
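A hedged sketch of the conventional split when the low half is consumed as a signed 16-bit immediate (split_address is a hypothetical helper; where the +0x8000 adjustment is applied is a detail of the lowering, not this listing):
  #include <cstdint>
  // Sketch: split Addr so that ((uint32_t)HiPart << 16) + (int32_t)LoPart reassembles it (mod 2^32).
  void split_address(uint32_t Addr, uint16_t &HiPart, int16_t &LoPart) {
    LoPart = (int16_t)(Addr & 0xFFFF);          // low 16 bits, later used as a signed immediate
    HiPart = (uint16_t)((Addr + 0x8000) >> 16); // high half, pre-adjusted for the signed low half
  }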
Predicate
Predicate - These are "(BI << 5) | BO" for various predicates.
Definition: PPCPredicates.h:26
SDValue get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG)
get_VSPLTI_elt - If this is a build_vector of constants which can be formed by using a vspltis[bhw] i...
bool isXXBRDShuffleMask(ShuffleVectorSDNode *N)
isXXBRDShuffleMask - Return true if this is a shuffle mask suitable for a XXBRD instruction.
FastISel * createFastISel(FunctionLoweringInfo &FuncInfo, const TargetLibraryInfo *LibInfo)
@ DIR_PWR_FUTURE
Definition: PPCSubtarget.h:65
bool isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize, unsigned ShuffleKind, SelectionDAG &DAG)
isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for a VRGH* instruction with the ...
bool isVPKUDUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, SelectionDAG &DAG)
isVPKUDUMShuffleMask - Return true if this is the shuffle mask for a VPKUDUM instruction.
bool isVMRGEOShuffleMask(ShuffleVectorSDNode *N, bool CheckEven, unsigned ShuffleKind, SelectionDAG &DAG)
isVMRGEOShuffleMask - Return true if this is a shuffle mask suitable for a VMRGEW or VMRGOW instructi...
bool isXXBRQShuffleMask(ShuffleVectorSDNode *N)
isXXBRQShuffleMask - Return true if this is a shuffle mask suitable for a XXBRQ instruction.
bool isXXBRWShuffleMask(ShuffleVectorSDNode *N)
isXXBRWShuffleMask - Return true if this is a shuffle mask suitable for a XXBRW instruction.
bool isXXPERMDIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts, bool &Swap, bool IsLE)
isXXPERMDIShuffleMask - Return true if this is a shuffle mask suitable for a XXPERMDI instruction.
bool isXXBRHShuffleMask(ShuffleVectorSDNode *N)
isXXBRHShuffleMask - Return true if this is a shuffle mask suitable for a XXBRH instruction.
unsigned getSplatIdxForPPCMnemonics(SDNode *N, unsigned EltSize, SelectionDAG &DAG)
getSplatIdxForPPCMnemonics - Return the splat index as a value that is appropriate for PPC mnemonics ...
bool isXXSLDWIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts, bool &Swap, bool IsLE)
isXXSLDWIShuffleMask - Return true if this is a shuffle mask suitable for a XXSLDWI instruction.
int isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind, SelectionDAG &DAG)
isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift amount, otherwise return -1.
bool isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize, unsigned ShuffleKind, SelectionDAG &DAG)
isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for a VRGL* instruction with the ...
bool isXXINSERTWMask(ShuffleVectorSDNode *N, unsigned &ShiftElts, unsigned &InsertAtByte, bool &Swap, bool IsLE)
isXXINSERTWMask - Return true if this VECTOR_SHUFFLE can be handled by the XXINSERTW instruction intr...
bool isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize)
isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand specifies a splat of a singl...
bool isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, SelectionDAG &DAG)
isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a VPKUWUM instruction.
bool isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, SelectionDAG &DAG)
isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a VPKUHUM instruction.
Libcall
RTLIB::Libcall enum - This enum defines all of the runtime library calls the backend can emit.
@ Define
Register definition.
@ GeneralDynamic
Definition: CodeGen.h:46
Reg
All possible values of the reg field in the ModR/M byte.
@ XMC_PR
Program Code.
Definition: XCOFF.h:105
@ XTY_ER
External reference.
Definition: XCOFF.h:241
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
constexpr double e
Definition: MathExtras.h:47
const_iterator end(StringRef path)
Get end iterator over path.
Definition: Path.cpp:236
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
@ Offset
Definition: DWP.cpp:480
static bool isIndirectCall(const MachineInstr &MI)
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1722
bool checkConvertToNonDenormSingle(APFloat &ArgAPFloat)
void GetReturnInfo(CallingConv::ID CC, Type *ReturnType, AttributeList attr, SmallVectorImpl< ISD::OutputArg > &Outs, const TargetLowering &TLI, const DataLayout &DL)
Given an LLVM IR type and return type attributes, compute the return value EVTs and flags,...
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
SDValue peekThroughBitcasts(SDValue V)
Return the non-bitcasted source operand of V if it exists.
bool CC_PPC32_SVR4_ByVal(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
bool isAligned(Align Lhs, uint64_t SizeInBytes)
Checks that SizeInBytes is a multiple of the alignment.
Definition: Alignment.h:145
bool isIntS16Immediate(SDNode *N, int16_t &Imm)
isIntS16Immediate - This method tests to see if the node is either a 32-bit or 64-bit immediate,...
bool CC_PPC32_SVR4_VarArg(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition: MathExtras.h:296
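For reference, the standard single-set-bit check this corresponds to (a sketch, not the library source; isPow2_sketch is a hypothetical name):
  #include <cstdint>
  // A value is a power of two > 0 iff it has exactly one bit set.
  constexpr bool isPow2_sketch(uint64_t V) { return V != 0 && (V & (V - 1)) == 0; }
  static_assert(isPow2_sketch(64) && !isPow2_sketch(96) && !isPow2_sketch(0), "examples");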
static bool isRunOfOnes64(uint64_t Val, unsigned &MB, unsigned &ME)
int countr_zero(T Val)
Count the number of 0's from the least significant bit to the most significant, stopping at the first 1.
Definition: bit.h:215
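A small usage example, assuming the declaration in llvm/ADT/bit.h shown above:
  #include "llvm/ADT/bit.h"
  #include <cassert>
  void countr_zero_examples() {
    assert(llvm::countr_zero(0x28u) == 3); // 0b101000: lowest set bit is at index 3
    assert(llvm::countr_zero(1u) == 0);    // bit 0 is already set
  }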
unsigned M1(unsigned Val)
Definition: VE.h:376
bool isReleaseOrStronger(AtomicOrdering AO)
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1729
bool RetCC_PPC_Cold(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:291
bool convertToNonDenormSingle(APInt &ArgAPInt)
FPClassTest
Floating-point class tests, supported by 'is_fpclass' intrinsic.
bool CC_PPC32_SVR4(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
constexpr uint32_t Hi_32(uint64_t Value)
Return the high 32 bits of a 64 bit value.
Definition: MathExtras.h:154
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:167
bool CC_PPC64_ELF(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
bool RetCC_PPC(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
constexpr uint32_t Lo_32(uint64_t Value)
Return the low 32 bits of a 64 bit value.
Definition: MathExtras.h:159
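Together with Hi_32 above, this is the usual 64-bit split; a small usage example (assumes llvm/Support/MathExtras.h):
  #include "llvm/Support/MathExtras.h"
  #include <cassert>
  #include <cstdint>
  void split_examples() {
    uint64_t V = 0x1122334455667788ULL;
    assert(llvm::Hi_32(V) == 0x11223344u); // upper 32 bits
    assert(llvm::Lo_32(V) == 0x55667788u); // lower 32 bits
  }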
format_object< Ts... > format(const char *Fmt, const Ts &... Vals)
These are helper functions used to produce formatted output.
Definition: Format.h:125
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
AtomicOrdering
Atomic ordering for LLVM's memory model.
@ Mod
The access may modify the value stored in memory.
bool isIntS34Immediate(SDNode *N, int64_t &Imm)
isIntS34Immediate - This method tests if value of node given can be accurately represented as a sign ...
bool CCAssignFn(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
CCAssignFn - This function assigns a location for Val, updating State to reflect the change.
@ Mul
Product of integers.
@ Add
Sum of integers.
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition: Alignment.h:155
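A small usage example (assumes llvm/Support/Alignment.h):
  #include "llvm/Support/Alignment.h"
  #include <cassert>
  void alignTo_examples() {
    assert(llvm::alignTo(13, llvm::Align(8)) == 16); // round 13 up to the next multiple of 8
    assert(llvm::alignTo(16, llvm::Align(8)) == 16); // already-aligned sizes are unchanged
  }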
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
Definition: STLExtras.h:1921
DWARFExpression::Operation Op
unsigned M0(unsigned Val)
Definition: VE.h:375
ConstantSDNode * isConstOrConstSplat(SDValue N, bool AllowUndefs=false, bool AllowTruncation=false)
Returns the SDNode if it is a constant splat BuildVector or constant int.
bool isAcquireOrStronger(AtomicOrdering AO)
constexpr int32_t SignExtend32(uint32_t X)
Sign-extend the number in the bottom B bits of X to a 32-bit integer.
Definition: MathExtras.h:563
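Here B is the template parameter giving the source width; a small usage example (assumes llvm/Support/MathExtras.h):
  #include "llvm/Support/MathExtras.h"
  // Interpret the low 16 bits as a signed 16-bit value.
  static_assert(llvm::SignExtend32<16>(0xFFFFu) == -1, "all-ones 16-bit pattern is -1");
  static_assert(llvm::SignExtend32<16>(0x7FFFu) == 32767, "positive values are unchanged");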
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:191
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Definition: Alignment.h:212
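In effect this is the largest power of two dividing both the alignment value and the offset; a small usage example (assumes llvm/Support/Alignment.h):
  #include "llvm/Support/Alignment.h"
  #include <cassert>
  void commonAlignment_examples() {
    assert(llvm::commonAlignment(llvm::Align(16), 4) == 4);   // an offset of 4 limits us to 4
    assert(llvm::commonAlignment(llvm::Align(16), 32) == 16); // a multiple of 16 keeps 16
  }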
constexpr int64_t SignExtend64(uint64_t x)
Sign-extend the number in the bottom B bits of X to a 64-bit integer.
Definition: MathExtras.h:581
static bool isRunOfOnes(unsigned Val, unsigned &MB, unsigned &ME)
Returns true iff Val consists of one contiguous run of 1s with any number of 0s on either side.
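A sketch of the non-wrapping case described here (isRunOfOnes_sketch is a hypothetical name; the real helper also reports MB/ME, the first and last bit of the run, with PPC numbering bits from the most significant end):
  // True iff Val is of the form 0...01...10...0 (at least one 1, no gaps).
  bool isRunOfOnes_sketch(unsigned Val) {
    if (Val == 0)
      return false;
    while ((Val & 1) == 0)
      Val >>= 1;                   // drop trailing zeros
    return (Val & (Val + 1)) == 0; // the remaining low bits must be a contiguous run of 1s
  }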
T bit_floor(T Value)
Returns the largest integral power of two no greater than Value if Value is nonzero.
Definition: bit.h:327
bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
static const unsigned PerfectShuffleTable[6561+1]
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
#define N
This is used by foldLoadsRecursive() to capture a Root Load node which is of type or(load,...
static const fltSemantics & IEEEsingle() LLVM_READNONE
Definition: APFloat.cpp:281
static constexpr roundingMode rmNearestTiesToEven
Definition: APFloat.h:254
static const fltSemantics & PPCDoubleDouble() LLVM_READNONE
Definition: APFloat.cpp:284
static constexpr roundingMode rmTowardZero
Definition: APFloat.h:258
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
uint64_t value() const
This is a hole in the type system and should not be abused.
Definition: Alignment.h:85
Represent subnormal handling kind for floating point instruction inputs and outputs.
Extended Value Type.
Definition: ValueTypes.h:35
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
Definition: ValueTypes.h:94
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition: ValueTypes.h:381
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition: ValueTypes.h:137
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
Definition: ValueTypes.h:74
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
Definition: ValueTypes.h:275
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
Definition: ValueTypes.h:147
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition: ValueTypes.h:359
uint64_t getScalarSizeInBits() const
Definition: ValueTypes.h:371
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition: ValueTypes.h:307
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition: ValueTypes.h:65
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition: ValueTypes.h:367
bool isVector() const
Return true if this is a vector value type.
Definition: ValueTypes.h:168
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition: ValueTypes.h:314
Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
Definition: ValueTypes.cpp:204
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition: ValueTypes.h:319
bool isExtended() const
Test if the given EVT is extended (as opposed to being simple).
Definition: ValueTypes.h:142
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition: ValueTypes.h:157
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition: ValueTypes.h:327
EVT getHalfNumVectorElementsVT(LLVMContext &Context) const
Definition: ValueTypes.h:439
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition: ValueTypes.h:152
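A small usage sketch of a few of these EVT queries (assumes an existing LLVMContext and llvm/CodeGen/ValueTypes.h; evt_examples is a hypothetical function):
  #include "llvm/CodeGen/ValueTypes.h"
  #include "llvm/IR/LLVMContext.h"
  #include <cassert>
  void evt_examples(llvm::LLVMContext &Ctx) {
    llvm::EVT I32 = llvm::EVT::getIntegerVT(Ctx, 32);      // plain i32
    llvm::EVT V4I32 = llvm::EVT::getVectorVT(Ctx, I32, 4); // v4i32
    assert(V4I32.isVector() && V4I32.getVectorNumElements() == 4);
    assert(V4I32.getVectorElementType() == I32);
    assert(V4I32.getFixedSizeInBits() == 128 && I32.isScalarInteger());
  }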
unsigned getByValSize() const
void setByValSize(unsigned S)
Align getNonZeroByValAlign() const
InputArg - This struct carries flags and type information about a single incoming (formal) argument o...
OutputArg - This struct carries flags and a value for a single outgoing (actual) argument or outgoing...
bool isConstant() const
Returns true if we know the value of all bits.
Definition: KnownBits.h:50
void resetAll()
Resets the known state of all bits.
Definition: KnownBits.h:70
const APInt & getConstant() const
Returns the value when all bits have a known value.
Definition: KnownBits.h:56
This class contains a discriminated union of information about pointers in memory operands,...
static MachinePointerInfo getStack(MachineFunction &MF, int64_t Offset, uint8_t ID=0)
Stack pointer relative access.
MachinePointerInfo getWithOffset(int64_t O) const
static MachinePointerInfo getGOT(MachineFunction &MF)
Return a MachinePointerInfo record that refers to a GOT entry.
static MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition: Alignment.h:117
Structure that collects some common arguments that get passed around between the functions for call l...
These are IR-level optimization flags that may be propagated to SDNodes.
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*...
This contains information for each constraint that we are lowering.
This structure contains all information that is necessary for lowering calls.
CallLoweringInfo & setIsPostTypeLegalization(bool Value=true)
CallLoweringInfo & setLibCallee(CallingConv::ID CC, Type *ResultType, SDValue Target, ArgListTy &&ArgsList)
SmallVector< ISD::InputArg, 32 > Ins
CallLoweringInfo & setZExtResult(bool Value=true)
CallLoweringInfo & setDebugLoc(const SDLoc &dl)
CallLoweringInfo & setSExtResult(bool Value=true)
SmallVector< ISD::OutputArg, 32 > Outs
SmallVector< SDValue, 32 > OutVals
SDValue CombineTo(SDNode *N, ArrayRef< SDValue > To, bool AddTo=true)